Enha: rename to tts.go; use one buffer;
This commit is contained in:
5
bot.go
5
bot.go
@@ -4,13 +4,13 @@ import (
|
|||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"gf-lt/config"
|
"gf-lt/config"
|
||||||
"gf-lt/extra"
|
"gf-lt/extra"
|
||||||
"gf-lt/models"
|
"gf-lt/models"
|
||||||
"gf-lt/rag"
|
"gf-lt/rag"
|
||||||
"gf-lt/storage"
|
"gf-lt/storage"
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net"
|
"net"
|
||||||
@@ -350,6 +350,7 @@ out:
|
|||||||
if cfg.TTS_ENABLED {
|
if cfg.TTS_ENABLED {
|
||||||
// audioStream.TextChan <- chunk
|
// audioStream.TextChan <- chunk
|
||||||
extra.TTSFlushChan <- true
|
extra.TTSFlushChan <- true
|
||||||
|
logger.Info("sending flushchan signal")
|
||||||
}
|
}
|
||||||
break out
|
break out
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,13 +2,14 @@ package extra
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"gf-lt/config"
|
|
||||||
"gf-lt/models"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"gf-lt/config"
|
||||||
|
"gf-lt/models"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -19,16 +20,16 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
TTSTextChan = make(chan string, 10000)
|
TTSTextChan = make(chan string, 10000)
|
||||||
TTSFlushChan = make(chan bool, 1)
|
TTSFlushChan = make(chan bool, 1)
|
||||||
TTSDoneChan = make(chan bool, 1)
|
TTSDoneChan = make(chan bool, 1)
|
||||||
|
endsWithPunctuation = regexp.MustCompile(`[;.!?]$`)
|
||||||
)
|
)
|
||||||
|
|
||||||
type Orator interface {
|
type Orator interface {
|
||||||
Speak(text string) error
|
Speak(text string) error
|
||||||
Stop()
|
Stop()
|
||||||
// pause and resume?
|
// pause and resume?
|
||||||
GetSBuilder() strings.Builder
|
|
||||||
GetLogger() *slog.Logger
|
GetLogger() *slog.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -43,69 +44,79 @@ type KokoroOrator struct {
|
|||||||
Voice string
|
Voice string
|
||||||
currentStream *beep.Ctrl // Added for playback control
|
currentStream *beep.Ctrl // Added for playback control
|
||||||
textBuffer strings.Builder
|
textBuffer strings.Builder
|
||||||
|
// textBuffer bytes.Buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
func stoproutine(orator Orator) {
|
func (o *KokoroOrator) stoproutine() {
|
||||||
<-TTSDoneChan
|
<-TTSDoneChan
|
||||||
orator.GetLogger().Info("orator got done signal")
|
o.logger.Info("orator got done signal")
|
||||||
orator.Stop()
|
o.Stop()
|
||||||
// drain the channel
|
// drain the channel
|
||||||
for len(TTSTextChan) > 0 {
|
for len(TTSTextChan) > 0 {
|
||||||
<-TTSTextChan
|
<-TTSTextChan
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func readroutine(orator Orator) {
|
func (o *KokoroOrator) readroutine() {
|
||||||
tokenizer, _ := english.NewSentenceTokenizer(nil)
|
tokenizer, _ := english.NewSentenceTokenizer(nil)
|
||||||
var sentenceBuf bytes.Buffer
|
// var sentenceBuf bytes.Buffer
|
||||||
var remainder strings.Builder
|
// var remainder strings.Builder
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case chunk := <-TTSTextChan:
|
case chunk := <-TTSTextChan:
|
||||||
sentenceBuf.WriteString(chunk)
|
// sentenceBuf.WriteString(chunk)
|
||||||
text := sentenceBuf.String()
|
// text := sentenceBuf.String()
|
||||||
|
_, err := o.textBuffer.WriteString(chunk)
|
||||||
|
if err != nil {
|
||||||
|
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
text := o.textBuffer.String()
|
||||||
sentences := tokenizer.Tokenize(text)
|
sentences := tokenizer.Tokenize(text)
|
||||||
|
o.logger.Info("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
|
||||||
for i, sentence := range sentences {
|
for i, sentence := range sentences {
|
||||||
if i == len(sentences)-1 {
|
if i == len(sentences)-1 {
|
||||||
sentenceBuf.Reset()
|
o.textBuffer.Reset()
|
||||||
sentenceBuf.WriteString(sentence.Text)
|
_, err := o.textBuffer.WriteString(sentence.Text)
|
||||||
continue
|
if err != nil {
|
||||||
|
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
continue // if only one (often incomplete) sentence; wait for next chunk
|
||||||
}
|
}
|
||||||
// Send complete sentence to TTS
|
o.logger.Info("calling Speak with sentence", "sent", sentence.Text)
|
||||||
if err := orator.Speak(sentence.Text); err != nil {
|
if err := o.Speak(sentence.Text); err != nil {
|
||||||
orator.GetLogger().Error("tts failed", "sentence", sentence.Text, "error", err)
|
o.logger.Error("tts failed", "sentence", sentence.Text, "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case <-TTSFlushChan:
|
case <-TTSFlushChan:
|
||||||
|
o.logger.Info("got flushchan signal start")
|
||||||
// llm is done; get the whole message out
|
// llm is done; get the whole message out
|
||||||
// FIXME: loses one token
|
if len(TTSTextChan) > 0 { // otherwise might get stuck
|
||||||
for chunk := range TTSTextChan {
|
for chunk := range TTSTextChan {
|
||||||
// orator.GetLogger().Info("flushing", "chunk", chunk)
|
_, err := o.textBuffer.WriteString(chunk)
|
||||||
// sentenceBuf.WriteString(chunk)
|
if err != nil {
|
||||||
remainder.WriteString(chunk) // I get text here
|
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||||
if len(TTSTextChan) == 0 {
|
continue
|
||||||
break
|
}
|
||||||
|
if len(TTSTextChan) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// INFO: if there is a lot of text it will take some time to make with tts at once
|
// INFO: if there is a lot of text it will take some time to make with tts at once
|
||||||
// to avoid this pause, it might be better to keep splitting on sentences
|
// to avoid this pause, it might be better to keep splitting on sentences
|
||||||
// but keeping in mind that remainder could be omitted by tokenizer
|
// but keeping in mind that remainder could be omitted by tokenizer
|
||||||
// Flush remaining text
|
// Flush remaining text
|
||||||
remaining := remainder.String()
|
remaining := o.textBuffer.String()
|
||||||
remainder.Reset()
|
o.logger.Info("got flushchan signal", "rem", remaining)
|
||||||
|
defer o.textBuffer.Reset()
|
||||||
if remaining != "" {
|
if remaining != "" {
|
||||||
// orator.GetLogger().Info("flushing", "remaining", remaining)
|
o.logger.Info("calling Speak with remainder", "rem", remaining)
|
||||||
if err := orator.Speak(remaining); err != nil {
|
if err := o.Speak(remaining); err != nil {
|
||||||
orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err)
|
o.logger.Error("tts failed", "sentence", remaining, "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// case <-TTSDoneChan:
|
|
||||||
// orator.GetLogger().Info("orator got done signal")
|
|
||||||
// orator.Stop()
|
|
||||||
// // is that the best way to empty channel?
|
|
||||||
// close(TTSTextChan)
|
|
||||||
// TTSTextChan = make(chan string, 10000)
|
|
||||||
// return
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -120,8 +131,8 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
|
|||||||
Language: "a",
|
Language: "a",
|
||||||
Voice: "af_bella(1)+af_sky(1)",
|
Voice: "af_bella(1)+af_sky(1)",
|
||||||
}
|
}
|
||||||
go readroutine(orator)
|
go orator.readroutine()
|
||||||
go stoproutine(orator)
|
go orator.stoproutine()
|
||||||
return orator
|
return orator
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -191,18 +202,13 @@ func (o *KokoroOrator) Speak(text string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: stop works; but new stream does not start afterwards
|
|
||||||
func (o *KokoroOrator) Stop() {
|
func (o *KokoroOrator) Stop() {
|
||||||
// speaker.Clear()
|
// speaker.Clear()
|
||||||
o.logger.Info("attempted to stop orator", "orator", o)
|
o.logger.Info("attempted to stop orator", "orator", o)
|
||||||
speaker.Lock()
|
speaker.Lock()
|
||||||
defer speaker.Unlock()
|
defer speaker.Unlock()
|
||||||
if o.currentStream != nil {
|
if o.currentStream != nil {
|
||||||
o.currentStream.Paused = true
|
// o.currentStream.Paused = true
|
||||||
o.currentStream.Streamer = nil
|
o.currentStream.Streamer = nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *KokoroOrator) GetSBuilder() strings.Builder {
|
|
||||||
return o.textBuffer
|
|
||||||
}
|
|
||||||
1
extra/vad.go
Normal file
1
extra/vad.go
Normal file
@@ -0,0 +1 @@
|
|||||||
|
package extra
|
||||||
Reference in New Issue
Block a user