Enha: rename to tts.go; use one buffer;

This commit is contained in:
Grail Finder
2025-05-21 21:16:42 +03:00
parent fe4f759173
commit 4a17dd745c
3 changed files with 57 additions and 49 deletions

5
bot.go
View File

@@ -4,13 +4,13 @@ import (
"bufio" "bufio"
"bytes" "bytes"
"context" "context"
"encoding/json"
"fmt"
"gf-lt/config" "gf-lt/config"
"gf-lt/extra" "gf-lt/extra"
"gf-lt/models" "gf-lt/models"
"gf-lt/rag" "gf-lt/rag"
"gf-lt/storage" "gf-lt/storage"
"encoding/json"
"fmt"
"io" "io"
"log/slog" "log/slog"
"net" "net"
@@ -350,6 +350,7 @@ out:
if cfg.TTS_ENABLED { if cfg.TTS_ENABLED {
// audioStream.TextChan <- chunk // audioStream.TextChan <- chunk
extra.TTSFlushChan <- true extra.TTSFlushChan <- true
logger.Info("sending flushchan signal")
} }
break out break out
} }

View File

@@ -2,13 +2,14 @@ package extra
import ( import (
"bytes" "bytes"
"gf-lt/config"
"gf-lt/models"
"encoding/json" "encoding/json"
"fmt" "fmt"
"gf-lt/config"
"gf-lt/models"
"io" "io"
"log/slog" "log/slog"
"net/http" "net/http"
"regexp"
"strings" "strings"
"time" "time"
@@ -22,13 +23,13 @@ var (
TTSTextChan = make(chan string, 10000) TTSTextChan = make(chan string, 10000)
TTSFlushChan = make(chan bool, 1) TTSFlushChan = make(chan bool, 1)
TTSDoneChan = make(chan bool, 1) TTSDoneChan = make(chan bool, 1)
endsWithPunctuation = regexp.MustCompile(`[;.!?]$`)
) )
type Orator interface { type Orator interface {
Speak(text string) error Speak(text string) error
Stop() Stop()
// pause and resume? // pause and resume?
GetSBuilder() strings.Builder
GetLogger() *slog.Logger GetLogger() *slog.Logger
} }
@@ -43,69 +44,79 @@ type KokoroOrator struct {
Voice string Voice string
currentStream *beep.Ctrl // Added for playback control currentStream *beep.Ctrl // Added for playback control
textBuffer strings.Builder textBuffer strings.Builder
// textBuffer bytes.Buffer
} }
func stoproutine(orator Orator) { func (o *KokoroOrator) stoproutine() {
<-TTSDoneChan <-TTSDoneChan
orator.GetLogger().Info("orator got done signal") o.logger.Info("orator got done signal")
orator.Stop() o.Stop()
// drain the channel // drain the channel
for len(TTSTextChan) > 0 { for len(TTSTextChan) > 0 {
<-TTSTextChan <-TTSTextChan
} }
} }
func readroutine(orator Orator) { func (o *KokoroOrator) readroutine() {
tokenizer, _ := english.NewSentenceTokenizer(nil) tokenizer, _ := english.NewSentenceTokenizer(nil)
var sentenceBuf bytes.Buffer // var sentenceBuf bytes.Buffer
var remainder strings.Builder // var remainder strings.Builder
for { for {
select { select {
case chunk := <-TTSTextChan: case chunk := <-TTSTextChan:
sentenceBuf.WriteString(chunk) // sentenceBuf.WriteString(chunk)
text := sentenceBuf.String() // text := sentenceBuf.String()
sentences := tokenizer.Tokenize(text) _, err := o.textBuffer.WriteString(chunk)
for i, sentence := range sentences { if err != nil {
if i == len(sentences)-1 { o.logger.Warn("failed to write to stringbuilder", "error", err)
sentenceBuf.Reset()
sentenceBuf.WriteString(sentence.Text)
continue continue
} }
// Send complete sentence to TTS text := o.textBuffer.String()
if err := orator.Speak(sentence.Text); err != nil { sentences := tokenizer.Tokenize(text)
orator.GetLogger().Error("tts failed", "sentence", sentence.Text, "error", err) o.logger.Info("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
for i, sentence := range sentences {
if i == len(sentences)-1 {
o.textBuffer.Reset()
_, err := o.textBuffer.WriteString(sentence.Text)
if err != nil {
o.logger.Warn("failed to write to stringbuilder", "error", err)
continue
}
continue // if only one (often incomplete) sentence; wait for next chunk
}
o.logger.Info("calling Speak with sentence", "sent", sentence.Text)
if err := o.Speak(sentence.Text); err != nil {
o.logger.Error("tts failed", "sentence", sentence.Text, "error", err)
} }
} }
case <-TTSFlushChan: case <-TTSFlushChan:
o.logger.Info("got flushchan signal start")
// llm is done; get the whole message out // llm is done; get the whole message out
// FIXME: loses one token if len(TTSTextChan) > 0 { // otherwise might get stuck
for chunk := range TTSTextChan { for chunk := range TTSTextChan {
// orator.GetLogger().Info("flushing", "chunk", chunk) _, err := o.textBuffer.WriteString(chunk)
// sentenceBuf.WriteString(chunk) if err != nil {
remainder.WriteString(chunk) // I get text here o.logger.Warn("failed to write to stringbuilder", "error", err)
continue
}
if len(TTSTextChan) == 0 { if len(TTSTextChan) == 0 {
break break
} }
} }
}
// INFO: if there is a lot of text it will take some time to make with tts at once // INFO: if there is a lot of text it will take some time to make with tts at once
// to avoid this pause, it might be better to keep splitting on sentences // to avoid this pause, it might be better to keep splitting on sentences
// but keeping in mind that the remainder could be omitted by the tokenizer // but keeping in mind that the remainder could be omitted by the tokenizer
// Flush remaining text // Flush remaining text
remaining := remainder.String() remaining := o.textBuffer.String()
remainder.Reset() o.logger.Info("got flushchan signal", "rem", remaining)
defer o.textBuffer.Reset()
if remaining != "" { if remaining != "" {
// orator.GetLogger().Info("flushing", "remaining", remaining) o.logger.Info("calling Speak with remainder", "rem", remaining)
if err := orator.Speak(remaining); err != nil { if err := o.Speak(remaining); err != nil {
orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err) o.logger.Error("tts failed", "sentence", remaining, "error", err)
} }
} }
// case <-TTSDoneChan:
// orator.GetLogger().Info("orator got done signal")
// orator.Stop()
// // is that the best way to empty the channel?
// close(TTSTextChan)
// TTSTextChan = make(chan string, 10000)
// return
} }
} }
} }
@@ -120,8 +131,8 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
Language: "a", Language: "a",
Voice: "af_bella(1)+af_sky(1)", Voice: "af_bella(1)+af_sky(1)",
} }
go readroutine(orator) go orator.readroutine()
go stoproutine(orator) go orator.stoproutine()
return orator return orator
} }
@@ -191,18 +202,13 @@ func (o *KokoroOrator) Speak(text string) error {
return nil return nil
} }
// TODO: stop works; but new stream does not start afterwards
func (o *KokoroOrator) Stop() { func (o *KokoroOrator) Stop() {
// speaker.Clear() // speaker.Clear()
o.logger.Info("attempted to stop orator", "orator", o) o.logger.Info("attempted to stop orator", "orator", o)
speaker.Lock() speaker.Lock()
defer speaker.Unlock() defer speaker.Unlock()
if o.currentStream != nil { if o.currentStream != nil {
o.currentStream.Paused = true // o.currentStream.Paused = true
o.currentStream.Streamer = nil o.currentStream.Streamer = nil
} }
} }
func (o *KokoroOrator) GetSBuilder() strings.Builder {
return o.textBuffer
}

1
extra/vad.go Normal file
View File

@@ -0,0 +1 @@
package extra