Fix (tts): interrupt
This commit is contained in:
23
extra/tts.go
23
extra/tts.go
@@ -30,6 +30,7 @@ var (
|
|||||||
TTSFlushChan = make(chan bool, 1)
|
TTSFlushChan = make(chan bool, 1)
|
||||||
TTSDoneChan = make(chan bool, 1)
|
TTSDoneChan = make(chan bool, 1)
|
||||||
// endsWithPunctuation = regexp.MustCompile(`[;.!?]$`)
|
// endsWithPunctuation = regexp.MustCompile(`[;.!?]$`)
|
||||||
|
threeOrMoreDashesRE = regexp.MustCompile(`-{3,}`)
|
||||||
)
|
)
|
||||||
|
|
||||||
// cleanText removes markdown and special characters that are not suitable for TTS
|
// cleanText removes markdown and special characters that are not suitable for TTS
|
||||||
@@ -43,20 +44,16 @@ func cleanText(text string) string {
|
|||||||
text = strings.ReplaceAll(text, "[", "") // Link brackets
|
text = strings.ReplaceAll(text, "[", "") // Link brackets
|
||||||
text = strings.ReplaceAll(text, "]", "") // Link brackets
|
text = strings.ReplaceAll(text, "]", "") // Link brackets
|
||||||
text = strings.ReplaceAll(text, "!", "") // Exclamation marks (if not punctuation)
|
text = strings.ReplaceAll(text, "!", "") // Exclamation marks (if not punctuation)
|
||||||
|
|
||||||
// Remove HTML tags using regex
|
// Remove HTML tags using regex
|
||||||
htmlTagRegex := regexp.MustCompile(`<[^>]*>`)
|
htmlTagRegex := regexp.MustCompile(`<[^>]*>`)
|
||||||
text = htmlTagRegex.ReplaceAllString(text, "")
|
text = htmlTagRegex.ReplaceAllString(text, "")
|
||||||
|
|
||||||
// Split text into lines to handle table separators
|
// Split text into lines to handle table separators
|
||||||
lines := strings.Split(text, "\n")
|
lines := strings.Split(text, "\n")
|
||||||
var filteredLines []string
|
var filteredLines []string
|
||||||
|
|
||||||
for _, line := range lines {
|
for _, line := range lines {
|
||||||
// Check if the line looks like a table separator (e.g., |----|, |===|, | - - - |)
|
// Check if the line looks like a table separator (e.g., |----|, |===|, | - - - |)
|
||||||
// A table separator typically contains only |, -, =, and spaces
|
// A table separator typically contains only |, -, =, and spaces
|
||||||
isTableSeparator := regexp.MustCompile(`^\s*\|\s*[-=\s]+\|\s*$`).MatchString(strings.TrimSpace(line))
|
isTableSeparator := regexp.MustCompile(`^\s*\|\s*[-=\s]+\|\s*$`).MatchString(strings.TrimSpace(line))
|
||||||
|
|
||||||
if !isTableSeparator {
|
if !isTableSeparator {
|
||||||
// If it's not a table separator, remove vertical bars but keep the content
|
// If it's not a table separator, remove vertical bars but keep the content
|
||||||
processedLine := strings.ReplaceAll(line, "|", "")
|
processedLine := strings.ReplaceAll(line, "|", "")
|
||||||
@@ -64,8 +61,8 @@ func cleanText(text string) string {
|
|||||||
}
|
}
|
||||||
// If it is a table separator, skip it (don't add to filteredLines)
|
// If it is a table separator, skip it (don't add to filteredLines)
|
||||||
}
|
}
|
||||||
|
|
||||||
text = strings.Join(filteredLines, "\n")
|
text = strings.Join(filteredLines, "\n")
|
||||||
|
text = threeOrMoreDashesRE.ReplaceAllString(text, "")
|
||||||
text = strings.TrimSpace(text) // Remove leading/trailing whitespace
|
text = strings.TrimSpace(text) // Remove leading/trailing whitespace
|
||||||
return text
|
return text
|
||||||
}
|
}
|
||||||
@@ -89,6 +86,7 @@ type KokoroOrator struct {
|
|||||||
currentStream *beep.Ctrl // Added for playback control
|
currentStream *beep.Ctrl // Added for playback control
|
||||||
currentDone chan bool
|
currentDone chan bool
|
||||||
textBuffer strings.Builder
|
textBuffer strings.Builder
|
||||||
|
interrupt bool
|
||||||
// textBuffer bytes.Buffer
|
// textBuffer bytes.Buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -99,6 +97,7 @@ type GoogleTranslateOrator struct {
|
|||||||
currentStream *beep.Ctrl
|
currentStream *beep.Ctrl
|
||||||
currentDone chan bool
|
currentDone chan bool
|
||||||
textBuffer strings.Builder
|
textBuffer strings.Builder
|
||||||
|
interrupt bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *KokoroOrator) stoproutine() {
|
func (o *KokoroOrator) stoproutine() {
|
||||||
@@ -110,7 +109,9 @@ func (o *KokoroOrator) stoproutine() {
|
|||||||
for len(TTSTextChan) > 0 {
|
for len(TTSTextChan) > 0 {
|
||||||
<-TTSTextChan
|
<-TTSTextChan
|
||||||
}
|
}
|
||||||
|
o.textBuffer.Reset()
|
||||||
o.currentDone <- true
|
o.currentDone <- true
|
||||||
|
o.interrupt = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,6 +122,7 @@ func (o *KokoroOrator) readroutine() {
|
|||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case chunk := <-TTSTextChan:
|
case chunk := <-TTSTextChan:
|
||||||
|
o.interrupt = false
|
||||||
// sentenceBuf.WriteString(chunk)
|
// sentenceBuf.WriteString(chunk)
|
||||||
// text := sentenceBuf.String()
|
// text := sentenceBuf.String()
|
||||||
_, err := o.textBuffer.WriteString(chunk)
|
_, err := o.textBuffer.WriteString(chunk)
|
||||||
@@ -175,6 +177,9 @@ func (o *KokoroOrator) readroutine() {
|
|||||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||||
sentencesRem := tokenizer.Tokenize(remaining)
|
sentencesRem := tokenizer.Tokenize(remaining)
|
||||||
for _, rs := range sentencesRem { // to avoid dumping large volume of text
|
for _, rs := range sentencesRem { // to avoid dumping large volume of text
|
||||||
|
if o.interrupt {
|
||||||
|
break
|
||||||
|
}
|
||||||
if err := o.Speak(rs.Text); err != nil {
|
if err := o.Speak(rs.Text); err != nil {
|
||||||
o.logger.Error("tts failed", "sentence", rs, "error", err)
|
o.logger.Error("tts failed", "sentence", rs, "error", err)
|
||||||
}
|
}
|
||||||
@@ -307,11 +312,13 @@ func (o *GoogleTranslateOrator) stoproutine() {
|
|||||||
<-TTSDoneChan
|
<-TTSDoneChan
|
||||||
o.logger.Debug("orator got done signal")
|
o.logger.Debug("orator got done signal")
|
||||||
o.Stop()
|
o.Stop()
|
||||||
o.currentDone <- true
|
|
||||||
// drain the channel
|
// drain the channel
|
||||||
for len(TTSTextChan) > 0 {
|
for len(TTSTextChan) > 0 {
|
||||||
<-TTSTextChan
|
<-TTSTextChan
|
||||||
}
|
}
|
||||||
|
o.textBuffer.Reset()
|
||||||
|
o.currentDone <- true
|
||||||
|
o.interrupt = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -320,6 +327,7 @@ func (o *GoogleTranslateOrator) readroutine() {
|
|||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case chunk := <-TTSTextChan:
|
case chunk := <-TTSTextChan:
|
||||||
|
o.interrupt = false
|
||||||
_, err := o.textBuffer.WriteString(chunk)
|
_, err := o.textBuffer.WriteString(chunk)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||||
@@ -371,6 +379,9 @@ func (o *GoogleTranslateOrator) readroutine() {
|
|||||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||||
sentencesRem := tokenizer.Tokenize(remaining)
|
sentencesRem := tokenizer.Tokenize(remaining)
|
||||||
for _, rs := range sentencesRem { // to avoid dumping large volume of text
|
for _, rs := range sentencesRem { // to avoid dumping large volume of text
|
||||||
|
if o.interrupt {
|
||||||
|
break
|
||||||
|
}
|
||||||
if err := o.Speak(rs.Text); err != nil {
|
if err := o.Speak(rs.Text); err != nil {
|
||||||
o.logger.Error("tts failed", "sentence", rs.Text, "error", err)
|
o.logger.Error("tts failed", "sentence", rs.Text, "error", err)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user