Enha: clean text before sending to tts
This commit is contained in:
68
extra/tts.go
68
extra/tts.go
@@ -13,6 +13,7 @@ import (
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -31,6 +32,44 @@ var (
|
||||
// endsWithPunctuation = regexp.MustCompile(`[;.!?]$`)
|
||||
)
|
||||
|
||||
// cleanText removes markdown and special characters that are not suitable for TTS
|
||||
func cleanText(text string) string {
|
||||
// Remove markdown-like characters that might interfere with TTS
|
||||
text = strings.ReplaceAll(text, "*", "") // Bold/italic markers
|
||||
text = strings.ReplaceAll(text, "#", "") // Headers
|
||||
text = strings.ReplaceAll(text, "_", "") // Underline/italic markers
|
||||
text = strings.ReplaceAll(text, "~", "") // Strikethrough markers
|
||||
text = strings.ReplaceAll(text, "`", "") // Code markers
|
||||
text = strings.ReplaceAll(text, "[", "") // Link brackets
|
||||
text = strings.ReplaceAll(text, "]", "") // Link brackets
|
||||
text = strings.ReplaceAll(text, "!", "") // Exclamation marks (if not punctuation)
|
||||
|
||||
// Remove HTML tags using regex
|
||||
htmlTagRegex := regexp.MustCompile(`<[^>]*>`)
|
||||
text = htmlTagRegex.ReplaceAllString(text, "")
|
||||
|
||||
// Split text into lines to handle table separators
|
||||
lines := strings.Split(text, "\n")
|
||||
var filteredLines []string
|
||||
|
||||
for _, line := range lines {
|
||||
// Check if the line looks like a table separator (e.g., |----|, |===|, | - - - |)
|
||||
// A table separator typically contains only |, -, =, and spaces
|
||||
isTableSeparator := regexp.MustCompile(`^\s*\|\s*[-=\s]+\|\s*$`).MatchString(strings.TrimSpace(line))
|
||||
|
||||
if !isTableSeparator {
|
||||
// If it's not a table separator, remove vertical bars but keep the content
|
||||
processedLine := strings.ReplaceAll(line, "|", "")
|
||||
filteredLines = append(filteredLines, processedLine)
|
||||
}
|
||||
// If it is a table separator, skip it (don't add to filteredLines)
|
||||
}
|
||||
|
||||
text = strings.Join(filteredLines, "\n")
|
||||
text = strings.TrimSpace(text) // Remove leading/trailing whitespace
|
||||
return text
|
||||
}
|
||||
|
||||
type Orator interface {
|
||||
Speak(text string) error
|
||||
Stop()
|
||||
@@ -97,9 +136,13 @@ func (o *KokoroOrator) readroutine() {
|
||||
}
|
||||
continue // if only one (often incomplete) sentence; wait for next chunk
|
||||
}
|
||||
o.logger.Debug("calling Speak with sentence", "sent", sentence.Text)
|
||||
if err := o.Speak(sentence.Text); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", sentence.Text, "error", err)
|
||||
cleanedText := cleanText(sentence.Text)
|
||||
if cleanedText == "" {
|
||||
continue // Skip empty text after cleaning
|
||||
}
|
||||
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
|
||||
if err := o.Speak(cleanedText); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
|
||||
}
|
||||
}
|
||||
case <-TTSFlushChan:
|
||||
@@ -122,6 +165,7 @@ func (o *KokoroOrator) readroutine() {
|
||||
// but keepinig in mind that remainder could be ommited by tokenizer
|
||||
// Flush remaining text
|
||||
remaining := o.textBuffer.String()
|
||||
remaining = cleanText(remaining)
|
||||
o.textBuffer.Reset()
|
||||
if remaining != "" {
|
||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||
@@ -138,14 +182,12 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
|
||||
if provider == "" {
|
||||
provider = "kokoro"
|
||||
}
|
||||
|
||||
switch strings.ToLower(provider) {
|
||||
case "google", "google-translate", "google_translate":
|
||||
language := cfg.TTS_LANGUAGE
|
||||
if language == "" {
|
||||
language = "en"
|
||||
}
|
||||
|
||||
speech := &google_translate_tts.Speech{
|
||||
Folder: os.TempDir() + "/gf-lt-tts", // Temporary directory for caching
|
||||
Language: language,
|
||||
@@ -153,7 +195,6 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
|
||||
Speed: cfg.TTS_SPEED,
|
||||
Handler: &handlers.Beep{},
|
||||
}
|
||||
|
||||
orator := &GoogleTranslateOrator{
|
||||
logger: log,
|
||||
speech: speech,
|
||||
@@ -287,9 +328,13 @@ func (o *GoogleTranslateOrator) readroutine() {
|
||||
}
|
||||
continue // if only one (often incomplete) sentence; wait for next chunk
|
||||
}
|
||||
o.logger.Debug("calling Speak with sentence", "sent", sentence.Text)
|
||||
if err := o.Speak(sentence.Text); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", sentence.Text, "error", err)
|
||||
cleanedText := cleanText(sentence.Text)
|
||||
if cleanedText == "" {
|
||||
continue // Skip empty text after cleaning
|
||||
}
|
||||
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
|
||||
if err := o.Speak(cleanedText); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
|
||||
}
|
||||
}
|
||||
case <-TTSFlushChan:
|
||||
@@ -307,11 +352,8 @@ func (o *GoogleTranslateOrator) readroutine() {
|
||||
}
|
||||
}
|
||||
}
|
||||
// INFO: if there is a lot of text it will take some time to make with tts at once
|
||||
// to avoid this pause, it might be better to keep splitting on sentences
|
||||
// but keepinig in mind that remainder could be ommited by tokenizer
|
||||
// Flush remaining text
|
||||
remaining := o.textBuffer.String()
|
||||
remaining = cleanText(remaining)
|
||||
o.textBuffer.Reset()
|
||||
if remaining != "" {
|
||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||
|
||||
40
extra/tts_test.go
Normal file
40
extra/tts_test.go
Normal file
@@ -0,0 +1,40 @@
|
||||
//go:build extra
|
||||
// +build extra
|
||||
|
||||
package extra
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCleanText(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"Hello world", "Hello world"},
|
||||
{"**Bold text**", "Bold text"},
|
||||
{"*Italic text*", "Italic text"},
|
||||
{"# Header", "Header"},
|
||||
{"_Underlined text_", "Underlined text"},
|
||||
{"~Strikethrough text~", "Strikethrough text"},
|
||||
{"`Code text`", "Code text"},
|
||||
{"[Link text](url)", "Link text(url)"},
|
||||
{"Mixed *markdown* and #headers#!", "Mixed markdown and headers"},
|
||||
{"<html>tags</html>", "tags"},
|
||||
{"|---|", ""}, // Table separator
|
||||
{"|====|", ""}, // Table separator with equals
|
||||
{"| - - - |", ""}, // Table separator with spaced dashes
|
||||
{"| cell1 | cell2 |", "cell1 cell2"}, // Table row with content
|
||||
{" Trailing spaces ", "Trailing spaces"},
|
||||
{"", ""},
|
||||
{"***", ""},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
result := cleanText(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("cleanText(%q) = %q; expected %q", test.input, result, test.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user