Enha: clean text before sending to tts
This commit is contained in:
68
extra/tts.go
68
extra/tts.go
@@ -13,6 +13,7 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -31,6 +32,44 @@ var (
|
|||||||
// endsWithPunctuation = regexp.MustCompile(`[;.!?]$`)
|
// endsWithPunctuation = regexp.MustCompile(`[;.!?]$`)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Helpers used by cleanText. They are built once at package initialization
// rather than on every call (and, for the separator pattern, on every line).
// Both *regexp.Regexp and *strings.Replacer are documented as safe for
// concurrent use.
var (
	// ttsMarkdownStripper deletes markdown marker characters in a single pass.
	ttsMarkdownStripper = strings.NewReplacer(
		"*", "", // Bold/italic markers
		"#", "", // Headers
		"_", "", // Underline/italic markers
		"~", "", // Strikethrough markers
		"`", "", // Code markers
		"[", "", // Link brackets
		"]", "", // Link brackets
		"!", "", // Image markers; note this also drops sentence-ending '!'
	)

	// ttsHTMLTagPattern matches everything from a '<' up to the next '>'.
	ttsHTMLTagPattern = regexp.MustCompile(`<[^>]*>`)

	// ttsTableSeparatorPattern matches a (whitespace-trimmed) line that
	// contains at least one '|' and otherwise only '-', '=', ':', '|' and
	// whitespace. This covers single-column separators such as "|---|" as well
	// as multi-column and aligned delimiter rows such as "|---|---|" or
	// "|:---|:---:|".
	ttsTableSeparatorPattern = regexp.MustCompile(`^[-=:|\s]*\|[-=:|\s]*$`)
)

// cleanText removes markdown and special characters that are not suitable for TTS.
//
// Processing order:
//  1. the markdown marker characters * # _ ~ ` [ ] ! are deleted everywhere;
//  2. HTML-like tags (<...>) are deleted;
//  3. lines that are table separators are dropped entirely, and the vertical
//     bars of every remaining line are deleted while the cell content is kept;
//  4. leading and trailing whitespace of the whole result is trimmed.
//
// Inner whitespace is left untouched, so a table row like "| a | b |" becomes
// "a  b" (two spaces). An empty string is returned when nothing speakable
// remains.
func cleanText(text string) string {
	text = ttsMarkdownStripper.Replace(text)
	text = ttsHTMLTagPattern.ReplaceAllString(text, "")

	lines := strings.Split(text, "\n")
	kept := make([]string, 0, len(lines))
	for _, line := range lines {
		// Trim first so that any Unicode whitespace (or a trailing '\r')
		// around the separator does not prevent the match.
		if ttsTableSeparatorPattern.MatchString(strings.TrimSpace(line)) {
			continue // separators carry no speakable content
		}
		kept = append(kept, strings.ReplaceAll(line, "|", ""))
	}

	return strings.TrimSpace(strings.Join(kept, "\n"))
}
|
||||||
|
|
||||||
type Orator interface {
|
type Orator interface {
|
||||||
Speak(text string) error
|
Speak(text string) error
|
||||||
Stop()
|
Stop()
|
||||||
@@ -97,9 +136,13 @@ func (o *KokoroOrator) readroutine() {
|
|||||||
}
|
}
|
||||||
continue // if only one (often incomplete) sentence; wait for next chunk
|
continue // if only one (often incomplete) sentence; wait for next chunk
|
||||||
}
|
}
|
||||||
o.logger.Debug("calling Speak with sentence", "sent", sentence.Text)
|
cleanedText := cleanText(sentence.Text)
|
||||||
if err := o.Speak(sentence.Text); err != nil {
|
if cleanedText == "" {
|
||||||
o.logger.Error("tts failed", "sentence", sentence.Text, "error", err)
|
continue // Skip empty text after cleaning
|
||||||
|
}
|
||||||
|
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
|
||||||
|
if err := o.Speak(cleanedText); err != nil {
|
||||||
|
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case <-TTSFlushChan:
|
case <-TTSFlushChan:
|
||||||
@@ -122,6 +165,7 @@ func (o *KokoroOrator) readroutine() {
|
|||||||
// but keeping in mind that remainder could be omitted by tokenizer
|
// but keeping in mind that remainder could be omitted by tokenizer
|
||||||
// Flush remaining text
|
// Flush remaining text
|
||||||
remaining := o.textBuffer.String()
|
remaining := o.textBuffer.String()
|
||||||
|
remaining = cleanText(remaining)
|
||||||
o.textBuffer.Reset()
|
o.textBuffer.Reset()
|
||||||
if remaining != "" {
|
if remaining != "" {
|
||||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||||
@@ -138,14 +182,12 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
|
|||||||
if provider == "" {
|
if provider == "" {
|
||||||
provider = "kokoro"
|
provider = "kokoro"
|
||||||
}
|
}
|
||||||
|
|
||||||
switch strings.ToLower(provider) {
|
switch strings.ToLower(provider) {
|
||||||
case "google", "google-translate", "google_translate":
|
case "google", "google-translate", "google_translate":
|
||||||
language := cfg.TTS_LANGUAGE
|
language := cfg.TTS_LANGUAGE
|
||||||
if language == "" {
|
if language == "" {
|
||||||
language = "en"
|
language = "en"
|
||||||
}
|
}
|
||||||
|
|
||||||
speech := &google_translate_tts.Speech{
|
speech := &google_translate_tts.Speech{
|
||||||
Folder: os.TempDir() + "/gf-lt-tts", // Temporary directory for caching
|
Folder: os.TempDir() + "/gf-lt-tts", // Temporary directory for caching
|
||||||
Language: language,
|
Language: language,
|
||||||
@@ -153,7 +195,6 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
|
|||||||
Speed: cfg.TTS_SPEED,
|
Speed: cfg.TTS_SPEED,
|
||||||
Handler: &handlers.Beep{},
|
Handler: &handlers.Beep{},
|
||||||
}
|
}
|
||||||
|
|
||||||
orator := &GoogleTranslateOrator{
|
orator := &GoogleTranslateOrator{
|
||||||
logger: log,
|
logger: log,
|
||||||
speech: speech,
|
speech: speech,
|
||||||
@@ -287,9 +328,13 @@ func (o *GoogleTranslateOrator) readroutine() {
|
|||||||
}
|
}
|
||||||
continue // if only one (often incomplete) sentence; wait for next chunk
|
continue // if only one (often incomplete) sentence; wait for next chunk
|
||||||
}
|
}
|
||||||
o.logger.Debug("calling Speak with sentence", "sent", sentence.Text)
|
cleanedText := cleanText(sentence.Text)
|
||||||
if err := o.Speak(sentence.Text); err != nil {
|
if cleanedText == "" {
|
||||||
o.logger.Error("tts failed", "sentence", sentence.Text, "error", err)
|
continue // Skip empty text after cleaning
|
||||||
|
}
|
||||||
|
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
|
||||||
|
if err := o.Speak(cleanedText); err != nil {
|
||||||
|
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case <-TTSFlushChan:
|
case <-TTSFlushChan:
|
||||||
@@ -307,11 +352,8 @@ func (o *GoogleTranslateOrator) readroutine() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// INFO: if there is a lot of text it will take some time to make with tts at once
|
|
||||||
// to avoid this pause, it might be better to keep splitting on sentences
|
|
||||||
// but keeping in mind that remainder could be omitted by tokenizer
|
|
||||||
// Flush remaining text
|
|
||||||
remaining := o.textBuffer.String()
|
remaining := o.textBuffer.String()
|
||||||
|
remaining = cleanText(remaining)
|
||||||
o.textBuffer.Reset()
|
o.textBuffer.Reset()
|
||||||
if remaining != "" {
|
if remaining != "" {
|
||||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||||
|
|||||||
40
extra/tts_test.go
Normal file
40
extra/tts_test.go
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
//go:build extra
|
||||||
|
// +build extra
|
||||||
|
|
||||||
|
package extra
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCleanText(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{"Hello world", "Hello world"},
|
||||||
|
{"**Bold text**", "Bold text"},
|
||||||
|
{"*Italic text*", "Italic text"},
|
||||||
|
{"# Header", "Header"},
|
||||||
|
{"_Underlined text_", "Underlined text"},
|
||||||
|
{"~Strikethrough text~", "Strikethrough text"},
|
||||||
|
{"`Code text`", "Code text"},
|
||||||
|
{"[Link text](url)", "Link text(url)"},
|
||||||
|
{"Mixed *markdown* and #headers#!", "Mixed markdown and headers"},
|
||||||
|
{"<html>tags</html>", "tags"},
|
||||||
|
{"|---|", ""}, // Table separator
|
||||||
|
{"|====|", ""}, // Table separator with equals
|
||||||
|
{"| - - - |", ""}, // Table separator with spaced dashes
|
||||||
|
{"| cell1 | cell2 |", "cell1 cell2"}, // Table row with content
|
||||||
|
{" Trailing spaces ", "Trailing spaces"},
|
||||||
|
{"", ""},
|
||||||
|
{"***", ""},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
result := cleanText(test.input)
|
||||||
|
if result != test.expected {
|
||||||
|
t.Errorf("cleanText(%q) = %q; expected %q", test.input, result, test.expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user