Chore: cleanup logs
This commit is contained in:
@@ -50,9 +50,7 @@ var kokoroPhonemeMap = map[string]int{
|
||||
}
|
||||
|
||||
func (o *KokoroONNXOrator) ensureInitialized(modelPath string) error {
|
||||
o.logger.Debug("ensureInitialized called", "modelPath", modelPath)
|
||||
if o.modelLoaded {
|
||||
o.logger.Debug("model already loaded")
|
||||
return nil
|
||||
}
|
||||
o.mu.Lock()
|
||||
@@ -143,7 +141,6 @@ func (o *KokoroONNXOrator) ensureInitialized(modelPath string) error {
|
||||
}
|
||||
|
||||
func (o *KokoroONNXOrator) textToPhonemes(text string) (string, error) {
|
||||
o.logger.Debug("converting text to phonemes", "text", text)
|
||||
cmd := exec.Command(o.espeakCmd, "-x", "-q", text)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
@@ -152,18 +149,14 @@ func (o *KokoroONNXOrator) textToPhonemes(text string) (string, error) {
|
||||
}
|
||||
|
||||
phonemeStr := strings.TrimSpace(string(output))
|
||||
o.logger.Debug("phonemes generated", "phonemes", phonemeStr)
|
||||
return phonemeStr, nil
|
||||
}
|
||||
|
||||
func (o *KokoroONNXOrator) phonemesToTokens(phonemeStr string) ([]int, error) {
|
||||
o.logger.Debug("converting phonemes to tokens", "phonemes", phonemeStr)
|
||||
|
||||
if phonemeStr == "" {
|
||||
o.logger.Error("empty phoneme string")
|
||||
return nil, fmt.Errorf("empty phoneme string")
|
||||
}
|
||||
|
||||
// Iterate over each character in the phoneme string
|
||||
tokens := make([]int, 0)
|
||||
for _, ch := range phonemeStr {
|
||||
@@ -172,18 +165,14 @@ func (o *KokoroONNXOrator) phonemesToTokens(phonemeStr string) ([]int, error) {
|
||||
tokens = append(tokens, tokenID)
|
||||
}
|
||||
}
|
||||
|
||||
if len(tokens) == 0 {
|
||||
o.logger.Error("no phonemes mapped to tokens", "phonemeStr", phonemeStr)
|
||||
return nil, fmt.Errorf("no valid phonemes mapped to tokens")
|
||||
}
|
||||
o.logger.Debug("tokens generated", "count", len(tokens), "tokens", tokens)
|
||||
return tokens, nil
|
||||
}
|
||||
|
||||
func (o *KokoroONNXOrator) generateAudio(text string) ([]float32, error) {
|
||||
|
||||
o.logger.Debug("generateAudio called", "text", text, "speed", o.speed)
|
||||
if err := o.ensureInitialized(o.modelPath); err != nil {
|
||||
o.logger.Error("ensureInitialized failed", "error", err)
|
||||
return nil, err
|
||||
@@ -203,7 +192,6 @@ func (o *KokoroONNXOrator) generateAudio(text string) ([]float32, error) {
|
||||
}
|
||||
tokens = append([]int{0}, tokens...)
|
||||
tokens = append(tokens, 0)
|
||||
o.logger.Debug("tokens prepared", "count", len(tokens))
|
||||
inputIDs := make([]int64, len(tokens))
|
||||
for i, t := range tokens {
|
||||
inputIDs[i] = int64(t)
|
||||
@@ -217,7 +205,6 @@ func (o *KokoroONNXOrator) generateAudio(text string) ([]float32, error) {
|
||||
return nil, fmt.Errorf("failed to create input tensor: %w", err)
|
||||
}
|
||||
defer func() { _ = inputTensor.Destroy() }()
|
||||
o.logger.Debug("input tensor created", "shape", fmt.Sprintf("[1,%d]", len(inputIDs)))
|
||||
styleTensor, err := onnxruntime_go.NewTensor[float32](
|
||||
onnxruntime_go.NewShape(1, 256),
|
||||
o.styleVector,
|
||||
@@ -236,7 +223,6 @@ func (o *KokoroONNXOrator) generateAudio(text string) ([]float32, error) {
|
||||
return nil, fmt.Errorf("failed to create speed tensor: %w", err)
|
||||
}
|
||||
defer func() { _ = speedTensor.Destroy() }()
|
||||
o.logger.Debug("speed tensor created", "speed", o.speed)
|
||||
outputTensor, err := onnxruntime_go.NewEmptyTensor[float32](
|
||||
onnxruntime_go.NewShape(1, 512),
|
||||
)
|
||||
@@ -245,8 +231,6 @@ func (o *KokoroONNXOrator) generateAudio(text string) ([]float32, error) {
|
||||
return nil, fmt.Errorf("failed to create output tensor: %w", err)
|
||||
}
|
||||
defer func() { _ = outputTensor.Destroy() }()
|
||||
o.logger.Debug("output tensor created", "shape", "[1,512]")
|
||||
o.logger.Info("running ONNX inference", "input_len", len(inputIDs))
|
||||
err = o.session.Run(
|
||||
[]onnxruntime_go.Value{inputTensor, styleTensor, speedTensor},
|
||||
[]onnxruntime_go.Value{outputTensor},
|
||||
@@ -255,26 +239,22 @@ func (o *KokoroONNXOrator) generateAudio(text string) ([]float32, error) {
|
||||
o.logger.Error("ONNX inference failed", "error", err)
|
||||
return nil, fmt.Errorf("ONNX inference failed: %w", err)
|
||||
}
|
||||
o.logger.Debug("ONNX inference completed")
|
||||
audioData := outputTensor.GetData()
|
||||
if len(audioData) == 0 {
|
||||
o.logger.Error("empty audio output from ONNX")
|
||||
return nil, fmt.Errorf("empty audio output")
|
||||
}
|
||||
o.logger.Debug("audio generated", "samples", len(audioData))
|
||||
audio := make([]float32, len(audioData))
|
||||
copy(audio, audioData)
|
||||
return audio, nil
|
||||
}
|
||||
|
||||
func (o *KokoroONNXOrator) Speak(text string) error {
|
||||
o.logger.Debug("KokoroONNX Speak called", "text_len", len(text))
|
||||
audio, err := o.generateAudio(text)
|
||||
if err != nil {
|
||||
o.logger.Error("audio generation failed", "error", err)
|
||||
return fmt.Errorf("audio generation failed: %w", err)
|
||||
}
|
||||
o.logger.Debug("audio ready for playback", "samples", len(audio))
|
||||
// Create streamer for encoding
|
||||
encodeStreamer := beep.StreamerFunc(func(samples [][2]float64) (n int, ok bool) {
|
||||
for i := range samples {
|
||||
@@ -296,14 +276,12 @@ func (o *KokoroONNXOrator) Speak(text string) error {
|
||||
o.logger.Error("wav encoding failed", "error", err)
|
||||
return fmt.Errorf("wav encoding failed: %w", err)
|
||||
}
|
||||
o.logger.Debug("wav encoded", "size", buf.Len())
|
||||
decodedStreamer, format, err := wav.Decode(bytes.NewReader(buf.Bytes()))
|
||||
if err != nil {
|
||||
o.logger.Error("wav decode failed", "error", err)
|
||||
return fmt.Errorf("wav decode failed: %w", err)
|
||||
}
|
||||
defer decodedStreamer.Close()
|
||||
o.logger.Debug("wav decoded", "format", format)
|
||||
if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil {
|
||||
o.logger.Error("speaker init failed", "error", err)
|
||||
return fmt.Errorf("speaker init failed: %w", err)
|
||||
@@ -313,7 +291,6 @@ func (o *KokoroONNXOrator) Speak(text string) error {
|
||||
o.mu.Lock()
|
||||
o.currentDone = done
|
||||
o.currentStream = &beep.Ctrl{Streamer: beep.Seq(decodedStreamer, beep.Callback(func() {
|
||||
o.logger.Debug("playback finished")
|
||||
o.mu.Lock()
|
||||
close(done)
|
||||
o.currentStream = nil
|
||||
@@ -323,12 +300,10 @@ func (o *KokoroONNXOrator) Speak(text string) error {
|
||||
o.mu.Unlock()
|
||||
speaker.Play(o.currentStream)
|
||||
<-done
|
||||
o.logger.Debug("Speak completed")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *KokoroONNXOrator) Stop() {
|
||||
o.logger.Debug("stopping KokoroONNX orator")
|
||||
speaker.Lock()
|
||||
defer speaker.Unlock()
|
||||
o.mu.Lock()
|
||||
@@ -343,10 +318,8 @@ func (o *KokoroONNXOrator) GetLogger() *slog.Logger {
|
||||
}
|
||||
|
||||
func (o *KokoroONNXOrator) stoproutine() {
|
||||
o.logger.Debug("KokoroONNX stoproutine started")
|
||||
for {
|
||||
<-TTSDoneChan
|
||||
o.logger.Debug("KokoroONNX got done signal")
|
||||
o.Stop()
|
||||
for len(TTSTextChan) > 0 {
|
||||
<-TTSTextChan
|
||||
@@ -361,17 +334,14 @@ func (o *KokoroONNXOrator) stoproutine() {
|
||||
}
|
||||
o.interrupt = true
|
||||
o.mu.Unlock()
|
||||
o.logger.Debug("KokoroONNX stoproutine finished")
|
||||
}
|
||||
}
|
||||
|
||||
func (o *KokoroONNXOrator) readroutine() {
|
||||
o.logger.Debug("KokoroONNX readroutine started")
|
||||
tokenizer, _ := english.NewSentenceTokenizer(nil)
|
||||
for {
|
||||
select {
|
||||
case chunk := <-TTSTextChan:
|
||||
o.logger.Debug("KokoroONNX received chunk", "chunk_len", len(chunk))
|
||||
o.mu.Lock()
|
||||
o.interrupt = false
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
@@ -382,9 +352,7 @@ func (o *KokoroONNXOrator) readroutine() {
|
||||
}
|
||||
text := o.textBuffer.String()
|
||||
sentences := tokenizer.Tokenize(text)
|
||||
o.logger.Debug("KokoroONNX tokenized", "total_sentences", len(sentences), "buffer", text)
|
||||
if len(sentences) <= 1 {
|
||||
o.logger.Debug("KokoroONNX not enough sentences, waiting")
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
@@ -392,14 +360,12 @@ func (o *KokoroONNXOrator) readroutine() {
|
||||
remaining := sentences[len(sentences)-1].Text
|
||||
o.textBuffer.Reset()
|
||||
o.textBuffer.WriteString(remaining)
|
||||
o.logger.Debug("KokoroONNX processing sentences", "count", len(completeSentences))
|
||||
o.mu.Unlock()
|
||||
for _, sentence := range completeSentences {
|
||||
o.mu.Lock()
|
||||
interrupted := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupted {
|
||||
o.logger.Debug("KokoroONNX interrupted, exiting")
|
||||
return
|
||||
}
|
||||
cleanedText := models.CleanText(sentence.Text)
|
||||
@@ -412,7 +378,6 @@ func (o *KokoroONNXOrator) readroutine() {
|
||||
}
|
||||
}
|
||||
case <-TTSFlushChan:
|
||||
o.logger.Debug("KokoroONNX flush signal")
|
||||
if len(TTSTextChan) > 0 {
|
||||
for chunk := range TTSTextChan {
|
||||
o.mu.Lock()
|
||||
|
||||
Reference in New Issue
Block a user