Refactor: cleanup stt mess, config use
This commit is contained in:
16
bot.go
16
bot.go
@@ -149,7 +149,7 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
// resp, err := httpClient.Post(cfg.CurrentAPI, "application/json", body)
|
// resp, err := httpClient.Post(cfg.CurrentAPI, "application/json", body)
|
||||||
resp, err := httpClient.Do(req)
|
resp, err := httpClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("llamacpp api", "error", err, "body", string(bodyBytes))
|
logger.Error("llamacpp api", "error", err)
|
||||||
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil {
|
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil {
|
||||||
logger.Error("failed to notify", "error", err)
|
logger.Error("failed to notify", "error", err)
|
||||||
}
|
}
|
||||||
@@ -498,6 +498,7 @@ func init() {
|
|||||||
//
|
//
|
||||||
logLevel.Set(slog.LevelInfo)
|
logLevel.Set(slog.LevelInfo)
|
||||||
logger = slog.New(slog.NewTextHandler(logfile, &slog.HandlerOptions{Level: logLevel}))
|
logger = slog.New(slog.NewTextHandler(logfile, &slog.HandlerOptions{Level: logLevel}))
|
||||||
|
// TODO: rename and/or put in cfg
|
||||||
store = storage.NewProviderSQL("test.db", logger)
|
store = storage.NewProviderSQL("test.db", logger)
|
||||||
if store == nil {
|
if store == nil {
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
@@ -511,7 +512,7 @@ func init() {
|
|||||||
}
|
}
|
||||||
lastChat := loadOldChatOrGetNew()
|
lastChat := loadOldChatOrGetNew()
|
||||||
chatBody = &models.ChatBody{
|
chatBody = &models.ChatBody{
|
||||||
Model: "modl_name",
|
Model: "modelname",
|
||||||
Stream: true,
|
Stream: true,
|
||||||
Messages: lastChat,
|
Messages: lastChat,
|
||||||
}
|
}
|
||||||
@@ -522,9 +523,10 @@ func init() {
|
|||||||
}
|
}
|
||||||
choseChunkParser()
|
choseChunkParser()
|
||||||
httpClient = createClient(time.Second * 15)
|
httpClient = createClient(time.Second * 15)
|
||||||
// TODO: check config for orator
|
if cfg.TTS_ENABLED {
|
||||||
orator = extra.InitOrator(logger, "http://localhost:8880/v1/audio/speech")
|
orator = extra.InitOrator(logger, cfg.TTS_URL)
|
||||||
asr = extra.NewWhisperSTT(logger, "http://localhost:8081/inference", 44100)
|
}
|
||||||
// go runModelNameTicker(time.Second * 120)
|
if cfg.STT_ENABLED {
|
||||||
// tempLoad()
|
asr = extra.NewWhisperSTT(logger, cfg.STT_URL, 16000)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,3 +15,6 @@ RAGWorkers = 5
|
|||||||
# extra tts
|
# extra tts
|
||||||
TTS_ENABLED = false
|
TTS_ENABLED = false
|
||||||
TTS_URL = "http://localhost:8880/v1/audio/speech"
|
TTS_URL = "http://localhost:8880/v1/audio/speech"
|
||||||
|
# extra stt
|
||||||
|
STT_ENABLED = false
|
||||||
|
STT_URL = "http://localhost:8081/inference"
|
||||||
|
|||||||
@@ -42,6 +42,9 @@ type Config struct {
|
|||||||
// TTS
|
// TTS
|
||||||
TTS_URL string `toml:"TTS_URL"`
|
TTS_URL string `toml:"TTS_URL"`
|
||||||
TTS_ENABLED bool `toml:"TTS_ENABLED"`
|
TTS_ENABLED bool `toml:"TTS_ENABLED"`
|
||||||
|
// STT
|
||||||
|
STT_URL string `toml:"STT_URL"`
|
||||||
|
STT_ENABLED bool `toml:"STT_ENABLED"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func LoadConfigOrDefault(fn string) *Config {
|
func LoadConfigOrDefault(fn string) *Config {
|
||||||
|
|||||||
32
extra/stt.go
32
extra/stt.go
@@ -9,7 +9,7 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"mime/multipart"
|
"mime/multipart"
|
||||||
"net/http"
|
"net/http"
|
||||||
"time"
|
"strings"
|
||||||
|
|
||||||
"github.com/gordonklaus/portaudio"
|
"github.com/gordonklaus/portaudio"
|
||||||
)
|
)
|
||||||
@@ -28,8 +28,7 @@ type WhisperSTT struct {
|
|||||||
logger *slog.Logger
|
logger *slog.Logger
|
||||||
ServerURL string
|
ServerURL string
|
||||||
SampleRate int
|
SampleRate int
|
||||||
RawBuffer *bytes.Buffer
|
AudioBuffer *bytes.Buffer
|
||||||
WavBuffer *bytes.Buffer
|
|
||||||
streamer StreamCloser
|
streamer StreamCloser
|
||||||
recording bool
|
recording bool
|
||||||
}
|
}
|
||||||
@@ -39,8 +38,7 @@ func NewWhisperSTT(logger *slog.Logger, serverURL string, sampleRate int) *Whisp
|
|||||||
logger: logger,
|
logger: logger,
|
||||||
ServerURL: serverURL,
|
ServerURL: serverURL,
|
||||||
SampleRate: sampleRate,
|
SampleRate: sampleRate,
|
||||||
RawBuffer: new(bytes.Buffer),
|
AudioBuffer: new(bytes.Buffer),
|
||||||
WavBuffer: new(bytes.Buffer),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -54,17 +52,14 @@ func (stt *WhisperSTT) StartRecording() error {
|
|||||||
|
|
||||||
func (stt *WhisperSTT) StopRecording() (string, error) {
|
func (stt *WhisperSTT) StopRecording() (string, error) {
|
||||||
stt.recording = false
|
stt.recording = false
|
||||||
time.Sleep(time.Millisecond * 200) // this is not the way
|
|
||||||
// wait loop to finish?
|
// wait loop to finish?
|
||||||
if stt.RawBuffer == nil {
|
if stt.AudioBuffer == nil {
|
||||||
err := errors.New("unexpected nil RawBuffer")
|
err := errors.New("unexpected nil AudioBuffer")
|
||||||
stt.logger.Error(err.Error())
|
stt.logger.Error(err.Error())
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
// Create WAV header first
|
// Create WAV header first
|
||||||
stt.writeWavHeader(stt.WavBuffer, len(stt.RawBuffer.Bytes())) // Write initial header with 0 size
|
body := &bytes.Buffer{}
|
||||||
stt.WavBuffer.Write(stt.RawBuffer.Bytes())
|
|
||||||
body := &bytes.Buffer{} // third buffer?
|
|
||||||
writer := multipart.NewWriter(body)
|
writer := multipart.NewWriter(body)
|
||||||
// Add audio file part
|
// Add audio file part
|
||||||
part, err := writer.CreateFormFile("file", "recording.wav")
|
part, err := writer.CreateFormFile("file", "recording.wav")
|
||||||
@@ -72,11 +67,15 @@ func (stt *WhisperSTT) StopRecording() (string, error) {
|
|||||||
stt.logger.Error("fn: StopRecording", "error", err)
|
stt.logger.Error("fn: StopRecording", "error", err)
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
_, err = io.Copy(part, stt.WavBuffer)
|
// Stream directly to multipart writer: header + raw data
|
||||||
if err != nil {
|
dataSize := stt.AudioBuffer.Len()
|
||||||
|
stt.writeWavHeader(part, dataSize)
|
||||||
|
if _, err := io.Copy(part, stt.AudioBuffer); err != nil {
|
||||||
stt.logger.Error("fn: StopRecording", "error", err)
|
stt.logger.Error("fn: StopRecording", "error", err)
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
// Reset buffer for next recording
|
||||||
|
stt.AudioBuffer.Reset()
|
||||||
// Add response format field
|
// Add response format field
|
||||||
err = writer.WriteField("response_format", "text")
|
err = writer.WriteField("response_format", "text")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -95,13 +94,12 @@ func (stt *WhisperSTT) StopRecording() (string, error) {
|
|||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
// Read and print response
|
// Read and print response
|
||||||
responseText, err := io.ReadAll(resp.Body)
|
responseTextBytes, err := io.ReadAll(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
stt.logger.Error("fn: StopRecording", "error", err)
|
stt.logger.Error("fn: StopRecording", "error", err)
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
stt.logger.Info("got transcript", "text", string(responseText))
|
return strings.TrimRight(string(responseTextBytes), "\n"), nil
|
||||||
return string(responseText), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (stt *WhisperSTT) writeWavHeader(w io.Writer, dataSize int) {
|
func (stt *WhisperSTT) writeWavHeader(w io.Writer, dataSize int) {
|
||||||
@@ -149,7 +147,7 @@ func (stt *WhisperSTT) microphoneStream(sampleRate int) error {
|
|||||||
stt.logger.Error("reading stream", "error", err)
|
stt.logger.Error("reading stream", "error", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err := binary.Write(stt.RawBuffer, binary.LittleEndian, in); err != nil {
|
if err := binary.Write(stt.AudioBuffer, binary.LittleEndian, in); err != nil {
|
||||||
stt.logger.Error("writing to buffer", "error", err)
|
stt.logger.Error("writing to buffer", "error", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
7
tui.go
7
tui.go
@@ -666,6 +666,7 @@ func init() {
|
|||||||
pages.AddPage(imgPage, imgView, true, true)
|
pages.AddPage(imgPage, imgView, true, true)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
// TODO: move to menu or table
|
||||||
// if event.Key() == tcell.KeyCtrlR && cfg.HFToken != "" {
|
// if event.Key() == tcell.KeyCtrlR && cfg.HFToken != "" {
|
||||||
// // rag load
|
// // rag load
|
||||||
// // menu of the text files from defined rag directory
|
// // menu of the text files from defined rag directory
|
||||||
@@ -685,7 +686,7 @@ func init() {
|
|||||||
// pages.AddPage(RAGPage, chatRAGTable, true, true)
|
// pages.AddPage(RAGPage, chatRAGTable, true, true)
|
||||||
// return nil
|
// return nil
|
||||||
// }
|
// }
|
||||||
if event.Key() == tcell.KeyCtrlR {
|
if event.Key() == tcell.KeyCtrlR && cfg.STT_ENABLED {
|
||||||
defer updateStatusLine()
|
defer updateStatusLine()
|
||||||
if asr.IsRecording() {
|
if asr.IsRecording() {
|
||||||
userSpeech, err := asr.StopRecording()
|
userSpeech, err := asr.StopRecording()
|
||||||
@@ -694,7 +695,9 @@ func init() {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if userSpeech != "" {
|
if userSpeech != "" {
|
||||||
textArea.SetText(userSpeech, true)
|
// append instead of replacing
|
||||||
|
prevText := textArea.GetText()
|
||||||
|
textArea.SetText(prevText+userSpeech, true)
|
||||||
} else {
|
} else {
|
||||||
logger.Warn("empty user speech")
|
logger.Warn("empty user speech")
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user