Chore: stt reworks [WIP]

This commit is contained in:
Grail Finder
2025-11-09 11:28:50 +03:00
parent 8036bf0081
commit 4a581f6c12
4 changed files with 61 additions and 14 deletions

2
bot.go
View File

@@ -606,6 +606,6 @@ func init() {
orator = extra.NewOrator(logger, cfg) orator = extra.NewOrator(logger, cfg)
} }
if cfg.STT_ENABLED { if cfg.STT_ENABLED {
asr = extra.NewWhisperSTT(logger, cfg.STT_URL, 16000) asr = extra.NewSTT(logger, cfg)
} }
} }

View File

@@ -56,8 +56,13 @@ type Config struct {
TTS_ENABLED bool `toml:"TTS_ENABLED"` TTS_ENABLED bool `toml:"TTS_ENABLED"`
TTS_SPEED float32 `toml:"TTS_SPEED"` TTS_SPEED float32 `toml:"TTS_SPEED"`
// STT // STT
STT_TYPE string `toml:"STT_TYPE"` // WHISPER_SERVER, WHISPER_BINARY
STT_URL string `toml:"STT_URL"` STT_URL string `toml:"STT_URL"`
STT_SR int `toml:"STT_SR"`
STT_ENABLED bool `toml:"STT_ENABLED"` STT_ENABLED bool `toml:"STT_ENABLED"`
WhisperBinaryPath string `toml:"WhisperBinaryPath"`
WhisperModelPath string `toml:"WhisperModelPath"`
STT_LANG string `toml:"STT_LANG"`
DBPATH string `toml:"DBPATH"` DBPATH string `toml:"DBPATH"`
} }
@@ -93,6 +98,7 @@ func LoadConfigOrDefault(fn string) *Config {
config.TTS_ENABLED = false config.TTS_ENABLED = false
config.TTS_URL = "http://localhost:8880/v1/audio/speech" config.TTS_URL = "http://localhost:8880/v1/audio/speech"
config.FetchModelNameAPI = "http://localhost:8080/v1/models" config.FetchModelNameAPI = "http://localhost:8080/v1/models"
config.STT_SR = 16000
} }
config.CurrentAPI = config.ChatAPI config.CurrentAPI = config.ChatAPI
config.APIMap = map[string]string{ config.APIMap = map[string]string{

View File

@@ -5,6 +5,7 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"fmt" "fmt"
"gf-lt/config"
"io" "io"
"log/slog" "log/slog"
"mime/multipart" "mime/multipart"
@@ -27,7 +28,16 @@ type StreamCloser interface {
Close() error Close() error
} }
type WhisperSTT struct { func NewSTT(logger *slog.Logger, cfg *config.Config) STT {
switch cfg.STT_TYPE {
case "WHISPER_BINARY":
case "WHISPER_SERVER":
return NewWhisperServer(logger, cfg)
}
return NewWhisperServer(logger, cfg)
}
type WhisperServer struct {
logger *slog.Logger logger *slog.Logger
ServerURL string ServerURL string
SampleRate int SampleRate int
@@ -35,16 +45,16 @@ type WhisperSTT struct {
recording bool recording bool
} }
func NewWhisperSTT(logger *slog.Logger, serverURL string, sampleRate int) *WhisperSTT { func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
return &WhisperSTT{ return &WhisperServer{
logger: logger, logger: logger,
ServerURL: serverURL, ServerURL: cfg.STT_URL,
SampleRate: sampleRate, SampleRate: cfg.STT_SR,
AudioBuffer: new(bytes.Buffer), AudioBuffer: new(bytes.Buffer),
} }
} }
func (stt *WhisperSTT) StartRecording() error { func (stt *WhisperServer) StartRecording() error {
if err := stt.microphoneStream(stt.SampleRate); err != nil { if err := stt.microphoneStream(stt.SampleRate); err != nil {
return fmt.Errorf("failed to init microphone: %w", err) return fmt.Errorf("failed to init microphone: %w", err)
} }
@@ -52,7 +62,7 @@ func (stt *WhisperSTT) StartRecording() error {
return nil return nil
} }
func (stt *WhisperSTT) StopRecording() (string, error) { func (stt *WhisperServer) StopRecording() (string, error) {
stt.recording = false stt.recording = false
// wait loop to finish? // wait loop to finish?
if stt.AudioBuffer == nil { if stt.AudioBuffer == nil {
@@ -107,7 +117,7 @@ func (stt *WhisperSTT) StopRecording() (string, error) {
return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
} }
func (stt *WhisperSTT) writeWavHeader(w io.Writer, dataSize int) { func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
header := make([]byte, 44) header := make([]byte, 44)
copy(header[0:4], "RIFF") copy(header[0:4], "RIFF")
binary.LittleEndian.PutUint32(header[4:8], uint32(36+dataSize)) binary.LittleEndian.PutUint32(header[4:8], uint32(36+dataSize))
@@ -127,11 +137,11 @@ func (stt *WhisperSTT) writeWavHeader(w io.Writer, dataSize int) {
} }
} }
func (stt *WhisperSTT) IsRecording() bool { func (stt *WhisperServer) IsRecording() bool {
return stt.recording return stt.recording
} }
func (stt *WhisperSTT) microphoneStream(sampleRate int) error { func (stt *WhisperServer) microphoneStream(sampleRate int) error {
if err := portaudio.Initialize(); err != nil { if err := portaudio.Initialize(); err != nil {
return fmt.Errorf("portaudio init failed: %w", err) return fmt.Errorf("portaudio init failed: %w", err)
} }

31
extra/whisper_binary.go Normal file
View File

@@ -0,0 +1,31 @@
package extra
import (
"context"
"gf-lt/config"
"log/slog"
"os/exec"
"sync"
)
// WhisperBinary holds the settings and runtime state for speech-to-text
// performed through a local whisper executable, as opposed to WhisperServer,
// which talks to a remote endpoint.
//
// NOTE(review): no methods are defined on this type yet (work in progress),
// so the roles of ctx, cancel, mu, running and cmd described below are
// assumptions to confirm once the recording/transcription logic lands.
type WhisperBinary struct {
	logger      *slog.Logger       // logger handed to NewWhisperBinary
	whisperPath string             // copied from cfg.WhisperBinaryPath
	modelPath   string             // copied from cfg.WhisperModelPath
	lang        string             // copied from cfg.STT_LANG
	ctx         context.Context    // created in NewWhisperBinary from context.Background()
	cancel      context.CancelFunc // cancels ctx
	mu          sync.Mutex         // presumably guards running and cmd — TODO confirm
	running     bool               // presumably true while a whisper process is active — TODO confirm
	cmd         *exec.Cmd          // presumably the spawned whisper process — TODO confirm
}

// NewWhisperBinary builds a WhisperBinary from cfg, reading the binary path,
// model path and language from it. The logger is stored on the returned value
// (mirroring NewWhisperServer) instead of being discarded.
//
// A cancellable context derived from context.Background() is created and
// stored together with its cancel function; nothing in this constructor
// cancels it.
func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
	ctx, cancel := context.WithCancel(context.Background())
	return &WhisperBinary{
		logger:      logger,
		whisperPath: cfg.WhisperBinaryPath,
		modelPath:   cfg.WhisperModelPath,
		lang:        cfg.STT_LANG,
		ctx:         ctx,
		cancel:      cancel,
	}
}