Chore: stt reworks [WIP]
This commit is contained in:
2
bot.go
2
bot.go
@@ -606,6 +606,6 @@ func init() {
|
||||
orator = extra.NewOrator(logger, cfg)
|
||||
}
|
||||
if cfg.STT_ENABLED {
|
||||
asr = extra.NewWhisperSTT(logger, cfg.STT_URL, 16000)
|
||||
asr = extra.NewSTT(logger, cfg)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,8 +56,13 @@ type Config struct {
|
||||
TTS_ENABLED bool `toml:"TTS_ENABLED"`
|
||||
TTS_SPEED float32 `toml:"TTS_SPEED"`
|
||||
// STT
|
||||
STT_TYPE string `toml:"STT_TYPE"` // WHISPER_SERVER, WHISPER_BINARY
|
||||
STT_URL string `toml:"STT_URL"`
|
||||
STT_SR int `toml:"STT_SR"`
|
||||
STT_ENABLED bool `toml:"STT_ENABLED"`
|
||||
WhisperBinaryPath string `toml:"WhisperBinaryPath"`
|
||||
WhisperModelPath string `toml:"WhisperModelPath"`
|
||||
STT_LANG string `toml:"STT_LANG"`
|
||||
DBPATH string `toml:"DBPATH"`
|
||||
}
|
||||
|
||||
@@ -93,6 +98,7 @@ func LoadConfigOrDefault(fn string) *Config {
|
||||
config.TTS_ENABLED = false
|
||||
config.TTS_URL = "http://localhost:8880/v1/audio/speech"
|
||||
config.FetchModelNameAPI = "http://localhost:8080/v1/models"
|
||||
config.STT_SR = 16000
|
||||
}
|
||||
config.CurrentAPI = config.ChatAPI
|
||||
config.APIMap = map[string]string{
|
||||
|
||||
30
extra/stt.go
30
extra/stt.go
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"gf-lt/config"
|
||||
"io"
|
||||
"log/slog"
|
||||
"mime/multipart"
|
||||
@@ -27,7 +28,16 @@ type StreamCloser interface {
|
||||
Close() error
|
||||
}
|
||||
|
||||
type WhisperSTT struct {
|
||||
func NewSTT(logger *slog.Logger, cfg *config.Config) STT {
|
||||
switch cfg.STT_TYPE {
|
||||
case "WHISPER_BINARY":
|
||||
case "WHISPER_SERVER":
|
||||
return NewWhisperServer(logger, cfg)
|
||||
}
|
||||
return NewWhisperServer(logger, cfg)
|
||||
}
|
||||
|
||||
type WhisperServer struct {
|
||||
logger *slog.Logger
|
||||
ServerURL string
|
||||
SampleRate int
|
||||
@@ -35,16 +45,16 @@ type WhisperSTT struct {
|
||||
recording bool
|
||||
}
|
||||
|
||||
func NewWhisperSTT(logger *slog.Logger, serverURL string, sampleRate int) *WhisperSTT {
|
||||
return &WhisperSTT{
|
||||
func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
|
||||
return &WhisperServer{
|
||||
logger: logger,
|
||||
ServerURL: serverURL,
|
||||
SampleRate: sampleRate,
|
||||
ServerURL: cfg.STT_URL,
|
||||
SampleRate: cfg.STT_SR,
|
||||
AudioBuffer: new(bytes.Buffer),
|
||||
}
|
||||
}
|
||||
|
||||
func (stt *WhisperSTT) StartRecording() error {
|
||||
func (stt *WhisperServer) StartRecording() error {
|
||||
if err := stt.microphoneStream(stt.SampleRate); err != nil {
|
||||
return fmt.Errorf("failed to init microphone: %w", err)
|
||||
}
|
||||
@@ -52,7 +62,7 @@ func (stt *WhisperSTT) StartRecording() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (stt *WhisperSTT) StopRecording() (string, error) {
|
||||
func (stt *WhisperServer) StopRecording() (string, error) {
|
||||
stt.recording = false
|
||||
// wait loop to finish?
|
||||
if stt.AudioBuffer == nil {
|
||||
@@ -107,7 +117,7 @@ func (stt *WhisperSTT) StopRecording() (string, error) {
|
||||
return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
|
||||
}
|
||||
|
||||
func (stt *WhisperSTT) writeWavHeader(w io.Writer, dataSize int) {
|
||||
func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
|
||||
header := make([]byte, 44)
|
||||
copy(header[0:4], "RIFF")
|
||||
binary.LittleEndian.PutUint32(header[4:8], uint32(36+dataSize))
|
||||
@@ -127,11 +137,11 @@ func (stt *WhisperSTT) writeWavHeader(w io.Writer, dataSize int) {
|
||||
}
|
||||
}
|
||||
|
||||
func (stt *WhisperSTT) IsRecording() bool {
|
||||
// IsRecording returns the current value of the recording flag.
func (stt *WhisperServer) IsRecording() bool {
|
||||
return stt.recording
|
||||
}
|
||||
|
||||
func (stt *WhisperSTT) microphoneStream(sampleRate int) error {
|
||||
func (stt *WhisperServer) microphoneStream(sampleRate int) error {
|
||||
if err := portaudio.Initialize(); err != nil {
|
||||
return fmt.Errorf("portaudio init failed: %w", err)
|
||||
}
|
||||
|
||||
31
extra/whisper_binary.go
Normal file
31
extra/whisper_binary.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package extra
|
||||
|
||||
import (
|
||||
"context"
|
||||
"gf-lt/config"
|
||||
"log/slog"
|
||||
"os/exec"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type WhisperBinary struct {
|
||||
whisperPath string
|
||||
modelPath string
|
||||
lang string
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
mu sync.Mutex
|
||||
running bool
|
||||
cmd *exec.Cmd
|
||||
}
|
||||
|
||||
func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
return &WhisperBinary{
|
||||
whisperPath: cfg.WhisperBinaryPath,
|
||||
modelPath: cfg.WhisperModelPath,
|
||||
lang: cfg.STT_LANG,
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user