Chore: stt reworks [WIP]
This commit is contained in:
2
bot.go
2
bot.go
@@ -606,6 +606,6 @@ func init() {
|
|||||||
orator = extra.NewOrator(logger, cfg)
|
orator = extra.NewOrator(logger, cfg)
|
||||||
}
|
}
|
||||||
if cfg.STT_ENABLED {
|
if cfg.STT_ENABLED {
|
||||||
asr = extra.NewWhisperSTT(logger, cfg.STT_URL, 16000)
|
asr = extra.NewSTT(logger, cfg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -56,8 +56,13 @@ type Config struct {
|
|||||||
TTS_ENABLED bool `toml:"TTS_ENABLED"`
|
TTS_ENABLED bool `toml:"TTS_ENABLED"`
|
||||||
TTS_SPEED float32 `toml:"TTS_SPEED"`
|
TTS_SPEED float32 `toml:"TTS_SPEED"`
|
||||||
// STT
|
// STT
|
||||||
|
STT_TYPE string `toml:"STT_TYPE"` // WHISPER_SERVER, WHISPER_BINARY
|
||||||
STT_URL string `toml:"STT_URL"`
|
STT_URL string `toml:"STT_URL"`
|
||||||
|
STT_SR int `toml:"STT_SR"`
|
||||||
STT_ENABLED bool `toml:"STT_ENABLED"`
|
STT_ENABLED bool `toml:"STT_ENABLED"`
|
||||||
|
WhisperBinaryPath string `toml:"WhisperBinaryPath"`
|
||||||
|
WhisperModelPath string `toml:"WhisperModelPath"`
|
||||||
|
STT_LANG string `toml:"STT_LANG"`
|
||||||
DBPATH string `toml:"DBPATH"`
|
DBPATH string `toml:"DBPATH"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -93,6 +98,7 @@ func LoadConfigOrDefault(fn string) *Config {
|
|||||||
config.TTS_ENABLED = false
|
config.TTS_ENABLED = false
|
||||||
config.TTS_URL = "http://localhost:8880/v1/audio/speech"
|
config.TTS_URL = "http://localhost:8880/v1/audio/speech"
|
||||||
config.FetchModelNameAPI = "http://localhost:8080/v1/models"
|
config.FetchModelNameAPI = "http://localhost:8080/v1/models"
|
||||||
|
config.STT_SR = 16000
|
||||||
}
|
}
|
||||||
config.CurrentAPI = config.ChatAPI
|
config.CurrentAPI = config.ChatAPI
|
||||||
config.APIMap = map[string]string{
|
config.APIMap = map[string]string{
|
||||||
|
|||||||
30
extra/stt.go
30
extra/stt.go
@@ -5,6 +5,7 @@ import (
|
|||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"gf-lt/config"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"mime/multipart"
|
"mime/multipart"
|
||||||
@@ -27,7 +28,16 @@ type StreamCloser interface {
|
|||||||
Close() error
|
Close() error
|
||||||
}
|
}
|
||||||
|
|
||||||
type WhisperSTT struct {
|
func NewSTT(logger *slog.Logger, cfg *config.Config) STT {
|
||||||
|
switch cfg.STT_TYPE {
|
||||||
|
case "WHISPER_BINARY":
|
||||||
|
case "WHISPER_SERVER":
|
||||||
|
return NewWhisperServer(logger, cfg)
|
||||||
|
}
|
||||||
|
return NewWhisperServer(logger, cfg)
|
||||||
|
}
|
||||||
|
|
||||||
|
type WhisperServer struct {
|
||||||
logger *slog.Logger
|
logger *slog.Logger
|
||||||
ServerURL string
|
ServerURL string
|
||||||
SampleRate int
|
SampleRate int
|
||||||
@@ -35,16 +45,16 @@ type WhisperSTT struct {
|
|||||||
recording bool
|
recording bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewWhisperSTT(logger *slog.Logger, serverURL string, sampleRate int) *WhisperSTT {
|
func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
|
||||||
return &WhisperSTT{
|
return &WhisperServer{
|
||||||
logger: logger,
|
logger: logger,
|
||||||
ServerURL: serverURL,
|
ServerURL: cfg.STT_URL,
|
||||||
SampleRate: sampleRate,
|
SampleRate: cfg.STT_SR,
|
||||||
AudioBuffer: new(bytes.Buffer),
|
AudioBuffer: new(bytes.Buffer),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (stt *WhisperSTT) StartRecording() error {
|
func (stt *WhisperServer) StartRecording() error {
|
||||||
if err := stt.microphoneStream(stt.SampleRate); err != nil {
|
if err := stt.microphoneStream(stt.SampleRate); err != nil {
|
||||||
return fmt.Errorf("failed to init microphone: %w", err)
|
return fmt.Errorf("failed to init microphone: %w", err)
|
||||||
}
|
}
|
||||||
@@ -52,7 +62,7 @@ func (stt *WhisperSTT) StartRecording() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (stt *WhisperSTT) StopRecording() (string, error) {
|
func (stt *WhisperServer) StopRecording() (string, error) {
|
||||||
stt.recording = false
|
stt.recording = false
|
||||||
// wait loop to finish?
|
// wait loop to finish?
|
||||||
if stt.AudioBuffer == nil {
|
if stt.AudioBuffer == nil {
|
||||||
@@ -107,7 +117,7 @@ func (stt *WhisperSTT) StopRecording() (string, error) {
|
|||||||
return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
|
return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (stt *WhisperSTT) writeWavHeader(w io.Writer, dataSize int) {
|
func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
|
||||||
header := make([]byte, 44)
|
header := make([]byte, 44)
|
||||||
copy(header[0:4], "RIFF")
|
copy(header[0:4], "RIFF")
|
||||||
binary.LittleEndian.PutUint32(header[4:8], uint32(36+dataSize))
|
binary.LittleEndian.PutUint32(header[4:8], uint32(36+dataSize))
|
||||||
@@ -127,11 +137,11 @@ func (stt *WhisperSTT) writeWavHeader(w io.Writer, dataSize int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (stt *WhisperSTT) IsRecording() bool {
|
func (stt *WhisperServer) IsRecording() bool {
|
||||||
return stt.recording
|
return stt.recording
|
||||||
}
|
}
|
||||||
|
|
||||||
func (stt *WhisperSTT) microphoneStream(sampleRate int) error {
|
func (stt *WhisperServer) microphoneStream(sampleRate int) error {
|
||||||
if err := portaudio.Initialize(); err != nil {
|
if err := portaudio.Initialize(); err != nil {
|
||||||
return fmt.Errorf("portaudio init failed: %w", err)
|
return fmt.Errorf("portaudio init failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
31
extra/whisper_binary.go
Normal file
31
extra/whisper_binary.go
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
package extra
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"gf-lt/config"
|
||||||
|
"log/slog"
|
||||||
|
"os/exec"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
type WhisperBinary struct {
|
||||||
|
whisperPath string
|
||||||
|
modelPath string
|
||||||
|
lang string
|
||||||
|
ctx context.Context
|
||||||
|
cancel context.CancelFunc
|
||||||
|
mu sync.Mutex
|
||||||
|
running bool
|
||||||
|
cmd *exec.Cmd
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
return &WhisperBinary{
|
||||||
|
whisperPath: cfg.WhisperBinaryPath,
|
||||||
|
modelPath: cfg.WhisperModelPath,
|
||||||
|
lang: cfg.STT_LANG,
|
||||||
|
ctx: ctx,
|
||||||
|
cancel: cancel,
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user