Compare commits
26 Commits
feat/playw
...
enha/tts-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c5a24b2a3f | ||
|
|
0f0c43f327 | ||
|
|
0e55e44f62 | ||
|
|
014e297ae3 | ||
|
|
5f273681df | ||
|
|
17b68bc21f | ||
|
|
edfd43c52a | ||
|
|
62ec55505c | ||
|
|
f9866bcf5a | ||
|
|
822cc48834 | ||
|
|
4ef0a21511 | ||
|
|
d2caebdb4f | ||
|
|
e1f2a8cd7b | ||
|
|
efc92d884c | ||
|
|
ac8c8bb055 | ||
|
|
c2c107c786 | ||
|
|
c2757653a3 | ||
|
|
4bd6883966 | ||
|
|
7c56e27dbe | ||
|
|
fbc955ca37 | ||
|
|
c65c11bcfb | ||
|
|
04f1fd464b | ||
|
|
6e9c453ee0 | ||
|
|
645b7351a8 | ||
|
|
57088565bd | ||
|
|
4b6769e531 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -3,6 +3,8 @@
|
||||
testlog
|
||||
history/
|
||||
*.db
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
config.toml
|
||||
sysprompts/*
|
||||
!sysprompts/alice_bob_carl.json
|
||||
@@ -15,3 +17,4 @@ gflt
|
||||
chat_exports/*.json
|
||||
ragimport
|
||||
.env
|
||||
onnx/
|
||||
|
||||
101
Makefile
101
Makefile
@@ -1,4 +1,4 @@
|
||||
.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve
|
||||
.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve fetch-onnx install-onnx-deps
|
||||
|
||||
run: setconfig
|
||||
go build -tags extra -o gf-lt && ./gf-lt
|
||||
@@ -30,6 +30,105 @@ lint: ## Run linters. Use make install-linters first.
|
||||
lintall: lint
|
||||
noblanks ./...
|
||||
|
||||
fetch-onnx:
|
||||
mkdir -p onnx/embedgemma && curl -o onnx/embedgemma/config.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/config.json && curl -o onnx/embedgemma/tokenizer.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/tokenizer.json && curl -o onnx/embedgemma/model_q4.onnx -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx && curl -o onnx/embedgemma/model_q4.onnx_data -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx_data?download=true
|
||||
|
||||
install-onnx-deps: ## Install ONNX Runtime with CUDA support (or CPU fallback)
|
||||
@echo "=== ONNX Runtime Installer ===" && \
|
||||
echo "" && \
|
||||
echo "Checking for existing ONNX Runtime..." && \
|
||||
if ldconfig -p 2>/dev/null | grep -q libonnxruntime.so.1; then \
|
||||
echo "ONNX Runtime is already installed:" && \
|
||||
ldconfig -p 2>/dev/null | grep libonnxruntime && \
|
||||
echo "" && \
|
||||
echo "Skipping installation. To reinstall, remove existing libs first:" && \
|
||||
echo " sudo rm -f /usr/local/lib/libonnxruntime*.so*" && \
|
||||
exit 0; \
|
||||
fi && \
|
||||
echo "No ONNX Runtime found. Proceeding with installation..." && \
|
||||
echo "" && \
|
||||
echo "Detecting CUDA version..." && \
|
||||
HAS_CUDA=0 && \
|
||||
if command -v nvidia-smi >/dev/null 2>&1; then \
|
||||
CUDA_INFO=$$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1) && \
|
||||
if [ -n "$$CUDA_INFO" ]; then \
|
||||
echo "Found NVIDIA GPU with driver: $$CUDA_INFO" && \
|
||||
HAS_CUDA=1; \
|
||||
else \
|
||||
echo "NVIDIA driver found but could not detect CUDA version"; \
|
||||
fi; \
|
||||
else \
|
||||
echo "No NVIDIA GPU detected (nvidia-smi not found)"; \
|
||||
fi && \
|
||||
echo "" && \
|
||||
echo "Determining ONNX Runtime version..." && \
|
||||
ARCH=$$(uname -m) && \
|
||||
if [ "$$ARCH" = "x86_64" ]; then \
|
||||
ONNX_ARCH="x64"; \
|
||||
elif [ "$$ARCH" = "aarch64" ] || [ "$$ARCH" = "arm64" ]; then \
|
||||
ONNX_ARCH="aarch64"; \
|
||||
else \
|
||||
echo "Unsupported architecture: $$ARCH" && \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "Detected architecture: $$ARCH (ONNX runtime: $$ONNX_ARCH)" && \
|
||||
if [ "$$HAS_CUDA" = "1" ]; then \
|
||||
echo "Installing ONNX Runtime with CUDA support..."; \
|
||||
ONNX_VERSION="1.24.2"; \
|
||||
else \
|
||||
echo "Installing ONNX Runtime (CPU version)..."; \
|
||||
ONNX_VERSION="1.24.2"; \
|
||||
fi && \
|
||||
FILENAME="onnxruntime-linux-$${ONNX_ARCH}-${ONNX_VERSION}.tgz" && \
|
||||
URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \
|
||||
echo "Downloading $${URL}..." && \
|
||||
mkdir -p /tmp/onnx-install && \
|
||||
curl -L -o /tmp/onnx-install/$${FILENAME} "$${URL}" || { \
|
||||
echo "Failed to download ONNX Runtime v$${ONNX_VERSION}. Trying v1.18.0..." && \
|
||||
ONNX_VERSION="1.18.0" && \
|
||||
FILENAME="onnxruntime-linux-$${ONNX_ARCH}-${ONNX_VERSION}.tgz" && \
|
||||
URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \
|
||||
curl -L -o /tmp/onnx-install/$${FILENAME} "$${URL}" || { \
|
||||
echo "ERROR: Failed to download ONNX Runtime from GitHub" && \
|
||||
echo "" && \
|
||||
echo "Please install manually:" && \
|
||||
echo " 1. Go to https://github.com/microsoft/onnxruntime/releases" && \
|
||||
echo " 2. Download onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \
|
||||
echo " 3. Extract and copy to /usr/local/lib:" && \
|
||||
echo " tar -xzf onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \
|
||||
echo " sudo cp -r onnxruntime-linux-$${ONNX_ARCH}-VERSION/lib/* /usr/local/lib/" && \
|
||||
echo " sudo ldconfig" && \
|
||||
exit 1; \
|
||||
}; \
|
||||
} && \
|
||||
echo "Extracting..." && \
|
||||
cd /tmp/onnx-install && tar -xzf $${FILENAME} && \
|
||||
echo "Installing to /usr/local/lib..." && \
|
||||
ONNX_DIR=$$(find /tmp/onnx-install -maxdepth 1 -type d -name "onnxruntime-linux-*") && \
|
||||
if [ -d "$${ONNX_DIR}/lib" ]; then \
|
||||
cp -r $${ONNX_DIR}/lib/* /usr/local/lib/ 2>/dev/null || sudo cp -r $${ONNX_DIR}/lib/* /usr/local/lib/; \
|
||||
else \
|
||||
echo "ERROR: Could not find lib directory in extracted archive" && \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "Updating library cache..." && \
|
||||
sudo ldconfig 2>/dev/null || ldconfig && \
|
||||
echo "" && \
|
||||
echo "=== Installation complete! ===" && \
|
||||
echo "" && \
|
||||
echo "Installed libraries:" && \
|
||||
ldconfig -p | grep libonnxruntime || echo "(libraries may require logout/relogin to appear)" && \
|
||||
echo "" && \
|
||||
if [ "$$HAS_CUDA" = "1" ]; then \
|
||||
echo "NOTE: CUDA-enabled ONNX Runtime installed."; \
|
||||
echo "Ensure you also have CUDA libraries installed:"; \
|
||||
echo " - libcudnn, libcublas, libcurand"; \
|
||||
else \
|
||||
echo "NOTE: CPU-only ONNX Runtime installed."; \
|
||||
echo "For GPU support, install CUDA and re-run this script."; \
|
||||
fi && \
|
||||
rm -rf /tmp/onnx-install
|
||||
|
||||
# Whisper STT Setup (in batteries directory)
|
||||
setup-whisper: build-whisper download-whisper-model
|
||||
|
||||
|
||||
18
bot.go
18
bot.go
@@ -1393,12 +1393,16 @@ func updateModelLists() {
|
||||
}
|
||||
}
|
||||
// if llama.cpp started after gf-lt?
|
||||
localModelsMu.Lock()
|
||||
LocalModels, err = fetchLCPModelsWithLoadStatus()
|
||||
localModelsMu.Unlock()
|
||||
ml, err := fetchLCPModelsWithLoadStatus()
|
||||
if err != nil {
|
||||
logger.Warn("failed to fetch llama.cpp models", "error", err)
|
||||
}
|
||||
localModelsMu.Lock()
|
||||
LocalModels = ml
|
||||
localModelsMu.Unlock()
|
||||
for statusLineWidget == nil {
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
}
|
||||
// set already loaded model in llama.cpp
|
||||
if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") {
|
||||
localModelsMu.Lock()
|
||||
@@ -1500,7 +1504,13 @@ func init() {
|
||||
os.Exit(1)
|
||||
return
|
||||
}
|
||||
ragger = rag.New(logger, store, cfg)
|
||||
ragger, err = rag.New(logger, store, cfg)
|
||||
if err != nil {
|
||||
logger.Error("failed to create RAG", "error", err)
|
||||
}
|
||||
if ragger != nil && ragger.FallbackMessage() != "" && app != nil {
|
||||
showToast("RAG", "ONNX unavailable, using API: "+ragger.FallbackMessage())
|
||||
}
|
||||
// https://github.com/coreydaley/ggerganov-llama.cpp/blob/master/examples/server/README.md
|
||||
// load all chats in memory
|
||||
if _, err := loadHistoryChats(); err != nil {
|
||||
|
||||
@@ -13,6 +13,9 @@ OpenRouterChatAPI = "https://openrouter.ai/api/v1/chat/completions"
|
||||
# embeddings
|
||||
EmbedURL = "http://localhost:8082/v1/embeddings"
|
||||
HFToken = ""
|
||||
EmbedModelPath = "onnx/embedgemma/model_q4.onnx"
|
||||
EmbedTokenizerPath = "onnx/embedgemma/tokenizer.json"
|
||||
EmbedDims = 768
|
||||
#
|
||||
ShowSys = true
|
||||
LogFile = "log.txt"
|
||||
@@ -24,9 +27,9 @@ ChunkLimit = 100000
|
||||
AutoScrollEnabled = true
|
||||
AutoCleanToolCallsFromCtx = false
|
||||
# rag settings
|
||||
RAGEnabled = false
|
||||
RAGBatchSize = 1
|
||||
RAGWordLimit = 80
|
||||
RAGOverlapWords = 16
|
||||
RAGDir = "ragimport"
|
||||
# extra tts
|
||||
TTS_ENABLED = false
|
||||
|
||||
@@ -34,13 +34,16 @@ type Config struct {
|
||||
ImagePreview bool `toml:"ImagePreview"`
|
||||
EnableMouse bool `toml:"EnableMouse"`
|
||||
// embeddings
|
||||
EmbedURL string `toml:"EmbedURL"`
|
||||
HFToken string `toml:"HFToken"`
|
||||
EmbedURL string `toml:"EmbedURL"`
|
||||
HFToken string `toml:"HFToken"`
|
||||
EmbedModelPath string `toml:"EmbedModelPath"`
|
||||
EmbedTokenizerPath string `toml:"EmbedTokenizerPath"`
|
||||
EmbedDims int `toml:"EmbedDims"`
|
||||
// rag settings
|
||||
RAGEnabled bool `toml:"RAGEnabled"`
|
||||
RAGDir string `toml:"RAGDir"`
|
||||
RAGBatchSize int `toml:"RAGBatchSize"`
|
||||
RAGWordLimit uint32 `toml:"RAGWordLimit"`
|
||||
RAGDir string `toml:"RAGDir"`
|
||||
RAGBatchSize int `toml:"RAGBatchSize"`
|
||||
RAGWordLimit uint32 `toml:"RAGWordLimit"`
|
||||
RAGOverlapWords uint32 `toml:"RAGOverlapWords"`
|
||||
// deepseek
|
||||
DeepSeekChatAPI string `toml:"DeepSeekChatAPI"`
|
||||
DeepSeekCompletionAPI string `toml:"DeepSeekCompletionAPI"`
|
||||
|
||||
@@ -71,9 +71,6 @@ This document explains how to set up and configure the application using the `co
|
||||
#### EmbedURL (`"http://localhost:8082/v1/embeddings"`)
|
||||
- The endpoint for embedding API, used for RAG (Retrieval Augmented Generation) functionality.
|
||||
|
||||
#### RAGEnabled (`false`)
|
||||
- Enable or disable RAG functionality for enhanced context retrieval.
|
||||
|
||||
#### RAGBatchSize (`1`)
|
||||
- Number of documents to process in each RAG batch.
|
||||
|
||||
|
||||
218
extra/google_tts.go
Normal file
218
extra/google_tts.go
Normal file
@@ -0,0 +1,218 @@
|
||||
//go:build extra
|
||||
// +build extra
|
||||
|
||||
package extra
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gf-lt/models"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
google_translate_tts "github.com/GrailFinder/google-translate-tts"
|
||||
"github.com/neurosnap/sentences/english"
|
||||
)
|
||||
|
||||
type GoogleTranslateOrator struct {
|
||||
logger *slog.Logger
|
||||
mu sync.Mutex
|
||||
speech *google_translate_tts.Speech
|
||||
// fields for playback control
|
||||
cmd *exec.Cmd
|
||||
cmdMu sync.Mutex
|
||||
stopCh chan struct{}
|
||||
// text buffer and interrupt flag
|
||||
textBuffer strings.Builder
|
||||
interrupt bool
|
||||
Speed float32
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) stoproutine() {
|
||||
for {
|
||||
<-TTSDoneChan
|
||||
o.logger.Debug("orator got done signal")
|
||||
o.Stop()
|
||||
for len(TTSTextChan) > 0 {
|
||||
<-TTSTextChan
|
||||
}
|
||||
o.mu.Lock()
|
||||
o.textBuffer.Reset()
|
||||
o.interrupt = true
|
||||
o.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) readroutine() {
|
||||
tokenizer, _ := english.NewSentenceTokenizer(nil)
|
||||
for {
|
||||
select {
|
||||
case chunk := <-TTSTextChan:
|
||||
o.mu.Lock()
|
||||
o.interrupt = false
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
if err != nil {
|
||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
text := o.textBuffer.String()
|
||||
sentences := tokenizer.Tokenize(text)
|
||||
o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
|
||||
if len(sentences) <= 1 {
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
completeSentences := sentences[:len(sentences)-1]
|
||||
remaining := sentences[len(sentences)-1].Text
|
||||
o.textBuffer.Reset()
|
||||
o.textBuffer.WriteString(remaining)
|
||||
o.mu.Unlock()
|
||||
for _, sentence := range completeSentences {
|
||||
o.mu.Lock()
|
||||
interrupted := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupted {
|
||||
return
|
||||
}
|
||||
cleanedText := models.CleanText(sentence.Text)
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
|
||||
if err := o.Speak(cleanedText); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
|
||||
}
|
||||
}
|
||||
case <-TTSFlushChan:
|
||||
o.logger.Debug("got flushchan signal start")
|
||||
// lln is done get the whole message out
|
||||
if len(TTSTextChan) > 0 { // otherwise might get stuck
|
||||
for chunk := range TTSTextChan {
|
||||
o.mu.Lock()
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
o.mu.Unlock()
|
||||
if err != nil {
|
||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||
continue
|
||||
}
|
||||
if len(TTSTextChan) == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
o.mu.Lock()
|
||||
remaining := o.textBuffer.String()
|
||||
remaining = models.CleanText(remaining)
|
||||
o.textBuffer.Reset()
|
||||
o.mu.Unlock()
|
||||
if remaining == "" {
|
||||
continue
|
||||
}
|
||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||
sentencesRem := tokenizer.Tokenize(remaining)
|
||||
for _, rs := range sentencesRem { // to avoid dumping large volume of text
|
||||
o.mu.Lock()
|
||||
interrupt := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupt {
|
||||
break
|
||||
}
|
||||
if err := o.Speak(rs.Text); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", rs.Text, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) GetLogger() *slog.Logger {
|
||||
return o.logger
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) Speak(text string) error {
|
||||
o.logger.Debug("fn: Speak is called", "text-len", len(text))
|
||||
// Generate MP3 data directly as an io.Reader
|
||||
reader, err := o.speech.GenerateSpeech(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generate speech failed: %w", err)
|
||||
}
|
||||
// Wrap in io.NopCloser since GenerateSpeech returns io.Reader (no close needed)
|
||||
body := io.NopCloser(reader)
|
||||
defer body.Close()
|
||||
// Build ffplay command with optional speed filter
|
||||
args := []string{"-nodisp", "-autoexit"}
|
||||
if o.Speed > 0.1 && o.Speed != 1.0 {
|
||||
// atempo range is 0.5 to 2.0; you might clamp it here
|
||||
args = append(args, "-af", fmt.Sprintf("atempo=%.2f", o.Speed))
|
||||
}
|
||||
args = append(args, "-i", "pipe:0")
|
||||
cmd := exec.Command("ffplay", args...)
|
||||
stdin, err := cmd.StdinPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get stdin pipe: %w", err)
|
||||
}
|
||||
o.cmdMu.Lock()
|
||||
o.cmd = cmd
|
||||
o.stopCh = make(chan struct{})
|
||||
o.cmdMu.Unlock()
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start ffplay: %w", err)
|
||||
}
|
||||
copyErr := make(chan error, 1)
|
||||
go func() {
|
||||
_, err := io.Copy(stdin, body)
|
||||
stdin.Close()
|
||||
copyErr <- err
|
||||
}()
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
done <- cmd.Wait()
|
||||
}()
|
||||
select {
|
||||
case <-o.stopCh:
|
||||
if o.cmd != nil && o.cmd.Process != nil {
|
||||
o.cmd.Process.Kill()
|
||||
}
|
||||
<-done
|
||||
return nil
|
||||
case copyErrVal := <-copyErr:
|
||||
if copyErrVal != nil {
|
||||
if o.cmd != nil && o.cmd.Process != nil {
|
||||
o.cmd.Process.Kill()
|
||||
}
|
||||
<-done
|
||||
return copyErrVal
|
||||
}
|
||||
return <-done
|
||||
case err := <-done:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) Stop() {
|
||||
o.cmdMu.Lock()
|
||||
defer o.cmdMu.Unlock()
|
||||
// Signal any running Speak to stop
|
||||
if o.stopCh != nil {
|
||||
select {
|
||||
case <-o.stopCh: // already closed
|
||||
default:
|
||||
close(o.stopCh)
|
||||
}
|
||||
o.stopCh = nil
|
||||
}
|
||||
// Kill the external player process if it's still running
|
||||
if o.cmd != nil && o.cmd.Process != nil {
|
||||
o.cmd.Process.Kill()
|
||||
o.cmd.Wait() // clean up zombie process
|
||||
o.cmd = nil
|
||||
}
|
||||
// Also reset text buffer and interrupt flag (with o.mu)
|
||||
o.mu.Lock()
|
||||
o.textBuffer.Reset()
|
||||
o.interrupt = true
|
||||
o.mu.Unlock()
|
||||
}
|
||||
259
extra/kokoro.go
Normal file
259
extra/kokoro.go
Normal file
@@ -0,0 +1,259 @@
|
||||
//go:build extra
|
||||
// +build extra
|
||||
|
||||
package extra
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"gf-lt/models"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/neurosnap/sentences/english"
|
||||
)
|
||||
|
||||
type KokoroOrator struct {
|
||||
logger *slog.Logger
|
||||
mu sync.Mutex
|
||||
URL string
|
||||
Format models.AudioFormat
|
||||
Stream bool
|
||||
Speed float32
|
||||
Language string
|
||||
Voice string
|
||||
// fields for playback control
|
||||
cmd *exec.Cmd
|
||||
cmdMu sync.Mutex
|
||||
stopCh chan struct{}
|
||||
// textBuffer, interrupt etc. remain the same
|
||||
textBuffer strings.Builder
|
||||
interrupt bool
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) GetLogger() *slog.Logger {
|
||||
return o.logger
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) Speak(text string) error {
|
||||
o.logger.Debug("fn: Speak is called", "text-len", len(text))
|
||||
body, err := o.requestSound(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
defer body.Close()
|
||||
cmd := exec.Command("ffplay", "-nodisp", "-autoexit", "-i", "pipe:0")
|
||||
stdin, err := cmd.StdinPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get stdin pipe: %w", err)
|
||||
}
|
||||
o.cmdMu.Lock()
|
||||
o.cmd = cmd
|
||||
o.stopCh = make(chan struct{})
|
||||
o.cmdMu.Unlock()
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start ffplay: %w", err)
|
||||
}
|
||||
// Copy audio in background
|
||||
copyErr := make(chan error, 1)
|
||||
go func() {
|
||||
_, err := io.Copy(stdin, body)
|
||||
stdin.Close()
|
||||
copyErr <- err
|
||||
}()
|
||||
// Wait for player in background
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
done <- cmd.Wait()
|
||||
}()
|
||||
// Wait for BOTH copy and player, but ensure we block until done
|
||||
select {
|
||||
case <-o.stopCh:
|
||||
// Stop requested: kill player and wait for it to exit
|
||||
if o.cmd != nil && o.cmd.Process != nil {
|
||||
o.cmd.Process.Kill()
|
||||
}
|
||||
<-done // Wait for process to actually exit
|
||||
return nil
|
||||
case copyErrVal := <-copyErr:
|
||||
if copyErrVal != nil {
|
||||
// Copy failed: kill player and wait
|
||||
if o.cmd != nil && o.cmd.Process != nil {
|
||||
o.cmd.Process.Kill()
|
||||
}
|
||||
<-done
|
||||
return copyErrVal
|
||||
}
|
||||
// Copy succeeded, now wait for playback to complete
|
||||
return <-done
|
||||
case err := <-done:
|
||||
// Playback finished normally (copy must have succeeded or player would have exited early)
|
||||
return err
|
||||
}
|
||||
}
|
||||
func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) {
|
||||
if o.URL == "" {
|
||||
return nil, fmt.Errorf("TTS URL is empty")
|
||||
}
|
||||
payload := map[string]interface{}{
|
||||
"input": text,
|
||||
"voice": o.Voice,
|
||||
"response_format": o.Format,
|
||||
"download_format": o.Format,
|
||||
"stream": o.Stream,
|
||||
"speed": o.Speed,
|
||||
// "return_download_link": true,
|
||||
"lang_code": o.Language,
|
||||
}
|
||||
payloadBytes, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal payload: %w", err)
|
||||
}
|
||||
req, err := http.NewRequest("POST", o.URL, bytes.NewBuffer(payloadBytes)) //nolint:noctx
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("accept", "application/json")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
defer resp.Body.Close()
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) stoproutine() {
|
||||
for {
|
||||
<-TTSDoneChan
|
||||
o.logger.Debug("orator got done signal")
|
||||
// 1. Stop any ongoing playback (kills external player, closes stopCh)
|
||||
o.Stop()
|
||||
// 2. Drain any pending text chunks
|
||||
for len(TTSTextChan) > 0 {
|
||||
<-TTSTextChan
|
||||
}
|
||||
// 3. Reset internal state
|
||||
o.mu.Lock()
|
||||
o.textBuffer.Reset()
|
||||
o.interrupt = true
|
||||
o.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) Stop() {
|
||||
o.cmdMu.Lock()
|
||||
defer o.cmdMu.Unlock()
|
||||
// Signal any running Speak to stop
|
||||
if o.stopCh != nil {
|
||||
select {
|
||||
case <-o.stopCh: // already closed
|
||||
default:
|
||||
close(o.stopCh)
|
||||
}
|
||||
o.stopCh = nil
|
||||
}
|
||||
// Kill the external player process if it's still running
|
||||
if o.cmd != nil && o.cmd.Process != nil {
|
||||
o.cmd.Process.Kill()
|
||||
o.cmd.Wait() // clean up zombie process
|
||||
o.cmd = nil
|
||||
}
|
||||
// Also reset text buffer and interrupt flag (with o.mu)
|
||||
o.mu.Lock()
|
||||
o.textBuffer.Reset()
|
||||
o.interrupt = true
|
||||
o.mu.Unlock()
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) readroutine() {
|
||||
tokenizer, _ := english.NewSentenceTokenizer(nil)
|
||||
for {
|
||||
select {
|
||||
case chunk := <-TTSTextChan:
|
||||
o.mu.Lock()
|
||||
o.interrupt = false
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
if err != nil {
|
||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
text := o.textBuffer.String()
|
||||
sentences := tokenizer.Tokenize(text)
|
||||
o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
|
||||
if len(sentences) <= 1 {
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
completeSentences := sentences[:len(sentences)-1]
|
||||
remaining := sentences[len(sentences)-1].Text
|
||||
o.textBuffer.Reset()
|
||||
o.textBuffer.WriteString(remaining)
|
||||
o.mu.Unlock()
|
||||
for _, sentence := range completeSentences {
|
||||
o.mu.Lock()
|
||||
interrupted := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupted {
|
||||
return
|
||||
}
|
||||
cleanedText := models.CleanText(sentence.Text)
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
|
||||
if err := o.Speak(cleanedText); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
|
||||
}
|
||||
}
|
||||
case <-TTSFlushChan:
|
||||
o.logger.Debug("got flushchan signal start")
|
||||
// lln is done get the whole message out
|
||||
if len(TTSTextChan) > 0 { // otherwise might get stuck
|
||||
for chunk := range TTSTextChan {
|
||||
o.mu.Lock()
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
o.mu.Unlock()
|
||||
if err != nil {
|
||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||
continue
|
||||
}
|
||||
if len(TTSTextChan) == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// flush remaining text
|
||||
o.mu.Lock()
|
||||
remaining := o.textBuffer.String()
|
||||
remaining = models.CleanText(remaining)
|
||||
o.textBuffer.Reset()
|
||||
o.mu.Unlock()
|
||||
if remaining == "" {
|
||||
continue
|
||||
}
|
||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||
sentencesRem := tokenizer.Tokenize(remaining)
|
||||
for _, rs := range sentencesRem { // to avoid dumping large volume of text
|
||||
o.mu.Lock()
|
||||
interrupt := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupt {
|
||||
break
|
||||
}
|
||||
if err := o.Speak(rs.Text); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", rs, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
406
extra/tts.go
406
extra/tts.go
@@ -4,25 +4,13 @@
|
||||
package extra
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"gf-lt/config"
|
||||
"gf-lt/models"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
google_translate_tts "github.com/GrailFinder/google-translate-tts"
|
||||
"github.com/GrailFinder/google-translate-tts/handlers"
|
||||
"github.com/gopxl/beep/v2"
|
||||
"github.com/gopxl/beep/v2/mp3"
|
||||
"github.com/gopxl/beep/v2/speaker"
|
||||
"github.com/neurosnap/sentences/english"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -39,142 +27,6 @@ type Orator interface {
|
||||
GetLogger() *slog.Logger
|
||||
}
|
||||
|
||||
// impl https://github.com/remsky/Kokoro-FastAPI
|
||||
type KokoroOrator struct {
|
||||
logger *slog.Logger
|
||||
mu sync.Mutex
|
||||
URL string
|
||||
Format models.AudioFormat
|
||||
Stream bool
|
||||
Speed float32
|
||||
Language string
|
||||
Voice string
|
||||
currentStream *beep.Ctrl // Added for playback control
|
||||
currentDone chan bool
|
||||
textBuffer strings.Builder
|
||||
interrupt bool
|
||||
// textBuffer bytes.Buffer
|
||||
}
|
||||
|
||||
// Google Translate TTS implementation
|
||||
type GoogleTranslateOrator struct {
|
||||
logger *slog.Logger
|
||||
mu sync.Mutex
|
||||
speech *google_translate_tts.Speech
|
||||
currentStream *beep.Ctrl
|
||||
currentDone chan bool
|
||||
textBuffer strings.Builder
|
||||
interrupt bool
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) stoproutine() {
|
||||
for {
|
||||
<-TTSDoneChan
|
||||
o.logger.Debug("orator got done signal")
|
||||
o.Stop()
|
||||
// drain the channel
|
||||
for len(TTSTextChan) > 0 {
|
||||
<-TTSTextChan
|
||||
}
|
||||
o.mu.Lock()
|
||||
o.textBuffer.Reset()
|
||||
if o.currentDone != nil {
|
||||
select {
|
||||
case o.currentDone <- true:
|
||||
default:
|
||||
// Channel might be closed, ignore
|
||||
}
|
||||
}
|
||||
o.interrupt = true
|
||||
o.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) readroutine() {
|
||||
tokenizer, _ := english.NewSentenceTokenizer(nil)
|
||||
for {
|
||||
select {
|
||||
case chunk := <-TTSTextChan:
|
||||
o.mu.Lock()
|
||||
o.interrupt = false
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
if err != nil {
|
||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
text := o.textBuffer.String()
|
||||
sentences := tokenizer.Tokenize(text)
|
||||
o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
|
||||
if len(sentences) <= 1 {
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
completeSentences := sentences[:len(sentences)-1]
|
||||
remaining := sentences[len(sentences)-1].Text
|
||||
o.textBuffer.Reset()
|
||||
o.textBuffer.WriteString(remaining)
|
||||
o.mu.Unlock()
|
||||
|
||||
for _, sentence := range completeSentences {
|
||||
o.mu.Lock()
|
||||
interrupted := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupted {
|
||||
return
|
||||
}
|
||||
cleanedText := models.CleanText(sentence.Text)
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
|
||||
if err := o.Speak(cleanedText); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
|
||||
}
|
||||
}
|
||||
case <-TTSFlushChan:
|
||||
o.logger.Debug("got flushchan signal start")
|
||||
// lln is done get the whole message out
|
||||
if len(TTSTextChan) > 0 { // otherwise might get stuck
|
||||
for chunk := range TTSTextChan {
|
||||
o.mu.Lock()
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
o.mu.Unlock()
|
||||
if err != nil {
|
||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||
continue
|
||||
}
|
||||
if len(TTSTextChan) == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// flush remaining text
|
||||
o.mu.Lock()
|
||||
remaining := o.textBuffer.String()
|
||||
remaining = models.CleanText(remaining)
|
||||
o.textBuffer.Reset()
|
||||
o.mu.Unlock()
|
||||
if remaining == "" {
|
||||
continue
|
||||
}
|
||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||
sentencesRem := tokenizer.Tokenize(remaining)
|
||||
for _, rs := range sentencesRem { // to avoid dumping large volume of text
|
||||
o.mu.Lock()
|
||||
interrupt := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupt {
|
||||
break
|
||||
}
|
||||
if err := o.Speak(rs.Text); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", rs, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
|
||||
provider := cfg.TTS_PROVIDER
|
||||
if provider == "" {
|
||||
@@ -204,270 +56,14 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
|
||||
Language: language,
|
||||
Proxy: "", // Proxy not supported
|
||||
Speed: cfg.TTS_SPEED,
|
||||
Handler: &handlers.Beep{},
|
||||
}
|
||||
orator := &GoogleTranslateOrator{
|
||||
logger: log,
|
||||
speech: speech,
|
||||
Speed: cfg.TTS_SPEED,
|
||||
}
|
||||
go orator.readroutine()
|
||||
go orator.stoproutine()
|
||||
return orator
|
||||
}
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) GetLogger() *slog.Logger {
|
||||
return o.logger
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) {
|
||||
if o.URL == "" {
|
||||
return nil, fmt.Errorf("TTS URL is empty")
|
||||
}
|
||||
payload := map[string]interface{}{
|
||||
"input": text,
|
||||
"voice": o.Voice,
|
||||
"response_format": o.Format,
|
||||
"download_format": o.Format,
|
||||
"stream": o.Stream,
|
||||
"speed": o.Speed,
|
||||
// "return_download_link": true,
|
||||
"lang_code": o.Language,
|
||||
}
|
||||
payloadBytes, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal payload: %w", err)
|
||||
}
|
||||
req, err := http.NewRequest("POST", o.URL, bytes.NewBuffer(payloadBytes)) //nolint:noctx
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("accept", "application/json")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
defer resp.Body.Close()
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) Speak(text string) error {
|
||||
o.logger.Debug("fn: Speak is called", "text-len", len(text))
|
||||
body, err := o.requestSound(text)
|
||||
if err != nil {
|
||||
o.logger.Error("request failed", "error", err)
|
||||
return fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
defer body.Close()
|
||||
// Decode the mp3 audio from response body
|
||||
streamer, format, err := mp3.Decode(body)
|
||||
if err != nil {
|
||||
o.logger.Error("mp3 decode failed", "error", err)
|
||||
return fmt.Errorf("mp3 decode failed: %w", err)
|
||||
}
|
||||
defer streamer.Close()
|
||||
// here it spams with errors that speaker cannot be initialized more than once, but how would we deal with many audio records then?
|
||||
if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil {
|
||||
o.logger.Debug("failed to init speaker", "error", err)
|
||||
}
|
||||
done := make(chan bool)
|
||||
o.mu.Lock()
|
||||
o.currentDone = done
|
||||
o.currentStream = &beep.Ctrl{Streamer: beep.Seq(streamer, beep.Callback(func() {
|
||||
o.mu.Lock()
|
||||
close(done)
|
||||
o.currentStream = nil
|
||||
o.currentDone = nil
|
||||
o.mu.Unlock()
|
||||
})), Paused: false}
|
||||
o.mu.Unlock()
|
||||
speaker.Play(o.currentStream)
|
||||
<-done
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *KokoroOrator) Stop() {
|
||||
// speaker.Clear()
|
||||
o.logger.Debug("attempted to stop orator", "orator", o)
|
||||
speaker.Lock()
|
||||
defer speaker.Unlock()
|
||||
o.mu.Lock()
|
||||
defer o.mu.Unlock()
|
||||
if o.currentStream != nil {
|
||||
// o.currentStream.Paused = true
|
||||
o.currentStream.Streamer = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) stoproutine() {
|
||||
for {
|
||||
<-TTSDoneChan
|
||||
o.logger.Debug("orator got done signal")
|
||||
o.Stop()
|
||||
// drain the channel
|
||||
for len(TTSTextChan) > 0 {
|
||||
<-TTSTextChan
|
||||
}
|
||||
o.mu.Lock()
|
||||
o.textBuffer.Reset()
|
||||
if o.currentDone != nil {
|
||||
select {
|
||||
case o.currentDone <- true:
|
||||
default:
|
||||
// Channel might be closed, ignore
|
||||
}
|
||||
}
|
||||
o.interrupt = true
|
||||
o.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) readroutine() {
|
||||
tokenizer, _ := english.NewSentenceTokenizer(nil)
|
||||
for {
|
||||
select {
|
||||
case chunk := <-TTSTextChan:
|
||||
o.mu.Lock()
|
||||
o.interrupt = false
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
if err != nil {
|
||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
text := o.textBuffer.String()
|
||||
sentences := tokenizer.Tokenize(text)
|
||||
o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
|
||||
if len(sentences) <= 1 {
|
||||
o.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
completeSentences := sentences[:len(sentences)-1]
|
||||
remaining := sentences[len(sentences)-1].Text
|
||||
o.textBuffer.Reset()
|
||||
o.textBuffer.WriteString(remaining)
|
||||
o.mu.Unlock()
|
||||
|
||||
for _, sentence := range completeSentences {
|
||||
o.mu.Lock()
|
||||
interrupted := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupted {
|
||||
return
|
||||
}
|
||||
cleanedText := models.CleanText(sentence.Text)
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
|
||||
if err := o.Speak(cleanedText); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
|
||||
}
|
||||
}
|
||||
case <-TTSFlushChan:
|
||||
o.logger.Debug("got flushchan signal start")
|
||||
// lln is done get the whole message out
|
||||
if len(TTSTextChan) > 0 { // otherwise might get stuck
|
||||
for chunk := range TTSTextChan {
|
||||
o.mu.Lock()
|
||||
_, err := o.textBuffer.WriteString(chunk)
|
||||
o.mu.Unlock()
|
||||
if err != nil {
|
||||
o.logger.Warn("failed to write to stringbuilder", "error", err)
|
||||
continue
|
||||
}
|
||||
if len(TTSTextChan) == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
o.mu.Lock()
|
||||
remaining := o.textBuffer.String()
|
||||
remaining = models.CleanText(remaining)
|
||||
o.textBuffer.Reset()
|
||||
o.mu.Unlock()
|
||||
if remaining == "" {
|
||||
continue
|
||||
}
|
||||
o.logger.Debug("calling Speak with remainder", "rem", remaining)
|
||||
sentencesRem := tokenizer.Tokenize(remaining)
|
||||
for _, rs := range sentencesRem { // to avoid dumping large volume of text
|
||||
o.mu.Lock()
|
||||
interrupt := o.interrupt
|
||||
o.mu.Unlock()
|
||||
if interrupt {
|
||||
break
|
||||
}
|
||||
if err := o.Speak(rs.Text); err != nil {
|
||||
o.logger.Error("tts failed", "sentence", rs.Text, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) GetLogger() *slog.Logger {
|
||||
return o.logger
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) Speak(text string) error {
|
||||
o.logger.Debug("fn: Speak is called", "text-len", len(text))
|
||||
// Generate MP3 data using google-translate-tts
|
||||
reader, err := o.speech.GenerateSpeech(text)
|
||||
if err != nil {
|
||||
o.logger.Error("generate speech failed", "error", err)
|
||||
return fmt.Errorf("generate speech failed: %w", err)
|
||||
}
|
||||
// Decode the mp3 audio from reader (wrap with NopCloser for io.ReadCloser)
|
||||
streamer, format, err := mp3.Decode(io.NopCloser(reader))
|
||||
if err != nil {
|
||||
o.logger.Error("mp3 decode failed", "error", err)
|
||||
return fmt.Errorf("mp3 decode failed: %w", err)
|
||||
}
|
||||
defer streamer.Close()
|
||||
playbackStreamer := beep.Streamer(streamer)
|
||||
speed := o.speech.Speed
|
||||
if speed <= 0 {
|
||||
speed = 1.0
|
||||
}
|
||||
if speed != 1.0 {
|
||||
playbackStreamer = beep.ResampleRatio(3, float64(speed), streamer)
|
||||
}
|
||||
// Initialize speaker with the format's sample rate
|
||||
if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil {
|
||||
o.logger.Debug("failed to init speaker", "error", err)
|
||||
}
|
||||
done := make(chan bool)
|
||||
o.mu.Lock()
|
||||
o.currentDone = done
|
||||
o.currentStream = &beep.Ctrl{Streamer: beep.Seq(playbackStreamer, beep.Callback(func() {
|
||||
o.mu.Lock()
|
||||
close(done)
|
||||
o.currentStream = nil
|
||||
o.currentDone = nil
|
||||
o.mu.Unlock()
|
||||
})), Paused: false}
|
||||
o.mu.Unlock()
|
||||
speaker.Play(o.currentStream)
|
||||
<-done // wait for playback to complete
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *GoogleTranslateOrator) Stop() {
|
||||
o.logger.Debug("attempted to stop google translate orator")
|
||||
speaker.Lock()
|
||||
defer speaker.Unlock()
|
||||
o.mu.Lock()
|
||||
defer o.mu.Unlock()
|
||||
if o.currentStream != nil {
|
||||
o.currentStream.Streamer = nil
|
||||
}
|
||||
// Also stop the speech handler if possible
|
||||
if o.speech != nil {
|
||||
_ = o.speech.Stop()
|
||||
}
|
||||
}
|
||||
|
||||
16
go.mod
16
go.mod
@@ -4,39 +4,41 @@ go 1.25.1
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.5.0
|
||||
github.com/GrailFinder/google-translate-tts v0.1.3
|
||||
github.com/GrailFinder/google-translate-tts v0.1.4
|
||||
github.com/GrailFinder/searchagent v0.2.0
|
||||
github.com/PuerkitoBio/goquery v1.11.0
|
||||
github.com/deckarep/golang-set/v2 v2.8.0
|
||||
github.com/gdamore/tcell/v2 v2.13.2
|
||||
github.com/glebarez/go-sqlite v1.22.0
|
||||
github.com/gopxl/beep/v2 v2.1.1
|
||||
github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b
|
||||
github.com/jmoiron/sqlx v1.4.0
|
||||
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728
|
||||
github.com/neurosnap/sentences v1.1.2
|
||||
github.com/playwright-community/playwright-go v0.5700.1
|
||||
github.com/rivo/tview v0.42.0
|
||||
github.com/sugarme/tokenizer v0.3.0
|
||||
github.com/yalue/onnxruntime_go v1.27.0
|
||||
github.com/yuin/goldmark v1.4.13
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||
github.com/deckarep/golang-set/v2 v2.8.0 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/ebitengine/oto/v3 v3.4.0 // indirect
|
||||
github.com/ebitengine/purego v0.9.1 // indirect
|
||||
github.com/emirpasic/gods v1.18.1 // indirect
|
||||
github.com/gdamore/encoding v1.0.1 // indirect
|
||||
github.com/go-jose/go-jose/v3 v3.0.4 // indirect
|
||||
github.com/go-stack/stack v1.8.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/hajimehoshi/go-mp3 v0.3.4 // indirect
|
||||
github.com/hajimehoshi/oto/v2 v2.3.1 // indirect
|
||||
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/schollz/progressbar/v2 v2.15.0 // indirect
|
||||
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c // indirect
|
||||
golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect
|
||||
golang.org/x/net v0.48.0 // indirect
|
||||
golang.org/x/sys v0.39.0 // indirect
|
||||
|
||||
28
go.sum
28
go.sum
@@ -2,8 +2,8 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
|
||||
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
|
||||
github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=
|
||||
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
|
||||
github.com/GrailFinder/google-translate-tts v0.1.3 h1:Mww9tNzTWjjSh+OCbTPl/+21oMPKcUecXZfU7nTB/lA=
|
||||
github.com/GrailFinder/google-translate-tts v0.1.3/go.mod h1:YIOLKR7sObazdUCrSex3u9OVBovU55eYgWa25vsQJ18=
|
||||
github.com/GrailFinder/google-translate-tts v0.1.4 h1:NJoPZUGfBrmouQMN19MUcNPNUx4tmf4a8OZRME4E4Mg=
|
||||
github.com/GrailFinder/google-translate-tts v0.1.4/go.mod h1:YIOLKR7sObazdUCrSex3u9OVBovU55eYgWa25vsQJ18=
|
||||
github.com/GrailFinder/searchagent v0.2.0 h1:U2GVjLh/9xZt0xX9OcYk9Q2fMkyzyTiADPUmUisRdtQ=
|
||||
github.com/GrailFinder/searchagent v0.2.0/go.mod h1:d66tn5+22LI8IGJREUsRBT60P0sFdgQgvQRqyvgItrs=
|
||||
github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw=
|
||||
@@ -17,10 +17,8 @@ github.com/deckarep/golang-set/v2 v2.8.0 h1:swm0rlPCmdWn9mESxKOjWk8hXSqoxOp+Zlfu
|
||||
github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/bQ=
|
||||
github.com/ebitengine/oto/v3 v3.4.0/go.mod h1:IOleLVD0m+CMak3mRVwsYY8vTctQgOM0iiL6S7Ar7eI=
|
||||
github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
|
||||
github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
||||
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
|
||||
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
|
||||
github.com/gdamore/encoding v1.0.1 h1:YzKZckdBL6jVt2Gc+5p82qhrGiqMdG/eNs6Wy0u3Uhw=
|
||||
github.com/gdamore/encoding v1.0.1/go.mod h1:0Z0cMFinngz9kS1QfMjCP8TY7em3bZYeeklsSDPivEo=
|
||||
github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx2g=
|
||||
@@ -39,13 +37,10 @@ github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17k
|
||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gopxl/beep/v2 v2.1.1 h1:6FYIYMm2qPAdWkjX+7xwKrViS1x0Po5kDMdRkq8NVbU=
|
||||
github.com/gopxl/beep/v2 v2.1.1/go.mod h1:ZAm9TGQ9lvpoiFLd4zf5B1IuyxZhgRACMId1XJbaW0E=
|
||||
github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b h1:WEuQWBxelOGHA6z9lABqaMLMrfwVyMdN3UgRLT+YUPo=
|
||||
github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b/go.mod h1:esZFQEUwqC+l76f2R8bIWSwXMaPbp79PppwZ1eJhFco=
|
||||
github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68=
|
||||
github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo=
|
||||
github.com/hajimehoshi/oto/v2 v2.3.1 h1:qrLKpNus2UfD674oxckKjNJmesp9hMh7u7QCrStB3Rc=
|
||||
github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||
@@ -61,12 +56,14 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
|
||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
||||
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||
github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7ZoUw=
|
||||
github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
|
||||
github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U=
|
||||
github.com/playwright-community/playwright-go v0.5700.1/go.mod h1:MlSn1dZrx8rszbCxY6x3qK89ZesJUYVx21B2JnkoNF0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
@@ -77,10 +74,19 @@ github.com/rivo/tview v0.42.0 h1:b/ftp+RxtDsHSaynXTbJb+/n/BxDEi+W3UfF5jILK6c=
|
||||
github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY=
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/schollz/progressbar/v2 v2.15.0 h1:dVzHQ8fHRmtPjD3K10jT3Qgn/+H+92jhPrhmxIJfDz8=
|
||||
github.com/schollz/progressbar/v2 v2.15.0/go.mod h1:UdPq3prGkfQ7MOzZKlDRpYKcFqEMczbD7YmbPgpzKMI=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c h1:pwb4kNSHb4K89ymCaN+5lPH/MwnfSVg4rzGDh4d+iy4=
|
||||
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c/go.mod h1:2gwkXLWbDGUQWeL3RtpCmcY4mzCtU13kb9UsAg9xMaw=
|
||||
github.com/sugarme/tokenizer v0.3.0 h1:FE8DYbNSz/kSbgEo9l/RjgYHkIJYEdskumitFQBE9FE=
|
||||
github.com/sugarme/tokenizer v0.3.0/go.mod h1:VJ+DLK5ZEZwzvODOWwY0cw+B1dabTd3nCB5HuFCItCc=
|
||||
github.com/yalue/onnxruntime_go v1.27.0 h1:c1YSgDNtpf0WGtxj3YeRIb8VC5LmM1J+Ve3uHdteC1U=
|
||||
github.com/yalue/onnxruntime_go v1.27.0/go.mod h1:b4X26A8pekNb1ACJ58wAXgNKeUCGEAQ9dmACut9Sm/4=
|
||||
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
|
||||
@@ -521,7 +521,7 @@ func updateFlexLayout() {
|
||||
if shellMode {
|
||||
flex.AddItem(shellInput, 0, 10, false)
|
||||
} else {
|
||||
flex.AddItem(textArea, 0, 10, false)
|
||||
flex.AddItem(bottomFlex, 0, 10, true)
|
||||
}
|
||||
if positionVisible {
|
||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
||||
|
||||
@@ -115,9 +115,6 @@ func makePropsTable(props map[string]float32) *tview.Table {
|
||||
row++
|
||||
}
|
||||
// Add checkboxes
|
||||
addCheckboxRow("RAG use", cfg.RAGEnabled, func(checked bool) {
|
||||
cfg.RAGEnabled = checked
|
||||
})
|
||||
addCheckboxRow("Inject role", injectRole, func(checked bool) {
|
||||
injectRole = checked
|
||||
})
|
||||
|
||||
314
rag/embedder.go
314
rag/embedder.go
@@ -9,6 +9,13 @@ import (
|
||||
"gf-lt/models"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/sugarme/tokenizer"
|
||||
"github.com/sugarme/tokenizer/pretrained"
|
||||
"github.com/yalue/onnxruntime_go"
|
||||
)
|
||||
|
||||
// Embedder defines the interface for embedding text
|
||||
@@ -27,8 +34,10 @@ type APIEmbedder struct {
|
||||
func NewAPIEmbedder(l *slog.Logger, cfg *config.Config) *APIEmbedder {
|
||||
return &APIEmbedder{
|
||||
logger: l,
|
||||
client: &http.Client{},
|
||||
cfg: cfg,
|
||||
client: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
},
|
||||
cfg: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,11 +143,302 @@ func (a *APIEmbedder) EmbedSlice(lines []string) ([][]float32, error) {
|
||||
return embeddings, nil
|
||||
}
|
||||
|
||||
// TODO: ONNXEmbedder implementation would go here
|
||||
// This would require:
|
||||
// 1. Loading ONNX models locally
|
||||
// 2. Using a Go ONNX runtime (like gorgonia/onnx or similar)
|
||||
// 3. Converting text to embeddings without external API calls
|
||||
//
|
||||
// For now, we'll focus on the API implementation which is already working in the current system,
|
||||
// and can be extended later when we have ONNX runtime integration
|
||||
type ONNXEmbedder struct {
|
||||
session *onnxruntime_go.DynamicAdvancedSession
|
||||
tokenizer *tokenizer.Tokenizer
|
||||
tokenizerPath string
|
||||
dims int
|
||||
logger *slog.Logger
|
||||
mu sync.Mutex
|
||||
modelPath string
|
||||
}
|
||||
|
||||
var onnxInitOnce sync.Once
|
||||
var onnxReady bool
|
||||
var onnxLibPath string
|
||||
var cudaLibPath string
|
||||
|
||||
var onnxLibPaths = []string{
|
||||
"/usr/lib/libonnxruntime.so",
|
||||
"/usr/lib/libonnxruntime.so.1.24.2",
|
||||
"/usr/local/lib/libonnxruntime.so",
|
||||
"/usr/lib/x86_64-linux-gnu/libonnxruntime.so",
|
||||
"/opt/onnxruntime/lib/libonnxruntime.so",
|
||||
}
|
||||
|
||||
var cudaLibPaths = []string{
|
||||
"/usr/lib/libonnxruntime_providers_cuda.so",
|
||||
"/usr/local/lib/libonnxruntime_providers_cuda.so",
|
||||
"/opt/onnxruntime/lib/libonnxruntime_providers_cuda.so",
|
||||
}
|
||||
|
||||
func findONNXLibrary() string {
|
||||
for _, path := range onnxLibPaths {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return path
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func findCUDALibrary() string {
|
||||
for _, path := range cudaLibPaths {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return path
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Logger) (*ONNXEmbedder, error) {
|
||||
// Check if model and tokenizer files exist
|
||||
if _, err := os.Stat(modelPath); err != nil {
|
||||
return nil, fmt.Errorf("ONNX model not found: %w", err)
|
||||
}
|
||||
if _, err := os.Stat(tokenizerPath); err != nil {
|
||||
return nil, fmt.Errorf("tokenizer not found: %w", err)
|
||||
}
|
||||
|
||||
// Find ONNX library
|
||||
onnxLibPath = findONNXLibrary()
|
||||
if onnxLibPath == "" {
|
||||
return nil, errors.New("ONNX runtime library not found in standard locations")
|
||||
}
|
||||
|
||||
// Find CUDA provider library (optional)
|
||||
cudaLibPath = findCUDALibrary()
|
||||
if cudaLibPath == "" {
|
||||
fmt.Println("WARNING: CUDA provider library not found, will use CPU")
|
||||
}
|
||||
emb := &ONNXEmbedder{
|
||||
tokenizerPath: tokenizerPath,
|
||||
dims: dims,
|
||||
logger: logger,
|
||||
modelPath: modelPath,
|
||||
}
|
||||
return emb, nil
|
||||
}
|
||||
|
||||
func (e *ONNXEmbedder) ensureInitialized() error {
|
||||
if e.session != nil {
|
||||
return nil
|
||||
}
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
if e.session != nil {
|
||||
return nil
|
||||
}
|
||||
// Load tokenizer lazily
|
||||
if e.tokenizer == nil {
|
||||
tok, err := pretrained.FromFile(e.tokenizerPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load tokenizer: %w", err)
|
||||
}
|
||||
e.tokenizer = tok
|
||||
}
|
||||
onnxInitOnce.Do(func() {
|
||||
onnxruntime_go.SetSharedLibraryPath(onnxLibPath)
|
||||
if err := onnxruntime_go.InitializeEnvironment(); err != nil {
|
||||
e.logger.Error("failed to initialize ONNX runtime", "error", err)
|
||||
onnxReady = false
|
||||
return
|
||||
}
|
||||
// Register CUDA provider if available
|
||||
if cudaLibPath != "" {
|
||||
if err := onnxruntime_go.RegisterExecutionProviderLibrary("CUDA", cudaLibPath); err != nil {
|
||||
e.logger.Warn("failed to register CUDA provider", "error", err)
|
||||
}
|
||||
}
|
||||
onnxReady = true
|
||||
})
|
||||
if !onnxReady {
|
||||
return errors.New("ONNX runtime not ready")
|
||||
}
|
||||
// Create session options
|
||||
opts, err := onnxruntime_go.NewSessionOptions()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create session options: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = opts.Destroy()
|
||||
}()
|
||||
|
||||
// Try to add CUDA provider
|
||||
useCUDA := cudaLibPath != ""
|
||||
if useCUDA {
|
||||
cudaOpts, err := onnxruntime_go.NewCUDAProviderOptions()
|
||||
if err != nil {
|
||||
e.logger.Warn("failed to create CUDA provider options, falling back to CPU", "error", err)
|
||||
useCUDA = false
|
||||
} else {
|
||||
defer func() {
|
||||
_ = cudaOpts.Destroy()
|
||||
}()
|
||||
if err := cudaOpts.Update(map[string]string{"device_id": "0"}); err != nil {
|
||||
e.logger.Warn("failed to update CUDA options, falling back to CPU", "error", err)
|
||||
useCUDA = false
|
||||
} else if err := opts.AppendExecutionProviderCUDA(cudaOpts); err != nil {
|
||||
e.logger.Warn("failed to append CUDA provider, falling back to CPU", "error", err)
|
||||
useCUDA = false
|
||||
}
|
||||
}
|
||||
}
|
||||
if useCUDA {
|
||||
e.logger.Info("Using CUDA for ONNX inference")
|
||||
} else {
|
||||
e.logger.Info("Using CPU for ONNX inference")
|
||||
}
|
||||
|
||||
// Create session with options
|
||||
session, err := onnxruntime_go.NewDynamicAdvancedSession(
|
||||
e.getModelPath(),
|
||||
[]string{"input_ids", "attention_mask"},
|
||||
[]string{"sentence_embedding"},
|
||||
opts,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create ONNX session: %w", err)
|
||||
}
|
||||
e.session = session
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *ONNXEmbedder) getModelPath() string {
|
||||
return e.modelPath
|
||||
}
|
||||
|
||||
func (e *ONNXEmbedder) Destroy() error {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
if e.session != nil {
|
||||
if err := e.session.Destroy(); err != nil {
|
||||
return fmt.Errorf("failed to destroy ONNX session: %w", err)
|
||||
}
|
||||
e.session = nil
|
||||
e.logger.Info("ONNX session destroyed, VRAM freed")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *ONNXEmbedder) Embed(text string) ([]float32, error) {
|
||||
if err := e.ensureInitialized(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// 1. Tokenize
|
||||
encoding, err := e.tokenizer.EncodeSingle(text)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("tokenization failed: %w", err)
|
||||
}
|
||||
// 2. Convert to int64 and create attention mask
|
||||
ids := encoding.Ids
|
||||
inputIDs := make([]int64, len(ids))
|
||||
attentionMask := make([]int64, len(ids))
|
||||
for i, id := range ids {
|
||||
inputIDs[i] = int64(id)
|
||||
attentionMask[i] = 1
|
||||
}
|
||||
// 3. Create input tensors (shape: [1, seq_len])
|
||||
seqLen := int64(len(inputIDs))
|
||||
inputIDsTensor, err := onnxruntime_go.NewTensor[int64](
|
||||
onnxruntime_go.NewShape(1, seqLen),
|
||||
inputIDs,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create input_ids tensor: %w", err)
|
||||
}
|
||||
defer func() { _ = inputIDsTensor.Destroy() }()
|
||||
maskTensor, err := onnxruntime_go.NewTensor[int64](
|
||||
onnxruntime_go.NewShape(1, seqLen),
|
||||
attentionMask,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create attention_mask tensor: %w", err)
|
||||
}
|
||||
defer func() { _ = maskTensor.Destroy() }()
|
||||
// 4. Create output tensor
|
||||
outputTensor, err := onnxruntime_go.NewEmptyTensor[float32](
|
||||
onnxruntime_go.NewShape(1, int64(e.dims)),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create output tensor: %w", err)
|
||||
}
|
||||
defer func() { _ = outputTensor.Destroy() }()
|
||||
// 5. Run inference
|
||||
err = e.session.Run(
|
||||
[]onnxruntime_go.Value{inputIDsTensor, maskTensor},
|
||||
[]onnxruntime_go.Value{outputTensor},
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("inference failed: %w", err)
|
||||
}
|
||||
// 6. Copy output data
|
||||
outputData := outputTensor.GetData()
|
||||
embedding := make([]float32, len(outputData))
|
||||
copy(embedding, outputData)
|
||||
return embedding, nil
|
||||
}
|
||||
|
||||
func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) {
|
||||
if err := e.ensureInitialized(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
encodings := make([]*tokenizer.Encoding, len(texts))
|
||||
maxLen := 0
|
||||
for i, txt := range texts {
|
||||
enc, err := e.tokenizer.EncodeSingle(txt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
encodings[i] = enc
|
||||
if l := len(enc.Ids); l > maxLen {
|
||||
maxLen = l
|
||||
}
|
||||
}
|
||||
batchSize := len(texts)
|
||||
inputIDs := make([]int64, batchSize*maxLen)
|
||||
attentionMask := make([]int64, batchSize*maxLen)
|
||||
for i, enc := range encodings {
|
||||
ids := enc.Ids
|
||||
offset := i * maxLen
|
||||
for j, id := range ids {
|
||||
inputIDs[offset+j] = int64(id)
|
||||
attentionMask[offset+j] = 1
|
||||
}
|
||||
// Remaining positions are already zero (padding)
|
||||
}
|
||||
// Create tensors with shape [batchSize, maxLen]
|
||||
inputTensor, _ := onnxruntime_go.NewTensor[int64](
|
||||
onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)),
|
||||
inputIDs,
|
||||
)
|
||||
defer func() { _ = inputTensor.Destroy() }()
|
||||
maskTensor, _ := onnxruntime_go.NewTensor[int64](
|
||||
onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)),
|
||||
attentionMask,
|
||||
)
|
||||
defer func() { _ = maskTensor.Destroy() }()
|
||||
outputTensor, _ := onnxruntime_go.NewEmptyTensor[float32](
|
||||
onnxruntime_go.NewShape(int64(batchSize), int64(e.dims)),
|
||||
)
|
||||
defer func() { _ = outputTensor.Destroy() }()
|
||||
err := e.session.Run(
|
||||
[]onnxruntime_go.Value{inputTensor, maskTensor},
|
||||
[]onnxruntime_go.Value{outputTensor},
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Extract embeddings per batch item
|
||||
data := outputTensor.GetData()
|
||||
embeddings := make([][]float32, batchSize)
|
||||
for i := 0; i < batchSize; i++ {
|
||||
start := i * e.dims
|
||||
emb := make([]float32, e.dims)
|
||||
copy(emb, data[start:start+e.dims])
|
||||
embeddings[i] = emb
|
||||
}
|
||||
return embeddings, nil
|
||||
}
|
||||
|
||||
736
rag/rag.go
736
rag/rag.go
@@ -1,6 +1,7 @@
|
||||
package rag
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"gf-lt/config"
|
||||
@@ -9,51 +10,168 @@ import (
|
||||
"log/slog"
|
||||
"path"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/neurosnap/sentences/english"
|
||||
)
|
||||
|
||||
const ()
|
||||
|
||||
var (
|
||||
// Status messages for TUI integration
|
||||
LongJobStatusCh = make(chan string, 10) // Increased buffer size to prevent blocking
|
||||
LongJobStatusCh = make(chan string, 100) // Increased buffer size for parallel batch updates
|
||||
FinishedRAGStatus = "finished loading RAG file; press Enter"
|
||||
LoadedFileRAGStatus = "loaded file"
|
||||
ErrRAGStatus = "some error occurred; failed to transfer data to vector db"
|
||||
)
|
||||
|
||||
type RAG struct {
|
||||
logger *slog.Logger
|
||||
store storage.FullRepo
|
||||
cfg *config.Config
|
||||
embedder Embedder
|
||||
storage *VectorStorage
|
||||
mu sync.Mutex
|
||||
logger *slog.Logger
|
||||
store storage.FullRepo
|
||||
cfg *config.Config
|
||||
embedder Embedder
|
||||
storage *VectorStorage
|
||||
mu sync.RWMutex
|
||||
idleMu sync.Mutex
|
||||
fallbackMsg string
|
||||
idleTimer *time.Timer
|
||||
idleTimeout time.Duration
|
||||
}
|
||||
|
||||
func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) *RAG {
|
||||
// Initialize with API embedder by default, could be configurable later
|
||||
embedder := NewAPIEmbedder(l, cfg)
|
||||
// batchTask represents a single batch to be embedded
|
||||
type batchTask struct {
|
||||
batchIndex int
|
||||
paragraphs []string
|
||||
filename string
|
||||
totalBatches int
|
||||
}
|
||||
|
||||
// batchResult represents the result of embedding a batch
|
||||
type batchResult struct {
|
||||
batchIndex int
|
||||
embeddings [][]float32
|
||||
paragraphs []string
|
||||
filename string
|
||||
}
|
||||
|
||||
// sendStatusNonBlocking sends a status message without blocking
|
||||
func (r *RAG) sendStatusNonBlocking(status string) {
|
||||
select {
|
||||
case LongJobStatusCh <- status:
|
||||
default:
|
||||
r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", status)
|
||||
}
|
||||
}
|
||||
|
||||
func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) (*RAG, error) {
|
||||
var embedder Embedder
|
||||
var fallbackMsg string
|
||||
if cfg.EmbedModelPath != "" && cfg.EmbedTokenizerPath != "" {
|
||||
emb, err := NewONNXEmbedder(cfg.EmbedModelPath, cfg.EmbedTokenizerPath, cfg.EmbedDims, l)
|
||||
if err != nil {
|
||||
l.Error("failed to create ONNX embedder, falling back to API", "error", err)
|
||||
fallbackMsg = err.Error()
|
||||
embedder = NewAPIEmbedder(l, cfg)
|
||||
} else {
|
||||
embedder = emb
|
||||
l.Info("using ONNX embedder", "model", cfg.EmbedModelPath, "dims", cfg.EmbedDims)
|
||||
}
|
||||
} else {
|
||||
embedder = NewAPIEmbedder(l, cfg)
|
||||
l.Info("using API embedder", "url", cfg.EmbedURL)
|
||||
}
|
||||
rag := &RAG{
|
||||
logger: l,
|
||||
store: s,
|
||||
cfg: cfg,
|
||||
embedder: embedder,
|
||||
storage: NewVectorStorage(l, s),
|
||||
logger: l,
|
||||
store: s,
|
||||
cfg: cfg,
|
||||
embedder: embedder,
|
||||
storage: NewVectorStorage(l, s),
|
||||
fallbackMsg: fallbackMsg,
|
||||
idleTimeout: 30 * time.Second,
|
||||
}
|
||||
|
||||
// Note: Vector tables are created via database migrations, not at runtime
|
||||
|
||||
return rag
|
||||
return rag, nil
|
||||
}
|
||||
|
||||
func wordCounter(sentence string) int {
|
||||
return len(strings.Split(strings.TrimSpace(sentence), " "))
|
||||
func createChunks(sentences []string, wordLimit, overlapWords uint32) []string {
|
||||
if len(sentences) == 0 {
|
||||
return nil
|
||||
}
|
||||
if overlapWords >= wordLimit {
|
||||
overlapWords = wordLimit / 2
|
||||
}
|
||||
var chunks []string
|
||||
i := 0
|
||||
for i < len(sentences) {
|
||||
var chunkWords []string
|
||||
wordCount := 0
|
||||
j := i
|
||||
for j < len(sentences) && wordCount <= int(wordLimit) {
|
||||
sentence := sentences[j]
|
||||
words := strings.Fields(sentence)
|
||||
chunkWords = append(chunkWords, sentence)
|
||||
wordCount += len(words)
|
||||
j++
|
||||
// If this sentence alone exceeds limit, still include it and stop
|
||||
if wordCount > int(wordLimit) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if len(chunkWords) == 0 {
|
||||
break
|
||||
}
|
||||
chunk := strings.Join(chunkWords, " ")
|
||||
chunks = append(chunks, chunk)
|
||||
if j >= len(sentences) {
|
||||
break
|
||||
}
|
||||
// Move i forward by skipping overlap
|
||||
if overlapWords == 0 {
|
||||
i = j
|
||||
continue
|
||||
}
|
||||
// Calculate how many sentences to skip to achieve overlapWords
|
||||
overlapRemaining := int(overlapWords)
|
||||
newI := i
|
||||
for newI < j && overlapRemaining > 0 {
|
||||
words := len(strings.Fields(sentences[newI]))
|
||||
overlapRemaining -= words
|
||||
if overlapRemaining >= 0 {
|
||||
newI++
|
||||
}
|
||||
}
|
||||
if newI == i {
|
||||
newI = j
|
||||
}
|
||||
i = newI
|
||||
}
|
||||
return chunks
|
||||
}
|
||||
|
||||
func sanitizeFTSQuery(query string) string {
|
||||
// Remove double quotes and other problematic characters for FTS5
|
||||
query = strings.ReplaceAll(query, "\"", " ")
|
||||
query = strings.ReplaceAll(query, "'", " ")
|
||||
query = strings.ReplaceAll(query, ";", " ")
|
||||
query = strings.ReplaceAll(query, "\\", " ")
|
||||
query = strings.TrimSpace(query)
|
||||
if query == "" {
|
||||
return "*" // match all
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
func (r *RAG) LoadRAG(fpath string) error {
|
||||
return r.LoadRAGWithContext(context.Background(), fpath)
|
||||
}
|
||||
|
||||
func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
fileText, err := ExtractText(fpath)
|
||||
@@ -61,11 +179,9 @@ func (r *RAG) LoadRAG(fpath string) error {
|
||||
return err
|
||||
}
|
||||
r.logger.Debug("rag: loaded file", "fp", fpath)
|
||||
select {
|
||||
case LongJobStatusCh <- LoadedFileRAGStatus:
|
||||
default:
|
||||
r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", LoadedFileRAGStatus)
|
||||
}
|
||||
|
||||
// Send initial status (non-blocking with retry)
|
||||
r.sendStatusNonBlocking(LoadedFileRAGStatus)
|
||||
tokenizer, err := english.NewSentenceTokenizer(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -75,31 +191,9 @@ func (r *RAG) LoadRAG(fpath string) error {
|
||||
for i, s := range sentences {
|
||||
sents[i] = s.Text
|
||||
}
|
||||
// Group sentences into paragraphs based on word limit
|
||||
paragraphs := []string{}
|
||||
par := strings.Builder{}
|
||||
for i := 0; i < len(sents); i++ {
|
||||
if strings.TrimSpace(sents[i]) != "" {
|
||||
if par.Len() > 0 {
|
||||
par.WriteString(" ")
|
||||
}
|
||||
par.WriteString(sents[i])
|
||||
}
|
||||
if wordCounter(par.String()) > int(r.cfg.RAGWordLimit) {
|
||||
paragraph := strings.TrimSpace(par.String())
|
||||
if paragraph != "" {
|
||||
paragraphs = append(paragraphs, paragraph)
|
||||
}
|
||||
par.Reset()
|
||||
}
|
||||
}
|
||||
// Handle any remaining content in the paragraph buffer
|
||||
if par.Len() > 0 {
|
||||
paragraph := strings.TrimSpace(par.String())
|
||||
if paragraph != "" {
|
||||
paragraphs = append(paragraphs, paragraph)
|
||||
}
|
||||
}
|
||||
|
||||
// Create chunks with overlap
|
||||
paragraphs := createChunks(sents, r.cfg.RAGWordLimit, r.cfg.RAGOverlapWords)
|
||||
// Adjust batch size if needed
|
||||
if len(paragraphs) < r.cfg.RAGBatchSize && len(paragraphs) > 0 {
|
||||
r.cfg.RAGBatchSize = len(paragraphs)
|
||||
@@ -107,91 +201,348 @@ func (r *RAG) LoadRAG(fpath string) error {
|
||||
if len(paragraphs) == 0 {
|
||||
return errors.New("no valid paragraphs found in file")
|
||||
}
|
||||
// Process paragraphs in batches synchronously
|
||||
batchCount := 0
|
||||
for i := 0; i < len(paragraphs); i += r.cfg.RAGBatchSize {
|
||||
end := i + r.cfg.RAGBatchSize
|
||||
if end > len(paragraphs) {
|
||||
end = len(paragraphs)
|
||||
}
|
||||
batch := paragraphs[i:end]
|
||||
batchCount++
|
||||
// Filter empty paragraphs
|
||||
nonEmptyBatch := make([]string, 0, len(batch))
|
||||
for _, p := range batch {
|
||||
if strings.TrimSpace(p) != "" {
|
||||
nonEmptyBatch = append(nonEmptyBatch, strings.TrimSpace(p))
|
||||
totalBatches := (len(paragraphs) + r.cfg.RAGBatchSize - 1) / r.cfg.RAGBatchSize
|
||||
r.logger.Debug("starting parallel embedding", "total_batches", totalBatches, "batch_size", r.cfg.RAGBatchSize)
|
||||
|
||||
// Determine concurrency level
|
||||
concurrency := runtime.NumCPU()
|
||||
if concurrency > totalBatches {
|
||||
concurrency = totalBatches
|
||||
}
|
||||
if concurrency < 1 {
|
||||
concurrency = 1
|
||||
}
|
||||
// If using ONNX embedder, limit concurrency to 1 due to mutex serialization
|
||||
var isONNX bool
|
||||
if _, isONNX = r.embedder.(*ONNXEmbedder); isONNX {
|
||||
concurrency = 1
|
||||
}
|
||||
embedderType := "API"
|
||||
if isONNX {
|
||||
embedderType = "ONNX"
|
||||
}
|
||||
r.logger.Debug("parallel embedding setup",
|
||||
"total_batches", totalBatches,
|
||||
"concurrency", concurrency,
|
||||
"embedder", embedderType,
|
||||
"batch_size", r.cfg.RAGBatchSize)
|
||||
|
||||
// Create context with timeout (30 minutes) and cancellation for error handling
|
||||
ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
// Channels for task distribution and results
|
||||
taskCh := make(chan batchTask, totalBatches)
|
||||
resultCh := make(chan batchResult, totalBatches)
|
||||
errorCh := make(chan error, totalBatches)
|
||||
|
||||
// Start worker goroutines
|
||||
var wg sync.WaitGroup
|
||||
for w := 0; w < concurrency; w++ {
|
||||
wg.Add(1)
|
||||
go r.embeddingWorker(ctx, w, taskCh, resultCh, errorCh, &wg)
|
||||
}
|
||||
|
||||
// Close task channel after all tasks are sent (by separate goroutine)
|
||||
go func() {
|
||||
// Ensure task channel is closed when this goroutine exits
|
||||
defer close(taskCh)
|
||||
r.logger.Debug("task distributor started", "total_batches", totalBatches)
|
||||
for i := 0; i < totalBatches; i++ {
|
||||
start := i * r.cfg.RAGBatchSize
|
||||
end := start + r.cfg.RAGBatchSize
|
||||
if end > len(paragraphs) {
|
||||
end = len(paragraphs)
|
||||
}
|
||||
batch := paragraphs[start:end]
|
||||
|
||||
// Filter empty paragraphs
|
||||
nonEmptyBatch := make([]string, 0, len(batch))
|
||||
for _, p := range batch {
|
||||
if strings.TrimSpace(p) != "" {
|
||||
nonEmptyBatch = append(nonEmptyBatch, strings.TrimSpace(p))
|
||||
}
|
||||
}
|
||||
|
||||
task := batchTask{
|
||||
batchIndex: i,
|
||||
paragraphs: nonEmptyBatch,
|
||||
filename: path.Base(fpath),
|
||||
totalBatches: totalBatches,
|
||||
}
|
||||
|
||||
select {
|
||||
case taskCh <- task:
|
||||
r.logger.Debug("task distributor sent batch", "batch", i, "paragraphs", len(nonEmptyBatch))
|
||||
case <-ctx.Done():
|
||||
r.logger.Debug("task distributor cancelled", "batches_sent", i+1, "total_batches", totalBatches)
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(nonEmptyBatch) == 0 {
|
||||
r.logger.Debug("task distributor finished", "batches_sent", totalBatches)
|
||||
}()
|
||||
|
||||
// Wait for workers to finish and close result channel
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(resultCh)
|
||||
}()
|
||||
|
||||
// Process results in order and write to database
|
||||
nextExpectedBatch := 0
|
||||
resultsBuffer := make(map[int]batchResult)
|
||||
filename := path.Base(fpath)
|
||||
batchesProcessed := 0
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
|
||||
case err := <-errorCh:
|
||||
// First error from any worker, cancel everything
|
||||
cancel()
|
||||
r.logger.Error("embedding worker failed", "error", err)
|
||||
r.sendStatusNonBlocking(ErrRAGStatus)
|
||||
return fmt.Errorf("embedding failed: %w", err)
|
||||
|
||||
case result, ok := <-resultCh:
|
||||
if !ok {
|
||||
// All results processed
|
||||
resultCh = nil
|
||||
r.logger.Debug("result channel closed", "batches_processed", batchesProcessed, "total_batches", totalBatches)
|
||||
continue
|
||||
}
|
||||
|
||||
// Store result in buffer
|
||||
resultsBuffer[result.batchIndex] = result
|
||||
|
||||
// Process buffered results in order
|
||||
for {
|
||||
if res, exists := resultsBuffer[nextExpectedBatch]; exists {
|
||||
// Write this batch to database
|
||||
if err := r.writeBatchToStorage(ctx, res, filename); err != nil {
|
||||
cancel()
|
||||
return err
|
||||
}
|
||||
|
||||
batchesProcessed++
|
||||
// Send progress update
|
||||
statusMsg := fmt.Sprintf("processed batch %d/%d", batchesProcessed, totalBatches)
|
||||
r.sendStatusNonBlocking(statusMsg)
|
||||
|
||||
delete(resultsBuffer, nextExpectedBatch)
|
||||
nextExpectedBatch++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
// No channels ready, check for deadlock conditions
|
||||
if resultCh == nil && nextExpectedBatch < totalBatches {
|
||||
// Missing batch results after result channel closed
|
||||
r.logger.Error("missing batch results",
|
||||
"expected", totalBatches,
|
||||
"received", nextExpectedBatch,
|
||||
"missing", totalBatches-nextExpectedBatch)
|
||||
|
||||
// Wait a short time for any delayed errors, then cancel
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
cancel()
|
||||
return fmt.Errorf("missing batch results: expected %d, got %d", totalBatches, nextExpectedBatch)
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case err := <-errorCh:
|
||||
cancel()
|
||||
r.logger.Error("embedding worker failed after result channel closed", "error", err)
|
||||
r.sendStatusNonBlocking(ErrRAGStatus)
|
||||
return fmt.Errorf("embedding failed: %w", err)
|
||||
}
|
||||
}
|
||||
// If we reach here, no deadlock yet, just busy loop prevention
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
// Check if we're done
|
||||
if resultCh == nil && nextExpectedBatch >= totalBatches {
|
||||
r.logger.Debug("all batches processed successfully", "total", totalBatches)
|
||||
break
|
||||
}
|
||||
}
|
||||
r.logger.Debug("finished writing vectors", "batches", batchesProcessed)
|
||||
r.resetIdleTimer()
|
||||
r.sendStatusNonBlocking(FinishedRAGStatus)
|
||||
return nil
|
||||
}
|
||||
|
||||
// embeddingWorker processes batch embedding tasks
|
||||
func (r *RAG) embeddingWorker(ctx context.Context, workerID int, taskCh <-chan batchTask, resultCh chan<- batchResult, errorCh chan<- error, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
r.logger.Debug("embedding worker started", "worker", workerID)
|
||||
|
||||
// Panic recovery to ensure worker doesn't crash silently
|
||||
defer func() {
|
||||
if rec := recover(); rec != nil {
|
||||
r.logger.Error("embedding worker panicked", "worker", workerID, "panic", rec)
|
||||
// Try to send error, but don't block if channel is full
|
||||
select {
|
||||
case errorCh <- fmt.Errorf("worker %d panicked: %v", workerID, rec):
|
||||
default:
|
||||
r.logger.Warn("error channel full, dropping panic error", "worker", workerID)
|
||||
}
|
||||
}
|
||||
}()
|
||||
for task := range taskCh {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
r.logger.Debug("embedding worker cancelled", "worker", workerID)
|
||||
return
|
||||
default:
|
||||
}
|
||||
r.logger.Debug("worker processing batch", "worker", workerID, "batch", task.batchIndex, "paragraphs", len(task.paragraphs), "total_batches", task.totalBatches)
|
||||
|
||||
// Skip empty batches
|
||||
if len(task.paragraphs) == 0 {
|
||||
select {
|
||||
case resultCh <- batchResult{
|
||||
batchIndex: task.batchIndex,
|
||||
embeddings: nil,
|
||||
paragraphs: nil,
|
||||
filename: task.filename,
|
||||
}:
|
||||
case <-ctx.Done():
|
||||
r.logger.Debug("embedding worker cancelled while sending empty batch", "worker", workerID)
|
||||
return
|
||||
}
|
||||
r.logger.Debug("worker sent empty batch", "worker", workerID, "batch", task.batchIndex)
|
||||
continue
|
||||
}
|
||||
// Embed the batch
|
||||
embeddings, err := r.embedder.EmbedSlice(nonEmptyBatch)
|
||||
// Embed with retry for API embedder
|
||||
embeddings, err := r.embedWithRetry(ctx, task.paragraphs, 3)
|
||||
if err != nil {
|
||||
r.logger.Error("failed to embed batch", "error", err, "batch", batchCount)
|
||||
// Try to send error, but don't block indefinitely
|
||||
select {
|
||||
case LongJobStatusCh <- ErrRAGStatus:
|
||||
default:
|
||||
r.logger.Warn("LongJobStatusCh channel full, dropping message")
|
||||
case errorCh <- fmt.Errorf("worker %d batch %d: %w", workerID, task.batchIndex, err):
|
||||
case <-ctx.Done():
|
||||
r.logger.Debug("embedding worker cancelled while sending error", "worker", workerID)
|
||||
}
|
||||
return fmt.Errorf("failed to embed batch %d: %w", batchCount, err)
|
||||
return
|
||||
}
|
||||
if len(embeddings) != len(nonEmptyBatch) {
|
||||
err := errors.New("embedding count mismatch")
|
||||
r.logger.Error("embedding mismatch", "expected", len(nonEmptyBatch), "got", len(embeddings))
|
||||
return err
|
||||
}
|
||||
// Write vectors to storage
|
||||
filename := path.Base(fpath)
|
||||
for j, text := range nonEmptyBatch {
|
||||
vector := models.VectorRow{
|
||||
Embeddings: embeddings[j],
|
||||
RawText: text,
|
||||
Slug: fmt.Sprintf("%s_%d_%d", filename, batchCount, j),
|
||||
FileName: filename,
|
||||
}
|
||||
if err := r.storage.WriteVector(&vector); err != nil {
|
||||
r.logger.Error("failed to write vector to DB", "error", err, "slug", vector.Slug)
|
||||
select {
|
||||
case LongJobStatusCh <- ErrRAGStatus:
|
||||
default:
|
||||
r.logger.Warn("LongJobStatusCh channel full, dropping message")
|
||||
}
|
||||
return fmt.Errorf("failed to write vector: %w", err)
|
||||
}
|
||||
}
|
||||
r.logger.Debug("wrote batch to db", "batch", batchCount, "size", len(nonEmptyBatch))
|
||||
// Send progress status
|
||||
statusMsg := fmt.Sprintf("processed batch %d/%d", batchCount, (len(paragraphs)+r.cfg.RAGBatchSize-1)/r.cfg.RAGBatchSize)
|
||||
// Send result with context awareness
|
||||
select {
|
||||
case LongJobStatusCh <- statusMsg:
|
||||
default:
|
||||
r.logger.Warn("LongJobStatusCh channel full, dropping message")
|
||||
case resultCh <- batchResult{
|
||||
batchIndex: task.batchIndex,
|
||||
embeddings: embeddings,
|
||||
paragraphs: task.paragraphs,
|
||||
filename: task.filename,
|
||||
}:
|
||||
case <-ctx.Done():
|
||||
r.logger.Debug("embedding worker cancelled while sending result", "worker", workerID)
|
||||
return
|
||||
}
|
||||
r.logger.Debug("worker completed batch", "worker", workerID, "batch", task.batchIndex, "embeddings", len(embeddings))
|
||||
}
|
||||
r.logger.Debug("embedding worker finished", "worker", workerID)
|
||||
}
|
||||
|
||||
// embedWithRetry attempts embedding with exponential backoff for API embedder
|
||||
func (r *RAG) embedWithRetry(ctx context.Context, paragraphs []string, maxRetries int) ([][]float32, error) {
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxRetries; attempt++ {
|
||||
if attempt > 0 {
|
||||
// Exponential backoff
|
||||
backoff := time.Duration(attempt*attempt) * time.Second
|
||||
if backoff > 10*time.Second {
|
||||
backoff = 10 * time.Second
|
||||
}
|
||||
select {
|
||||
case <-time.After(backoff):
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
r.logger.Debug("retrying embedding", "attempt", attempt, "max_retries", maxRetries)
|
||||
}
|
||||
|
||||
embeddings, err := r.embedder.EmbedSlice(paragraphs)
|
||||
if err == nil {
|
||||
// Validate embedding count
|
||||
if len(embeddings) != len(paragraphs) {
|
||||
return nil, fmt.Errorf("embedding count mismatch: expected %d, got %d", len(paragraphs), len(embeddings))
|
||||
}
|
||||
return embeddings, nil
|
||||
}
|
||||
|
||||
lastErr = err
|
||||
// Only retry for API embedder errors (network/timeout)
|
||||
// For ONNX embedder, fail fast
|
||||
if _, isAPI := r.embedder.(*APIEmbedder); !isAPI {
|
||||
break
|
||||
}
|
||||
}
|
||||
r.logger.Debug("finished writing vectors", "batches", batchCount)
|
||||
select {
|
||||
case LongJobStatusCh <- FinishedRAGStatus:
|
||||
default:
|
||||
r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", FinishedRAGStatus)
|
||||
return nil, fmt.Errorf("embedding failed after %d attempts: %w", maxRetries, lastErr)
|
||||
}
|
||||
|
||||
// writeBatchToStorage writes a single batch of vectors to the database
|
||||
func (r *RAG) writeBatchToStorage(ctx context.Context, result batchResult, filename string) error {
|
||||
if len(result.embeddings) == 0 {
|
||||
// Empty batch, skip
|
||||
return nil
|
||||
}
|
||||
// Check context before starting
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
// Build all vectors for batch write
|
||||
vectors := make([]*models.VectorRow, 0, len(result.paragraphs))
|
||||
for j, text := range result.paragraphs {
|
||||
vectors = append(vectors, &models.VectorRow{
|
||||
Embeddings: result.embeddings[j],
|
||||
RawText: text,
|
||||
Slug: fmt.Sprintf("%s_%d_%d", filename, result.batchIndex+1, j),
|
||||
FileName: filename,
|
||||
})
|
||||
}
|
||||
|
||||
// Write all vectors in a single transaction
|
||||
if err := r.storage.WriteVectors(vectors); err != nil {
|
||||
r.logger.Error("failed to write vectors batch to DB", "error", err, "batch", result.batchIndex+1, "size", len(vectors))
|
||||
r.sendStatusNonBlocking(ErrRAGStatus)
|
||||
return fmt.Errorf("failed to write vectors batch: %w", err)
|
||||
}
|
||||
r.logger.Debug("wrote batch to db", "batch", result.batchIndex+1, "size", len(result.paragraphs))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *RAG) LineToVector(line string) ([]float32, error) {
|
||||
r.resetIdleTimer()
|
||||
return r.embedder.Embed(line)
|
||||
}
|
||||
|
||||
func (r *RAG) SearchEmb(emb *models.EmbeddingResp) ([]models.VectorRow, error) {
|
||||
return r.storage.SearchClosest(emb.Embedding)
|
||||
func (r *RAG) searchEmb(emb *models.EmbeddingResp, limit int) ([]models.VectorRow, error) {
|
||||
r.resetIdleTimer()
|
||||
return r.storage.SearchClosest(emb.Embedding, limit)
|
||||
}
|
||||
|
||||
func (r *RAG) searchKeyword(query string, limit int) ([]models.VectorRow, error) {
|
||||
r.resetIdleTimer()
|
||||
sanitized := sanitizeFTSQuery(query)
|
||||
return r.storage.SearchKeyword(sanitized, limit)
|
||||
}
|
||||
|
||||
func (r *RAG) ListLoaded() ([]string, error) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
return r.storage.ListFiles()
|
||||
}
|
||||
|
||||
func (r *RAG) RemoveFile(filename string) error {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.resetIdleTimer()
|
||||
return r.storage.RemoveEmbByFileName(filename)
|
||||
}
|
||||
|
||||
@@ -211,10 +562,13 @@ func (r *RAG) RefineQuery(query string) string {
|
||||
return original
|
||||
}
|
||||
query = strings.ToLower(query)
|
||||
for _, stopWord := range stopWords {
|
||||
wordPattern := `\b` + stopWord + `\b`
|
||||
re := regexp.MustCompile(wordPattern)
|
||||
query = re.ReplaceAllString(query, "")
|
||||
words := strings.Fields(query)
|
||||
if len(words) >= 3 {
|
||||
for _, stopWord := range stopWords {
|
||||
wordPattern := `\b` + stopWord + `\b`
|
||||
re := regexp.MustCompile(wordPattern)
|
||||
query = re.ReplaceAllString(query, "")
|
||||
}
|
||||
}
|
||||
query = strings.TrimSpace(query)
|
||||
if len(query) < 5 {
|
||||
@@ -246,7 +600,7 @@ func (r *RAG) extractImportantPhrases(query string) string {
|
||||
break
|
||||
}
|
||||
}
|
||||
if isImportant || len(word) > 3 {
|
||||
if isImportant || len(word) >= 3 {
|
||||
important = append(important, word)
|
||||
}
|
||||
}
|
||||
@@ -265,6 +619,36 @@ func (r *RAG) GenerateQueryVariations(query string) []string {
|
||||
if len(parts) == 0 {
|
||||
return variations
|
||||
}
|
||||
// Get loaded filenames to filter out filename terms
|
||||
filenames, err := r.storage.ListFiles()
|
||||
if err == nil && len(filenames) > 0 {
|
||||
// Convert to lowercase for case-insensitive matching
|
||||
lowerFilenames := make([]string, len(filenames))
|
||||
for i, f := range filenames {
|
||||
lowerFilenames[i] = strings.ToLower(f)
|
||||
}
|
||||
filteredParts := make([]string, 0, len(parts))
|
||||
for _, part := range parts {
|
||||
partLower := strings.ToLower(part)
|
||||
skip := false
|
||||
for _, fn := range lowerFilenames {
|
||||
if strings.Contains(fn, partLower) || strings.Contains(partLower, fn) {
|
||||
skip = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !skip {
|
||||
filteredParts = append(filteredParts, part)
|
||||
}
|
||||
}
|
||||
// If filteredParts not empty and different from original, add filtered query
|
||||
if len(filteredParts) > 0 && len(filteredParts) != len(parts) {
|
||||
filteredQuery := strings.Join(filteredParts, " ")
|
||||
if len(filteredQuery) >= 5 {
|
||||
variations = append(variations, filteredQuery)
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(parts) >= 2 {
|
||||
trimmed := strings.Join(parts[:len(parts)-1], " ")
|
||||
if len(trimmed) >= 5 {
|
||||
@@ -328,9 +712,14 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V
|
||||
})
|
||||
unique := make([]models.VectorRow, 0)
|
||||
seen := make(map[string]bool)
|
||||
fileCounts := make(map[string]int)
|
||||
for i := range scored {
|
||||
if !seen[scored[i].row.Slug] {
|
||||
if fileCounts[scored[i].row.FileName] >= 2 {
|
||||
continue
|
||||
}
|
||||
seen[scored[i].row.Slug] = true
|
||||
fileCounts[scored[i].row.FileName]++
|
||||
unique = append(unique, scored[i].row)
|
||||
}
|
||||
}
|
||||
@@ -341,6 +730,9 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V
|
||||
}
|
||||
|
||||
func (r *RAG) SynthesizeAnswer(results []models.VectorRow, query string) (string, error) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
r.resetIdleTimer()
|
||||
if len(results) == 0 {
|
||||
return "No relevant information found in the vector database.", nil
|
||||
}
|
||||
@@ -369,7 +761,7 @@ func (r *RAG) SynthesizeAnswer(results []models.VectorRow, query string) (string
|
||||
Embedding: emb,
|
||||
Index: 0,
|
||||
}
|
||||
topResults, err := r.SearchEmb(embResp)
|
||||
topResults, err := r.searchEmb(embResp, 1)
|
||||
if err != nil {
|
||||
r.logger.Error("failed to search for synthesis context", "error", err)
|
||||
return "", err
|
||||
@@ -396,9 +788,14 @@ func truncateString(s string, maxLen int) string {
|
||||
}
|
||||
|
||||
func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
r.resetIdleTimer()
|
||||
refined := r.RefineQuery(query)
|
||||
variations := r.GenerateQueryVariations(refined)
|
||||
allResults := make([]models.VectorRow, 0)
|
||||
|
||||
// Collect embedding search results from all variations
|
||||
var embResults []models.VectorRow
|
||||
seen := make(map[string]bool)
|
||||
for _, q := range variations {
|
||||
emb, err := r.LineToVector(q)
|
||||
@@ -406,29 +803,78 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) {
|
||||
r.logger.Error("failed to embed query variation", "error", err, "query", q)
|
||||
continue
|
||||
}
|
||||
|
||||
embResp := &models.EmbeddingResp{
|
||||
Embedding: emb,
|
||||
Index: 0,
|
||||
}
|
||||
|
||||
results, err := r.SearchEmb(embResp)
|
||||
results, err := r.searchEmb(embResp, limit*2) // Get more candidates
|
||||
if err != nil {
|
||||
r.logger.Error("failed to search embeddings", "error", err, "query", q)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, row := range results {
|
||||
if !seen[row.Slug] {
|
||||
seen[row.Slug] = true
|
||||
allResults = append(allResults, row)
|
||||
embResults = append(embResults, row)
|
||||
}
|
||||
}
|
||||
}
|
||||
reranked := r.RerankResults(allResults, query)
|
||||
if len(reranked) > limit {
|
||||
reranked = reranked[:limit]
|
||||
// Sort embedding results by distance (lower is better)
|
||||
sort.Slice(embResults, func(i, j int) bool {
|
||||
return embResults[i].Distance < embResults[j].Distance
|
||||
})
|
||||
|
||||
// Perform keyword search
|
||||
kwResults, err := r.searchKeyword(refined, limit*2)
|
||||
if err != nil {
|
||||
r.logger.Warn("keyword search failed, using only embeddings", "error", err)
|
||||
kwResults = nil
|
||||
}
|
||||
// Sort keyword results by distance (already sorted by BM25 score)
|
||||
// kwResults already sorted by distance (lower is better)
|
||||
|
||||
// Combine using Reciprocal Rank Fusion (RRF)
|
||||
const rrfK = 60
|
||||
type scoredRow struct {
|
||||
row models.VectorRow
|
||||
score float64
|
||||
}
|
||||
scoreMap := make(map[string]float64)
|
||||
// Add embedding results
|
||||
for rank, row := range embResults {
|
||||
score := 1.0 / (float64(rank) + rrfK)
|
||||
scoreMap[row.Slug] += score
|
||||
}
|
||||
// Add keyword results
|
||||
for rank, row := range kwResults {
|
||||
score := 1.0 / (float64(rank) + rrfK)
|
||||
scoreMap[row.Slug] += score
|
||||
// Ensure row exists in combined results
|
||||
if _, exists := seen[row.Slug]; !exists {
|
||||
embResults = append(embResults, row)
|
||||
}
|
||||
}
|
||||
// Create slice of scored rows
|
||||
scoredRows := make([]scoredRow, 0, len(embResults))
|
||||
for _, row := range embResults {
|
||||
score := scoreMap[row.Slug]
|
||||
scoredRows = append(scoredRows, scoredRow{row: row, score: score})
|
||||
}
|
||||
// Sort by descending RRF score
|
||||
sort.Slice(scoredRows, func(i, j int) bool {
|
||||
return scoredRows[i].score > scoredRows[j].score
|
||||
})
|
||||
// Take top limit
|
||||
if len(scoredRows) > limit {
|
||||
scoredRows = scoredRows[:limit]
|
||||
}
|
||||
// Convert back to VectorRow
|
||||
finalResults := make([]models.VectorRow, len(scoredRows))
|
||||
for i, sr := range scoredRows {
|
||||
finalResults[i] = sr.row
|
||||
}
|
||||
// Apply reranking heuristics
|
||||
reranked := r.RerankResults(finalResults, query)
|
||||
return reranked, nil
|
||||
}
|
||||
|
||||
@@ -437,16 +883,58 @@ var (
|
||||
ragOnce sync.Once
|
||||
)
|
||||
|
||||
func (r *RAG) FallbackMessage() string {
|
||||
return r.fallbackMsg
|
||||
}
|
||||
|
||||
func Init(c *config.Config, l *slog.Logger, s storage.FullRepo) error {
|
||||
var err error
|
||||
ragOnce.Do(func() {
|
||||
if c == nil || l == nil || s == nil {
|
||||
return
|
||||
}
|
||||
ragInstance = New(l, s, c)
|
||||
ragInstance, err = New(l, s, c)
|
||||
})
|
||||
return nil
|
||||
return err
|
||||
}
|
||||
|
||||
func GetInstance() *RAG {
|
||||
return ragInstance
|
||||
}
|
||||
|
||||
func (r *RAG) resetIdleTimer() {
|
||||
r.idleMu.Lock()
|
||||
defer r.idleMu.Unlock()
|
||||
if r.idleTimer != nil {
|
||||
r.idleTimer.Stop()
|
||||
}
|
||||
r.idleTimer = time.AfterFunc(r.idleTimeout, func() {
|
||||
r.freeONNXMemory()
|
||||
})
|
||||
}
|
||||
|
||||
func (r *RAG) freeONNXMemory() {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
if onnx, ok := r.embedder.(*ONNXEmbedder); ok {
|
||||
if err := onnx.Destroy(); err != nil {
|
||||
r.logger.Error("failed to free ONNX memory", "error", err)
|
||||
} else {
|
||||
r.logger.Info("freed ONNX VRAM after idle timeout")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *RAG) Destroy() {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
if r.idleTimer != nil {
|
||||
r.idleTimer.Stop()
|
||||
r.idleTimer = nil
|
||||
}
|
||||
if onnx, ok := r.embedder.(*ONNXEmbedder); ok {
|
||||
if err := onnx.Destroy(); err != nil {
|
||||
r.logger.Error("failed to destroy ONNX embedder", "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
232
rag/storage.go
232
rag/storage.go
@@ -1,6 +1,7 @@
|
||||
package rag
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"gf-lt/models"
|
||||
@@ -62,6 +63,17 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
embeddingSize := len(row.Embeddings)
|
||||
// Start transaction
|
||||
tx, err := vs.sqlxDB.Beginx()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
||||
// Serialize the embeddings to binary
|
||||
serializedEmbeddings := SerializeVector(row.Embeddings)
|
||||
@@ -69,10 +81,102 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error {
|
||||
"INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES (?, ?, ?, ?)",
|
||||
tableName,
|
||||
)
|
||||
if _, err := vs.sqlxDB.Exec(query, serializedEmbeddings, row.Slug, row.RawText, row.FileName); err != nil {
|
||||
if _, err := tx.Exec(query, serializedEmbeddings, row.Slug, row.RawText, row.FileName); err != nil {
|
||||
vs.logger.Error("failed to write vector", "error", err, "slug", row.Slug)
|
||||
return err
|
||||
}
|
||||
// Insert into FTS table
|
||||
ftsQuery := `INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES (?, ?, ?, ?)`
|
||||
if _, err := tx.Exec(ftsQuery, row.Slug, row.RawText, row.FileName, embeddingSize); err != nil {
|
||||
vs.logger.Error("failed to write to FTS table", "error", err, "slug", row.Slug)
|
||||
return err
|
||||
}
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
vs.logger.Error("failed to commit transaction", "error", err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteVectors stores multiple embedding vectors in a single transaction
|
||||
func (vs *VectorStorage) WriteVectors(rows []*models.VectorRow) error {
|
||||
if len(rows) == 0 {
|
||||
return nil
|
||||
}
|
||||
// SQLite has limit of 999 parameters per statement, each row uses 4 parameters
|
||||
const maxBatchSize = 200 // 200 * 4 = 800 < 999
|
||||
if len(rows) > maxBatchSize {
|
||||
// Process in chunks
|
||||
for i := 0; i < len(rows); i += maxBatchSize {
|
||||
end := i + maxBatchSize
|
||||
if end > len(rows) {
|
||||
end = len(rows)
|
||||
}
|
||||
if err := vs.WriteVectors(rows[i:end]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// All rows should have same embedding size (same model)
|
||||
firstSize := len(rows[0].Embeddings)
|
||||
for i, row := range rows {
|
||||
if len(row.Embeddings) != firstSize {
|
||||
return fmt.Errorf("embedding size mismatch: row %d has size %d, expected %d", i, len(row.Embeddings), firstSize)
|
||||
}
|
||||
}
|
||||
tableName, err := vs.getTableName(rows[0].Embeddings)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Start transaction
|
||||
tx, err := vs.sqlxDB.Beginx()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
||||
// Build batch insert for embeddings table
|
||||
embeddingPlaceholders := make([]string, 0, len(rows))
|
||||
embeddingArgs := make([]any, 0, len(rows)*4)
|
||||
for _, row := range rows {
|
||||
embeddingPlaceholders = append(embeddingPlaceholders, "(?, ?, ?, ?)")
|
||||
embeddingArgs = append(embeddingArgs, SerializeVector(row.Embeddings), row.Slug, row.RawText, row.FileName)
|
||||
}
|
||||
embeddingQuery := fmt.Sprintf(
|
||||
"INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES %s",
|
||||
tableName,
|
||||
strings.Join(embeddingPlaceholders, ", "),
|
||||
)
|
||||
if _, err := tx.Exec(embeddingQuery, embeddingArgs...); err != nil {
|
||||
vs.logger.Error("failed to write vectors batch", "error", err, "batch_size", len(rows))
|
||||
return err
|
||||
}
|
||||
// Build batch insert for FTS table
|
||||
ftsPlaceholders := make([]string, 0, len(rows))
|
||||
ftsArgs := make([]any, 0, len(rows)*4)
|
||||
embeddingSize := len(rows[0].Embeddings)
|
||||
for _, row := range rows {
|
||||
ftsPlaceholders = append(ftsPlaceholders, "(?, ?, ?, ?)")
|
||||
ftsArgs = append(ftsArgs, row.Slug, row.RawText, row.FileName, embeddingSize)
|
||||
}
|
||||
ftsQuery := "INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES " +
|
||||
strings.Join(ftsPlaceholders, ", ")
|
||||
if _, err := tx.Exec(ftsQuery, ftsArgs...); err != nil {
|
||||
vs.logger.Error("failed to write FTS batch", "error", err, "batch_size", len(rows))
|
||||
return err
|
||||
}
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
vs.logger.Error("failed to commit transaction", "error", err)
|
||||
return err
|
||||
}
|
||||
vs.logger.Debug("wrote vectors batch", "batch_size", len(rows))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -98,30 +202,25 @@ func (vs *VectorStorage) getTableName(emb []float32) (string, error) {
|
||||
}
|
||||
|
||||
// SearchClosest finds vectors closest to the query vector using efficient cosine similarity calculation
|
||||
func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, error) {
|
||||
func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.VectorRow, error) {
|
||||
if limit <= 0 {
|
||||
limit = 10
|
||||
}
|
||||
tableName, err := vs.getTableName(query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// For better performance, instead of loading all vectors at once,
|
||||
// we'll implement batching and potentially add L2 distance-based pre-filtering
|
||||
// since cosine similarity is related to L2 distance for normalized vectors
|
||||
|
||||
querySQL := "SELECT embeddings, slug, raw_text, filename FROM " + tableName
|
||||
rows, err := vs.sqlxDB.Query(querySQL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
// Use a min-heap or simple slice to keep track of top 3 closest vectors
|
||||
type SearchResult struct {
|
||||
vector models.VectorRow
|
||||
distance float32
|
||||
}
|
||||
var topResults []SearchResult
|
||||
// Process vectors one by one to avoid loading everything into memory
|
||||
for rows.Next() {
|
||||
var (
|
||||
embeddingsBlob []byte
|
||||
@@ -132,12 +231,9 @@ func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, err
|
||||
vs.logger.Error("failed to scan row", "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
storedEmbeddings := DeserializeVector(embeddingsBlob)
|
||||
|
||||
// Calculate cosine similarity (returns value between -1 and 1, where 1 is most similar)
|
||||
similarity := cosineSimilarity(query, storedEmbeddings)
|
||||
distance := 1 - similarity // Convert to distance where 0 is most similar
|
||||
distance := 1 - similarity
|
||||
|
||||
result := SearchResult{
|
||||
vector: models.VectorRow{
|
||||
@@ -149,20 +245,14 @@ func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, err
|
||||
distance: distance,
|
||||
}
|
||||
|
||||
// Add to top results and maintain only top 3
|
||||
topResults = append(topResults, result)
|
||||
|
||||
// Sort and keep only top 3
|
||||
sort.Slice(topResults, func(i, j int) bool {
|
||||
return topResults[i].distance < topResults[j].distance
|
||||
})
|
||||
|
||||
if len(topResults) > 3 {
|
||||
topResults = topResults[:3] // Keep only closest 3
|
||||
if len(topResults) > limit {
|
||||
topResults = topResults[:limit]
|
||||
}
|
||||
}
|
||||
|
||||
// Convert back to VectorRow slice
|
||||
results := make([]models.VectorRow, 0, len(topResults))
|
||||
for _, result := range topResults {
|
||||
result.vector.Distance = result.distance
|
||||
@@ -171,6 +261,100 @@ func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, err
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// GetVectorBySlug retrieves a vector row by its slug
|
||||
func (vs *VectorStorage) GetVectorBySlug(slug string) (*models.VectorRow, error) {
|
||||
embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120}
|
||||
for _, size := range embeddingSizes {
|
||||
table := fmt.Sprintf("embeddings_%d", size)
|
||||
query := fmt.Sprintf("SELECT embeddings, slug, raw_text, filename FROM %s WHERE slug = ?", table)
|
||||
row := vs.sqlxDB.QueryRow(query, slug)
|
||||
var (
|
||||
embeddingsBlob []byte
|
||||
retrievedSlug, rawText, fileName string
|
||||
)
|
||||
if err := row.Scan(&embeddingsBlob, &retrievedSlug, &rawText, &fileName); err != nil {
|
||||
// No row in this table, continue to next size
|
||||
continue
|
||||
}
|
||||
storedEmbeddings := DeserializeVector(embeddingsBlob)
|
||||
return &models.VectorRow{
|
||||
Embeddings: storedEmbeddings,
|
||||
Slug: retrievedSlug,
|
||||
RawText: rawText,
|
||||
FileName: fileName,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("vector with slug %s not found", slug)
|
||||
}
|
||||
|
||||
// SearchKeyword performs full-text search using FTS5
|
||||
func (vs *VectorStorage) SearchKeyword(query string, limit int) ([]models.VectorRow, error) {
|
||||
// Use FTS5 bm25 ranking. bm25 returns negative values where more negative is better.
|
||||
// We'll order by bm25 (ascending) and limit.
|
||||
ftsQuery := `SELECT slug, raw_text, filename, bm25(fts_embeddings) as score
|
||||
FROM fts_embeddings
|
||||
WHERE fts_embeddings MATCH ?
|
||||
ORDER BY score
|
||||
LIMIT ?`
|
||||
|
||||
// Try original query first
|
||||
rows, err := vs.sqlxDB.Query(ftsQuery, query, limit)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("FTS search failed: %w", err)
|
||||
}
|
||||
results, err := vs.scanRows(rows)
|
||||
rows.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If no results and query contains multiple terms, try OR fallback
|
||||
if len(results) == 0 && strings.Contains(query, " ") && !strings.Contains(strings.ToUpper(query), " OR ") {
|
||||
// Build OR query: term1 OR term2 OR term3
|
||||
terms := strings.Fields(query)
|
||||
if len(terms) > 1 {
|
||||
orQuery := strings.Join(terms, " OR ")
|
||||
rows, err := vs.sqlxDB.Query(ftsQuery, orQuery, limit)
|
||||
if err != nil {
|
||||
// Return original empty results rather than error
|
||||
return results, nil
|
||||
}
|
||||
orResults, err := vs.scanRows(rows)
|
||||
rows.Close()
|
||||
if err == nil {
|
||||
results = orResults
|
||||
}
|
||||
}
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// scanRows converts SQL rows to VectorRow slice
|
||||
func (vs *VectorStorage) scanRows(rows *sql.Rows) ([]models.VectorRow, error) {
|
||||
var results []models.VectorRow
|
||||
for rows.Next() {
|
||||
var slug, rawText, fileName string
|
||||
var score float64
|
||||
if err := rows.Scan(&slug, &rawText, &fileName, &score); err != nil {
|
||||
vs.logger.Error("failed to scan FTS row", "error", err)
|
||||
continue
|
||||
}
|
||||
// Convert BM25 score to distance-like metric (lower is better)
|
||||
// BM25 is negative, more negative is better. We'll normalize to positive distance.
|
||||
distance := float32(-score) // Make positive (since score is negative)
|
||||
if distance < 0 {
|
||||
distance = 0
|
||||
}
|
||||
results = append(results, models.VectorRow{
|
||||
Slug: slug,
|
||||
RawText: rawText,
|
||||
FileName: fileName,
|
||||
Distance: distance,
|
||||
})
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// ListFiles returns a list of all loaded files
|
||||
func (vs *VectorStorage) ListFiles() ([]string, error) {
|
||||
fileLists := make([][]string, 0)
|
||||
@@ -215,6 +399,10 @@ func (vs *VectorStorage) ListFiles() ([]string, error) {
|
||||
// RemoveEmbByFileName removes all embeddings associated with a specific filename
|
||||
func (vs *VectorStorage) RemoveEmbByFileName(filename string) error {
|
||||
var errors []string
|
||||
// Delete from FTS table first
|
||||
if _, err := vs.sqlxDB.Exec("DELETE FROM fts_embeddings WHERE filename = ?", filename); err != nil {
|
||||
errors = append(errors, err.Error())
|
||||
}
|
||||
embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120}
|
||||
for _, size := range embeddingSizes {
|
||||
table := fmt.Sprintf("embeddings_%d", size)
|
||||
|
||||
2
storage/migrations/003_add_fts.down.sql
Normal file
2
storage/migrations/003_add_fts.down.sql
Normal file
@@ -0,0 +1,2 @@
|
||||
-- Drop FTS5 virtual table
|
||||
DROP TABLE IF EXISTS fts_embeddings;
|
||||
15
storage/migrations/003_add_fts.up.sql
Normal file
15
storage/migrations/003_add_fts.up.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
-- Create FTS5 virtual table for full-text search
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS fts_embeddings USING fts5(
|
||||
slug UNINDEXED,
|
||||
raw_text,
|
||||
filename UNINDEXED,
|
||||
embedding_size UNINDEXED,
|
||||
tokenize='porter unicode61' -- Use porter stemmer and unicode61 tokenizer
|
||||
);
|
||||
|
||||
-- Create triggers to maintain FTS table when embeddings are inserted/deleted
|
||||
-- Note: We'll handle inserts/deletes programmatically for simplicity
|
||||
-- but triggers could be added here if needed.
|
||||
|
||||
-- Indexes for performance (FTS5 manages its own indexes)
|
||||
-- No additional indexes needed for FTS5 virtual table.
|
||||
2
storage/migrations/004_populate_fts.down.sql
Normal file
2
storage/migrations/004_populate_fts.down.sql
Normal file
@@ -0,0 +1,2 @@
|
||||
-- Clear FTS table (optional)
|
||||
DELETE FROM fts_embeddings;
|
||||
26
storage/migrations/004_populate_fts.up.sql
Normal file
26
storage/migrations/004_populate_fts.up.sql
Normal file
@@ -0,0 +1,26 @@
|
||||
-- Populate FTS table with existing embeddings
|
||||
DELETE FROM fts_embeddings;
|
||||
|
||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||
SELECT slug, raw_text, filename, 384 FROM embeddings_384;
|
||||
|
||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||
SELECT slug, raw_text, filename, 768 FROM embeddings_768;
|
||||
|
||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||
SELECT slug, raw_text, filename, 1024 FROM embeddings_1024;
|
||||
|
||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||
SELECT slug, raw_text, filename, 1536 FROM embeddings_1536;
|
||||
|
||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||
SELECT slug, raw_text, filename, 2048 FROM embeddings_2048;
|
||||
|
||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||
SELECT slug, raw_text, filename, 3072 FROM embeddings_3072;
|
||||
|
||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||
SELECT slug, raw_text, filename, 4096 FROM embeddings_4096;
|
||||
|
||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||
SELECT slug, raw_text, filename, 5120 FROM embeddings_5120;
|
||||
@@ -102,6 +102,22 @@ func NewProviderSQL(dbPath string, logger *slog.Logger) FullRepo {
|
||||
logger.Error("failed to open db connection", "error", err)
|
||||
return nil
|
||||
}
|
||||
// Enable WAL mode for better concurrency and performance
|
||||
if _, err := db.Exec("PRAGMA journal_mode = WAL;"); err != nil {
|
||||
logger.Warn("failed to enable WAL mode", "error", err)
|
||||
}
|
||||
if _, err := db.Exec("PRAGMA synchronous = NORMAL;"); err != nil {
|
||||
logger.Warn("failed to set synchronous mode", "error", err)
|
||||
}
|
||||
// Increase cache size for better performance
|
||||
if _, err := db.Exec("PRAGMA cache_size = -2000;"); err != nil {
|
||||
logger.Warn("failed to set cache size", "error", err)
|
||||
}
|
||||
// Log actual journal mode for debugging
|
||||
var journalMode string
|
||||
if err := db.QueryRow("PRAGMA journal_mode;").Scan(&journalMode); err == nil {
|
||||
logger.Debug("SQLite journal mode", "mode", journalMode)
|
||||
}
|
||||
p := ProviderSQL{db: db, logger: logger}
|
||||
if err := p.Migrate(); err != nil {
|
||||
logger.Error("migration failed, app cannot start", "error", err)
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"gf-lt/models"
|
||||
"sort"
|
||||
"unsafe"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
@@ -11,7 +12,7 @@ import (
|
||||
|
||||
type VectorRepo interface {
|
||||
WriteVector(*models.VectorRow) error
|
||||
SearchClosest(q []float32) ([]models.VectorRow, error)
|
||||
SearchClosest(q []float32, limit int) ([]models.VectorRow, error)
|
||||
ListFiles() ([]string, error)
|
||||
RemoveEmbByFileName(filename string) error
|
||||
DB() *sqlx.DB
|
||||
@@ -79,7 +80,7 @@ func (p ProviderSQL) WriteVector(row *models.VectorRow) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
|
||||
func (p ProviderSQL) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) {
|
||||
tableName, err := fetchTableName(q)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -94,7 +95,7 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
|
||||
vector models.VectorRow
|
||||
distance float32
|
||||
}
|
||||
var topResults []SearchResult
|
||||
var allResults []SearchResult
|
||||
for rows.Next() {
|
||||
var (
|
||||
embeddingsBlob []byte
|
||||
@@ -119,28 +120,19 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
|
||||
},
|
||||
distance: distance,
|
||||
}
|
||||
|
||||
// Add to top results and maintain only top results
|
||||
topResults = append(topResults, result)
|
||||
|
||||
// Sort and keep only top results
|
||||
// We'll keep the top 3 closest vectors
|
||||
if len(topResults) > 3 {
|
||||
// Simple sort and truncate to maintain only 3 best matches
|
||||
for i := 0; i < len(topResults); i++ {
|
||||
for j := i + 1; j < len(topResults); j++ {
|
||||
if topResults[i].distance > topResults[j].distance {
|
||||
topResults[i], topResults[j] = topResults[j], topResults[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
topResults = topResults[:3]
|
||||
}
|
||||
allResults = append(allResults, result)
|
||||
}
|
||||
// Sort by distance
|
||||
sort.Slice(allResults, func(i, j int) bool {
|
||||
return allResults[i].distance < allResults[j].distance
|
||||
})
|
||||
// Truncate to limit
|
||||
if len(allResults) > limit {
|
||||
allResults = allResults[:limit]
|
||||
}
|
||||
|
||||
// Convert back to VectorRow slice
|
||||
results := make([]models.VectorRow, len(topResults))
|
||||
for i, result := range topResults {
|
||||
results := make([]models.VectorRow, len(allResults))
|
||||
for i, result := range allResults {
|
||||
result.vector.Distance = result.distance
|
||||
results[i] = result.vector
|
||||
}
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"sys_prompt": "A game of cluedo. Players are {{user}}, {{char}}, {{char2}};\n\nrooms: hall, lounge, dinning room kitchen, ballroom, conservatory, billiard room, library, study;\nweapons: candlestick, dagger, lead pipe, revolver, rope, spanner;\npeople: miss Scarlett, colonel Mustard, mrs. White, reverend Green, mrs. Peacock, professor Plum;\n\nA murder happened in a mansion with 9 rooms. Victim is dr. Black.\nPlayers goal is to find out who commited a murder, in what room and with what weapon.\nWeapons, people and rooms not involved in murder are distributed between players (as cards) by tool agent.\nThe objective of the game is to deduce the details of the murder. There are six characters, six murder weapons, and nine rooms, leaving the players with 324 possibilities. As soon as a player enters a room, they may make a suggestion as to the details, naming a suspect, the room they are in, and the weapon. For example: \"I suspect Professor Plum, in the Dining Room, with the candlestick\".\nOnce a player makes a suggestion, the others are called upon to disprove it.\nBefore the player's move, tool agent will remind that players their cards. There are two types of moves: making a suggestion (suggestion_move) and disproving other player suggestion (evidence_move);\nIn this version player wins when the correct details are named in the suggestion_move.\n\n<example_game>\n{{user}}:\nlet's start a game of cluedo!\ntool: cards of {{char}} are 'LEAD PIPE', 'BALLROOM', 'CONSERVATORY', 'STUDY', 'Mrs. White'; suggestion_move;\n{{char}}:\n(putting miss Scarlet into the Hall with the Revolver) \"I suspect miss Scarlett, in the Hall, with the revolver.\"\ntool: cards of {{char2}} are 'SPANNER', 'DAGGER', 'Professor Plum', 'LIBRARY', 'Mrs. Peacock'; evidence_move;\n{{char2}}:\n\"No objections.\" (no cards matching the suspicion of {{char}})\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; evidence_move;\n{{user}}:\n\"I object. Miss Scarlett is innocent.\" (shows card with 'Miss Scarlett')\ntool: cards of {{char2}} are 'SPANNER', 'DAGGER', 'Professor Plum', 'LIBRARY', 'Mrs. Peacock'; suggestion_move;\n{{char2}}:\n*So it was not Miss Scarlett, good to know.*\n(moves Mrs. White to the Billiard Room) \"It might have been Mrs. White, in the Billiard Room, with the Revolver.\"\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; evidence_move;\n{{user}}:\n(no matching cards for the assumption of {{char2}}) \"Sounds possible to me.\"\ntool: cards of {{char}} are 'LEAD PIPE', 'BALLROOM', 'CONSERVATORY', 'STUDY', 'Mrs. White'; evidence_move;\n{{char}}:\n(shows Mrs. White card) \"No. Was not Mrs. White\"\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; suggestion_move;\n{{user}}:\n*So not Mrs. White...* (moves Reverend Green into the Billiard Room) \"I suspect Reverend Green, in the Billiard Room, with the Revolver.\"\ntool: Correct. It was Reverend Green in the Billiard Room, with the revolver. {{user}} wins.\n</example_game>",
|
||||
"role": "CluedoPlayer",
|
||||
"role2": "CluedoEnjoyer",
|
||||
"filepath": "sysprompts/cluedo.json",
|
||||
"first_msg": "Hey guys! Want to play cluedo?"
|
||||
}
|
||||
118
tui.go
118
tui.go
@@ -29,6 +29,8 @@ var (
|
||||
statusLineWidget *tview.TextView
|
||||
helpView *tview.TextView
|
||||
flex *tview.Flex
|
||||
bottomFlex *tview.Flex
|
||||
notificationWidget *tview.TextView
|
||||
imgView *tview.Image
|
||||
defaultImage = "sysprompts/llama.png"
|
||||
indexPickWindow *tview.InputField
|
||||
@@ -36,6 +38,7 @@ var (
|
||||
roleEditWindow *tview.InputField
|
||||
shellInput *tview.InputField
|
||||
confirmModal *tview.Modal
|
||||
toastTimer *time.Timer
|
||||
confirmPageName = "confirm"
|
||||
fullscreenMode bool
|
||||
positionVisible bool = true
|
||||
@@ -137,8 +140,8 @@ func setShellMode(enabled bool) {
|
||||
}()
|
||||
}
|
||||
|
||||
// showToast displays a temporary message in the top‑right corner.
|
||||
// It auto‑hides after 3 seconds and disappears when clicked.
|
||||
// showToast displays a temporary notification in the bottom-right corner.
|
||||
// It auto-hides after 3 seconds.
|
||||
func showToast(title, message string) {
|
||||
sanitize := func(s string, maxLen int) string {
|
||||
sanitized := strings.Map(func(r rune) rune {
|
||||
@@ -154,33 +157,68 @@ func showToast(title, message string) {
|
||||
}
|
||||
title = sanitize(title, 50)
|
||||
message = sanitize(message, 197)
|
||||
notification := tview.NewTextView().
|
||||
SetTextAlign(tview.AlignCenter).
|
||||
SetDynamicColors(true).
|
||||
SetRegions(true).
|
||||
SetText(fmt.Sprintf("[yellow]%s[-]\n", message)).
|
||||
SetChangedFunc(func() {
|
||||
app.Draw()
|
||||
if toastTimer != nil {
|
||||
toastTimer.Stop()
|
||||
}
|
||||
// show blocking notification to not mess up flex
|
||||
if fullscreenMode {
|
||||
notification := tview.NewTextView().
|
||||
SetTextAlign(tview.AlignCenter).
|
||||
SetDynamicColors(true).
|
||||
SetRegions(true).
|
||||
SetText(fmt.Sprintf("[yellow]%s[-]\n", message)).
|
||||
SetChangedFunc(func() {
|
||||
app.Draw()
|
||||
})
|
||||
notification.SetTitleAlign(tview.AlignLeft).
|
||||
SetBorder(true).
|
||||
SetTitle(title)
|
||||
// Wrap it in a full‑screen Flex to position it in the top‑right corner.
|
||||
// Outer Flex (row) pushes content to the top; inner Flex (column) pushes to the right.
|
||||
background := tview.NewFlex().SetDirection(tview.FlexRow).
|
||||
AddItem(nil, 0, 1, false). // top spacer
|
||||
AddItem(tview.NewFlex().SetDirection(tview.FlexColumn).
|
||||
AddItem(nil, 0, 1, false). // left spacer
|
||||
AddItem(notification, 40, 1, true), // notification width 40
|
||||
5, 1, false) // notification height 5
|
||||
// Generate a unique page name (e.g., using timestamp) to allow multiple toasts.
|
||||
pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano())
|
||||
pages.AddPage(pageName, background, true, true)
|
||||
// Auto‑dismiss after 2 seconds, since blocking is more annoying
|
||||
time.AfterFunc(2*time.Second, func() {
|
||||
app.QueueUpdateDraw(func() {
|
||||
if pages.HasPage(pageName) {
|
||||
pages.RemovePage(pageName)
|
||||
}
|
||||
})
|
||||
})
|
||||
notification.SetTitleAlign(tview.AlignLeft).
|
||||
SetBorder(true).
|
||||
SetTitle(title)
|
||||
// Wrap it in a full‑screen Flex to position it in the top‑right corner.
|
||||
// Outer Flex (row) pushes content to the top; inner Flex (column) pushes to the right.
|
||||
background := tview.NewFlex().SetDirection(tview.FlexRow).
|
||||
AddItem(nil, 0, 1, false). // top spacer
|
||||
AddItem(tview.NewFlex().SetDirection(tview.FlexColumn).
|
||||
AddItem(nil, 0, 1, false). // left spacer
|
||||
AddItem(notification, 40, 1, true), // notification width 40
|
||||
5, 1, false) // notification height 5
|
||||
// Generate a unique page name (e.g., using timestamp) to allow multiple toasts.
|
||||
pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano())
|
||||
pages.AddPage(pageName, background, true, true)
|
||||
// Auto‑dismiss after 3 seconds.
|
||||
time.AfterFunc(3*time.Second, func() {
|
||||
return
|
||||
}
|
||||
notificationWidget.SetTitle(title)
|
||||
notificationWidget.SetText(fmt.Sprintf("[yellow]%s[-]", message))
|
||||
go func() {
|
||||
app.QueueUpdateDraw(func() {
|
||||
if pages.HasPage(pageName) {
|
||||
pages.RemovePage(pageName)
|
||||
flex.RemoveItem(bottomFlex)
|
||||
flex.RemoveItem(statusLineWidget)
|
||||
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
|
||||
AddItem(textArea, 0, 1, true).
|
||||
AddItem(notificationWidget, 40, 1, false)
|
||||
flex.AddItem(bottomFlex, 0, 10, true)
|
||||
if positionVisible {
|
||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
||||
}
|
||||
})
|
||||
}()
|
||||
toastTimer = time.AfterFunc(3*time.Second, func() {
|
||||
app.QueueUpdateDraw(func() {
|
||||
flex.RemoveItem(bottomFlex)
|
||||
flex.RemoveItem(statusLineWidget)
|
||||
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
|
||||
AddItem(textArea, 0, 1, true).
|
||||
AddItem(notificationWidget, 0, 0, false)
|
||||
flex.AddItem(bottomFlex, 0, 10, true)
|
||||
if positionVisible {
|
||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
||||
}
|
||||
})
|
||||
})
|
||||
@@ -235,7 +273,7 @@ func init() {
|
||||
shellHistoryPos = -1
|
||||
}
|
||||
// Handle Tab key for @ file completion
|
||||
if event.Key() == tcell.KeyTab {
|
||||
if event.Key() == tcell.KeyTab && shellMode {
|
||||
currentText := shellInput.GetText()
|
||||
atIndex := strings.LastIndex(currentText, "@")
|
||||
if atIndex >= 0 {
|
||||
@@ -286,12 +324,26 @@ func init() {
|
||||
SetDynamicColors(true).
|
||||
SetRegions(true).
|
||||
SetChangedFunc(func() {
|
||||
// INFO:
|
||||
// https://github.com/rivo/tview/wiki/Concurrency#event-handlers
|
||||
// although already called by default per tview specs
|
||||
// calling it explicitly makes text streaming to look more smooth
|
||||
app.Draw()
|
||||
})
|
||||
notificationWidget = tview.NewTextView().
|
||||
SetTextAlign(tview.AlignCenter).
|
||||
SetDynamicColors(true).
|
||||
SetRegions(true).
|
||||
SetChangedFunc(func() {
|
||||
})
|
||||
notificationWidget.SetBorder(true).SetTitle("notification")
|
||||
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
|
||||
AddItem(textArea, 0, 1, true).
|
||||
AddItem(notificationWidget, 0, 0, false)
|
||||
//
|
||||
flex = tview.NewFlex().SetDirection(tview.FlexRow).
|
||||
AddItem(textView, 0, 40, false).
|
||||
AddItem(textArea, 0, 10, true) // Restore original height
|
||||
AddItem(bottomFlex, 0, 10, true)
|
||||
if positionVisible {
|
||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
||||
}
|
||||
@@ -360,10 +412,14 @@ func init() {
|
||||
// y += h / 2
|
||||
// return x, y, w, h
|
||||
// })
|
||||
notificationWidget.SetDrawFunc(func(screen tcell.Screen, x, y, w, h int) (int, int, int, int) {
|
||||
y += h / 2
|
||||
return x, y, w, h
|
||||
})
|
||||
// Initially set up flex without search bar
|
||||
flex = tview.NewFlex().SetDirection(tview.FlexRow).
|
||||
AddItem(textView, 0, 40, false).
|
||||
AddItem(textArea, 0, 10, true) // Restore original height
|
||||
AddItem(bottomFlex, 0, 10, true)
|
||||
if positionVisible {
|
||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
||||
}
|
||||
@@ -1095,7 +1151,7 @@ func init() {
|
||||
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
|
||||
return nil
|
||||
}
|
||||
if event.Key() == tcell.KeyTab {
|
||||
if event.Key() == tcell.KeyTab && !shellMode {
|
||||
currentF := app.GetFocus()
|
||||
if currentF == textArea {
|
||||
currentText := textArea.GetText()
|
||||
|
||||
Reference in New Issue
Block a user