Merge branch 'master' into feat/agent-flow

This commit is contained in:
Grail Finder
2026-03-09 07:07:36 +03:00
36 changed files with 3354 additions and 1302 deletions

3
.gitignore vendored
View File

@@ -3,6 +3,8 @@
testlog testlog
history/ history/
*.db *.db
*.db-shm
*.db-wal
config.toml config.toml
sysprompts/* sysprompts/*
!sysprompts/alice_bob_carl.json !sysprompts/alice_bob_carl.json
@@ -15,3 +17,4 @@ gflt
chat_exports/*.json chat_exports/*.json
ragimport ragimport
.env .env
onnx/

108
Makefile
View File

@@ -1,4 +1,4 @@
.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve .PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve fetch-onnx install-onnx-deps
run: setconfig run: setconfig
go build -tags extra -o gf-lt && ./gf-lt go build -tags extra -o gf-lt && ./gf-lt
@@ -30,6 +30,105 @@ lint: ## Run linters. Use make install-linters first.
lintall: lint lintall: lint
noblanks ./... noblanks ./...
# fetch-onnx downloads the embeddinggemma-300m ONNX config, tokenizer, q4 model
# and q4 model data from Hugging Face into onnx/embedgemma.
fetch-onnx:
	mkdir -p onnx/embedgemma && \
	curl -o onnx/embedgemma/config.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/config.json && \
	curl -o onnx/embedgemma/tokenizer.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/tokenizer.json && \
	curl -o onnx/embedgemma/model_q4.onnx -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx && \
	curl -o onnx/embedgemma/model_q4.onnx_data -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx_data?download=true
# install-onnx-deps downloads a prebuilt ONNX Runtime release from GitHub and
# copies its libraries into /usr/local/lib, unless ldconfig already lists one.
#
# Shell variables must be written as $${VAR}: a single $ is expanded by make
# (to an empty string for an undefined make variable) before the shell runs.
# curl is run with -f so an HTTP error such as 404 fails the command and the
# fallback version is tried, instead of saving the error page as the archive.
# The "a || b" steps near the end are wrapped in { ...; } so that an earlier
# failure in the && chain cannot resume execution at the || alternative.
#
# NOTE(review): the CUDA and CPU branches request the same archive name; if the
# GPU build is published under a different asset name this still installs the
# CPU build - confirm against the release page.
install-onnx-deps: ## Install ONNX Runtime with CUDA support (or CPU fallback)
	@echo "=== ONNX Runtime Installer ===" && \
	echo "" && \
	echo "Checking for existing ONNX Runtime..." && \
	if ldconfig -p 2>/dev/null | grep -q libonnxruntime.so.1; then \
		echo "ONNX Runtime is already installed:" && \
		ldconfig -p 2>/dev/null | grep libonnxruntime && \
		echo "" && \
		echo "Skipping installation. To reinstall, remove existing libs first:" && \
		echo " sudo rm -f /usr/local/lib/libonnxruntime*.so*" && \
		exit 0; \
	fi && \
	echo "No ONNX Runtime found. Proceeding with installation..." && \
	echo "" && \
	echo "Detecting CUDA version..." && \
	HAS_CUDA=0 && \
	if command -v nvidia-smi >/dev/null 2>&1; then \
		CUDA_INFO=$$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1) && \
		if [ -n "$$CUDA_INFO" ]; then \
			echo "Found NVIDIA GPU with driver: $$CUDA_INFO" && \
			HAS_CUDA=1; \
		else \
			echo "NVIDIA driver found but could not detect CUDA version"; \
		fi; \
	else \
		echo "No NVIDIA GPU detected (nvidia-smi not found)"; \
	fi && \
	echo "" && \
	echo "Determining ONNX Runtime version..." && \
	ARCH=$$(uname -m) && \
	if [ "$$ARCH" = "x86_64" ]; then \
		ONNX_ARCH="x64"; \
	elif [ "$$ARCH" = "aarch64" ] || [ "$$ARCH" = "arm64" ]; then \
		ONNX_ARCH="aarch64"; \
	else \
		echo "Unsupported architecture: $$ARCH" && \
		exit 1; \
	fi && \
	echo "Detected architecture: $$ARCH (ONNX runtime: $$ONNX_ARCH)" && \
	if [ "$$HAS_CUDA" = "1" ]; then \
		echo "Installing ONNX Runtime with CUDA support..."; \
		ONNX_VERSION="1.24.2"; \
	else \
		echo "Installing ONNX Runtime (CPU version)..."; \
		ONNX_VERSION="1.24.2"; \
	fi && \
	FILENAME="onnxruntime-linux-$${ONNX_ARCH}-$${ONNX_VERSION}.tgz" && \
	URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \
	echo "Downloading $${URL}..." && \
	mkdir -p /tmp/onnx-install && \
	curl -fL -o "/tmp/onnx-install/$${FILENAME}" "$${URL}" || { \
		echo "Failed to download ONNX Runtime v$${ONNX_VERSION}. Trying v1.18.0..." && \
		ONNX_VERSION="1.18.0" && \
		FILENAME="onnxruntime-linux-$${ONNX_ARCH}-$${ONNX_VERSION}.tgz" && \
		URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \
		curl -fL -o "/tmp/onnx-install/$${FILENAME}" "$${URL}" || { \
			echo "ERROR: Failed to download ONNX Runtime from GitHub" && \
			echo "" && \
			echo "Please install manually:" && \
			echo " 1. Go to https://github.com/microsoft/onnxruntime/releases" && \
			echo " 2. Download onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \
			echo " 3. Extract and copy to /usr/local/lib:" && \
			echo " tar -xzf onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \
			echo " sudo cp -r onnxruntime-linux-$${ONNX_ARCH}-VERSION/lib/* /usr/local/lib/" && \
			echo " sudo ldconfig" && \
			exit 1; \
		}; \
	} && \
	echo "Extracting..." && \
	cd /tmp/onnx-install && tar -xzf "$${FILENAME}" && \
	echo "Installing to /usr/local/lib..." && \
	ONNX_DIR=$$(find /tmp/onnx-install -maxdepth 1 -type d -name "onnxruntime-linux-*") && \
	if [ -d "$${ONNX_DIR}/lib" ]; then \
		cp -r "$${ONNX_DIR}"/lib/* /usr/local/lib/ 2>/dev/null || sudo cp -r "$${ONNX_DIR}"/lib/* /usr/local/lib/; \
	else \
		echo "ERROR: Could not find lib directory in extracted archive" && \
		exit 1; \
	fi && \
	echo "Updating library cache..." && \
	{ sudo ldconfig 2>/dev/null || ldconfig; } && \
	echo "" && \
	echo "=== Installation complete! ===" && \
	echo "" && \
	echo "Installed libraries:" && \
	{ ldconfig -p | grep libonnxruntime || echo "(libraries may require logout/relogin to appear)"; } && \
	echo "" && \
	if [ "$$HAS_CUDA" = "1" ]; then \
		echo "NOTE: CUDA-enabled ONNX Runtime installed."; \
		echo "Ensure you also have CUDA libraries installed:"; \
		echo " - libcudnn, libcublas, libcurand"; \
	else \
		echo "NOTE: CPU-only ONNX Runtime installed."; \
		echo "For GPU support, install CUDA and re-run this script."; \
	fi && \
	rm -rf /tmp/onnx-install
# Whisper STT Setup (in batteries directory) # Whisper STT Setup (in batteries directory)
setup-whisper: build-whisper download-whisper-model setup-whisper: build-whisper download-whisper-model
@@ -44,11 +143,10 @@ build-whisper: ## Build whisper.cpp from source in batteries directory
download-whisper-model: ## Download Whisper model for STT in batteries directory download-whisper-model: ## Download Whisper model for STT in batteries directory
@echo "Downloading Whisper model for STT..." @echo "Downloading Whisper model for STT..."
@if [ ! -d "batteries/whisper.cpp" ]; then \ @if [ ! -d "batteries/whisper.cpp/models" ]; then \
echo "Please run 'make setup-whisper' first to clone the repository."; \ mkdir -p "batteries/whisper.cpp/models" \
exit 1; \
fi fi
@cd batteries/whisper.cpp && bash ./models/download-ggml-model.sh large-v3-turbo-q5_0 curl -o batteries/whisper.cpp/models/ggml-large-v3-turbo-q5_0.bin -L "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin?download=true"
@echo "Whisper model downloaded successfully!" @echo "Whisper model downloaded successfully!"
# Docker targets for STT/TTS services (in batteries directory) # Docker targets for STT/TTS services (in batteries directory)

View File

@@ -13,6 +13,12 @@ made with use of [tview](https://github.com/rivo/tview)
#### how it looks #### how it looks
![how it looks](assets/ex01.png) ![how it looks](assets/ex01.png)
#### dependencies
- make
- go
- ffmpeg (extra)
#### how to install #### how to install
(requires golang) (requires golang)
clone the project clone the project

View File

@@ -6,19 +6,27 @@ services:
ports: ports:
- "8081:8081" - "8081:8081"
volumes: volumes:
- whisper_models:/app/models - ./whisper.cpp/models/ggml-large-v3-turbo-q5_0.bin:/app/models/ggml-large-v3-turbo-q5_0.bin
working_dir: /app working_dir: /app
entrypoint: "" entrypoint: ""
command: > command: >
sh -c " sh -c "
if [ ! -f /app/models/ggml-large-v3-turbo.bin ]; then if [ ! -f /app/models/ggml-large-v3-turbo-q5_0.bin ]; then
echo 'Downloading ggml-large-v3-turbo model...' echo 'Downloading ggml-large-v3-turboq5_0 model...'
./download-ggml-model.sh large-v3-turbo /app/models curl -o /app/models/ggml-large-v3-turbo-q5_0.bin -L "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin?download=true"
fi && fi &&
./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo.bin -t 4 -p 1 --port 8081 --host 0.0.0.0 ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo-q5_0.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
" "
environment: environment:
- WHISPER_LOG_LEVEL=3 - WHISPER_LOG_LEVEL=3
# For GPU support, uncomment the following lines:
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
# Restart policy in case the service fails # Restart policy in case the service fails
restart: unless-stopped restart: unless-stopped
@@ -45,7 +53,5 @@ services:
volumes: volumes:
models: models:
driver: local driver: local
audio:
driver: local
whisper_models: whisper_models:
driver: local driver: local

128
bot.go
View File

@@ -16,13 +16,13 @@ import (
"log/slog" "log/slog"
"net" "net"
"net/http" "net/http"
"net/url"
"os" "os"
"regexp" "regexp"
"slices" "slices"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"sync/atomic"
"time" "time"
) )
@@ -41,7 +41,7 @@ var (
store storage.FullRepo store storage.FullRepo
defaultFirstMsg = "Hello! What can I do for you?" defaultFirstMsg = "Hello! What can I do for you?"
defaultStarter = []models.RoleMsg{} defaultStarter = []models.RoleMsg{}
interruptResp = false interruptResp atomic.Bool
ragger *rag.RAG ragger *rag.RAG
chunkParser ChunkParser chunkParser ChunkParser
lastToolCall *models.FuncCall lastToolCall *models.FuncCall
@@ -253,12 +253,7 @@ func createClient(connectTimeout time.Duration) *http.Client {
} }
func warmUpModel() { func warmUpModel() {
u, err := url.Parse(cfg.CurrentAPI) if !isLocalLlamacpp() {
if err != nil {
return
}
host := u.Hostname()
if host != "localhost" && host != "127.0.0.1" && host != "::1" {
return return
} }
// Check if model is already loaded // Check if model is already loaded
@@ -649,7 +644,7 @@ func sendMsgToLLM(body io.Reader) {
// continue // continue
} }
if len(line) <= 1 { if len(line) <= 1 {
if interruptResp { if interruptResp.Load() {
goto interrupt // get unstuck from bad connection goto interrupt // get unstuck from bad connection
} }
continue // skip \n continue // skip \n
@@ -742,8 +737,7 @@ func sendMsgToLLM(body io.Reader) {
lastToolCall.ID = chunk.ToolID lastToolCall.ID = chunk.ToolID
} }
interrupt: interrupt:
if interruptResp { // read bytes, so it would not get into beginning of the next req if interruptResp.Load() { // read bytes, so it would not get into beginning of the next req
// interruptResp = false
logger.Info("interrupted bot response", "chunk_counter", counter) logger.Info("interrupted bot response", "chunk_counter", counter)
streamDone <- true streamDone <- true
break break
@@ -776,14 +770,14 @@ func showSpinner() {
if cfg.WriteNextMsgAsCompletionAgent != "" { if cfg.WriteNextMsgAsCompletionAgent != "" {
botPersona = cfg.WriteNextMsgAsCompletionAgent botPersona = cfg.WriteNextMsgAsCompletionAgent
} }
for botRespMode || toolRunningMode { for botRespMode.Load() || toolRunningMode.Load() {
time.Sleep(400 * time.Millisecond) time.Sleep(400 * time.Millisecond)
spin := i % len(spinners) spin := i % len(spinners)
app.QueueUpdateDraw(func() { app.QueueUpdateDraw(func() {
switch { switch {
case toolRunningMode: case toolRunningMode.Load():
textArea.SetTitle(spinners[spin] + " tool") textArea.SetTitle(spinners[spin] + " tool")
case botRespMode: case botRespMode.Load():
textArea.SetTitle(spinners[spin] + " " + botPersona + " (F6 to interrupt)") textArea.SetTitle(spinners[spin] + " " + botPersona + " (F6 to interrupt)")
default: default:
textArea.SetTitle(spinners[spin] + " input") textArea.SetTitle(spinners[spin] + " input")
@@ -797,8 +791,8 @@ func showSpinner() {
} }
func chatRound(r *models.ChatRoundReq) error { func chatRound(r *models.ChatRoundReq) error {
interruptResp = false interruptResp.Store(false)
botRespMode = true botRespMode.Store(true)
go showSpinner() go showSpinner()
updateStatusLine() updateStatusLine()
botPersona := cfg.AssistantRole botPersona := cfg.AssistantRole
@@ -806,7 +800,7 @@ func chatRound(r *models.ChatRoundReq) error {
botPersona = cfg.WriteNextMsgAsCompletionAgent botPersona = cfg.WriteNextMsgAsCompletionAgent
} }
defer func() { defer func() {
botRespMode = false botRespMode.Store(false)
ClearImageAttachment() ClearImageAttachment()
}() }()
// check that there is a model set to use if is not local // check that there is a model set to use if is not local
@@ -857,7 +851,7 @@ out:
if thinkingCollapsed { if thinkingCollapsed {
// Show placeholder immediately when thinking starts in collapsed mode // Show placeholder immediately when thinking starts in collapsed mode
fmt.Fprint(textView, "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]") fmt.Fprint(textView, "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]")
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
respText.WriteString(chunk) respText.WriteString(chunk)
@@ -872,7 +866,7 @@ out:
// Thinking already displayed as placeholder, just update respText // Thinking already displayed as placeholder, just update respText
respText.WriteString(chunk) respText.WriteString(chunk)
justExitedThinkingCollapsed = true justExitedThinkingCollapsed = true
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
continue continue
@@ -893,8 +887,10 @@ out:
fmt.Fprint(textView, chunk) fmt.Fprint(textView, chunk)
respText.WriteString(chunk) respText.WriteString(chunk)
// Update the message in chatBody.Messages so it persists during Alt+T // Update the message in chatBody.Messages so it persists during Alt+T
chatBody.Messages[msgIdx].Content = respText.String() if !r.Resume {
if scrollToEndEnabled { chatBody.Messages[msgIdx].Content += respText.String()
}
if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
// Send chunk to audio stream handler // Send chunk to audio stream handler
@@ -904,7 +900,7 @@ out:
case toolChunk := <-openAIToolChan: case toolChunk := <-openAIToolChan:
fmt.Fprint(textView, toolChunk) fmt.Fprint(textView, toolChunk)
toolResp.WriteString(toolChunk) toolResp.WriteString(toolChunk)
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
case <-streamDone: case <-streamDone:
@@ -912,7 +908,7 @@ out:
chunk := <-chunkChan chunk := <-chunkChan
fmt.Fprint(textView, chunk) fmt.Fprint(textView, chunk)
respText.WriteString(chunk) respText.WriteString(chunk)
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
if cfg.TTS_ENABLED { if cfg.TTS_ENABLED {
@@ -934,7 +930,7 @@ out:
} }
lastRespStats = nil lastRespStats = nil
} }
botRespMode = false botRespMode.Store(false)
if r.Resume { if r.Resume {
chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String() chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String()
updatedMsg := chatBody.Messages[len(chatBody.Messages)-1] updatedMsg := chatBody.Messages[len(chatBody.Messages)-1]
@@ -963,7 +959,7 @@ out:
} }
// Strip think blocks before parsing for tool calls // Strip think blocks before parsing for tool calls
respTextNoThink := thinkBlockRE.ReplaceAllString(respText.String(), "") respTextNoThink := thinkBlockRE.ReplaceAllString(respText.String(), "")
if interruptResp { if interruptResp.Load() {
return nil return nil
} }
if findCall(respTextNoThink, toolResp.String()) { if findCall(respTextNoThink, toolResp.String()) {
@@ -1198,9 +1194,9 @@ func findCall(msg, toolCall string) bool {
} }
// Show tool call progress indicator before execution // Show tool call progress indicator before execution
fmt.Fprintf(textView, "\n[yellow::i][tool: %s...][-:-:-]", fc.Name) fmt.Fprintf(textView, "\n[yellow::i][tool: %s...][-:-:-]", fc.Name)
toolRunningMode = true toolRunningMode.Store(true)
resp := callToolWithAgent(fc.Name, fc.Args) resp := callToolWithAgent(fc.Name, fc.Args)
toolRunningMode = false toolRunningMode.Store(false)
toolMsg := string(resp) toolMsg := string(resp)
logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg) logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
// Create tool response message with the proper tool_call_id // Create tool response message with the proper tool_call_id
@@ -1393,14 +1389,17 @@ func updateModelLists() {
} }
} }
// if llama.cpp started after gf-lt? // if llama.cpp started after gf-lt?
localModelsMu.Lock() ml, err := fetchLCPModelsWithLoadStatus()
LocalModels, err = fetchLCPModelsWithLoadStatus()
localModelsMu.Unlock()
if err != nil { if err != nil {
logger.Warn("failed to fetch llama.cpp models", "error", err) logger.Warn("failed to fetch llama.cpp models", "error", err)
} }
localModelsMu.Lock()
LocalModels = ml
localModelsMu.Unlock()
// set already loaded model in llama.cpp // set already loaded model in llama.cpp
if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") { if !isLocalLlamacpp() {
return
}
localModelsMu.Lock() localModelsMu.Lock()
defer localModelsMu.Unlock() defer localModelsMu.Unlock()
for i := range LocalModels { for i := range LocalModels {
@@ -1408,14 +1407,13 @@ func updateModelLists() {
m := strings.TrimPrefix(LocalModels[i], models.LoadedMark) m := strings.TrimPrefix(LocalModels[i], models.LoadedMark)
cfg.CurrentModel = m cfg.CurrentModel = m
chatBody.Model = m chatBody.Model = m
cachedModelColor = "green" cachedModelColor.Store("green")
updateStatusLine() updateStatusLine()
updateToolCapabilities() updateToolCapabilities()
app.Draw() app.Draw()
return return
} }
} }
}
} }
func refreshLocalModelsIfEmpty() { func refreshLocalModelsIfEmpty() {
@@ -1500,7 +1498,13 @@ func init() {
os.Exit(1) os.Exit(1)
return return
} }
ragger = rag.New(logger, store, cfg) ragger, err = rag.New(logger, store, cfg)
if err != nil {
logger.Error("failed to create RAG", "error", err)
}
if ragger != nil && ragger.FallbackMessage() != "" && app != nil {
showToast("RAG", "ONNX unavailable, using API: "+ragger.FallbackMessage())
}
// https://github.com/coreydaley/ggerganov-llama.cpp/blob/master/examples/server/README.md // https://github.com/coreydaley/ggerganov-llama.cpp/blob/master/examples/server/README.md
// load all chats in memory // load all chats in memory
if _, err := loadHistoryChats(); err != nil { if _, err := loadHistoryChats(); err != nil {
@@ -1541,57 +1545,9 @@ func init() {
} }
} }
} }
// Initialize scrollToEndEnabled based on config // atomic default values
scrollToEndEnabled = cfg.AutoScrollEnabled cachedModelColor.Store("orange")
go updateModelLists()
go chatWatcher(ctx) go chatWatcher(ctx)
} initTUI()
initTools()
// getValidKnowToRecipient returns the first entry of msg.KnownTo that should
// answer msg, together with true. It returns ("", false) when cfg is nil,
// when CharSpecificContextEnabled is off, when KnownTo names every chat role,
// when a user role is reached in KnownTo, or when no eligible entry exists.
// Side effect: msg.KnownTo is sorted in place.
func getValidKnowToRecipient(msg *models.RoleMsg) (string, bool) {
if cfg == nil || !cfg.CharSpecificContextEnabled {
return "", false
}
// case where all roles are in the tag => public message
cr := listChatRoles()
slices.Sort(cr)
slices.Sort(msg.KnownTo)
if slices.Equal(cr, msg.KnownTo) {
logger.Info("got msg with tag mentioning every role")
return "", false
}
// Check each character in the KnownTo list
for _, recipient := range msg.KnownTo {
if recipient == msg.Role || recipient == cfg.ToolRole {
// weird cases, skip
continue
}
// Skip if this is the user character (user handles their own turn)
// If user is in KnownTo, stop processing - it's the user's turn
if recipient == cfg.UserRole || recipient == cfg.WriteNextMsgAs {
return "", false
}
// first remaining entry (in sorted order) wins
return recipient, true
}
return "", false
}
// triggerPrivateMessageResponses checks if a message was sent privately to specific characters
// and triggers those non-user characters to respond
func triggerPrivateMessageResponses(msg *models.RoleMsg) {
recipient, ok := getValidKnowToRecipient(msg)
if !ok || recipient == "" {
return
}
// Trigger the recipient character to respond
triggerMsg := recipient + ":\n"
// Send empty message so LLM continues naturally from the conversation
crr := &models.ChatRoundReq{
UserMsg: triggerMsg,
Role: recipient,
Resume: true,
}
fmt.Fprintf(textView, "\n[-:-:b](%d) ", len(chatBody.Messages))
fmt.Fprint(textView, roleToIcon(recipient))
fmt.Fprint(textView, "[-:-:-]\n")
chatRoundChan <- crr
} }

View File

@@ -13,6 +13,9 @@ OpenRouterChatAPI = "https://openrouter.ai/api/v1/chat/completions"
# embeddings # embeddings
EmbedURL = "http://localhost:8082/v1/embeddings" EmbedURL = "http://localhost:8082/v1/embeddings"
HFToken = "" HFToken = ""
EmbedModelPath = "onnx/embedgemma/model_q4.onnx"
EmbedTokenizerPath = "onnx/embedgemma/tokenizer.json"
EmbedDims = 768
# #
ShowSys = true ShowSys = true
LogFile = "log.txt" LogFile = "log.txt"
@@ -24,9 +27,9 @@ ChunkLimit = 100000
AutoScrollEnabled = true AutoScrollEnabled = true
AutoCleanToolCallsFromCtx = false AutoCleanToolCallsFromCtx = false
# rag settings # rag settings
RAGEnabled = false
RAGBatchSize = 1 RAGBatchSize = 1
RAGWordLimit = 80 RAGWordLimit = 250
RAGOverlapWords = 25
RAGDir = "ragimport" RAGDir = "ragimport"
# extra tts # extra tts
TTS_ENABLED = false TTS_ENABLED = false

View File

@@ -27,7 +27,6 @@ type Config struct {
WriteNextMsgAs string WriteNextMsgAs string
WriteNextMsgAsCompletionAgent string WriteNextMsgAsCompletionAgent string
SkipLLMResp bool SkipLLMResp bool
AutoCleanToolCallsFromCtx bool `toml:"AutoCleanToolCallsFromCtx"`
DBPATH string `toml:"DBPATH"` DBPATH string `toml:"DBPATH"`
FilePickerDir string `toml:"FilePickerDir"` FilePickerDir string `toml:"FilePickerDir"`
FilePickerExts string `toml:"FilePickerExts"` FilePickerExts string `toml:"FilePickerExts"`
@@ -36,11 +35,14 @@ type Config struct {
// embeddings // embeddings
EmbedURL string `toml:"EmbedURL"` EmbedURL string `toml:"EmbedURL"`
HFToken string `toml:"HFToken"` HFToken string `toml:"HFToken"`
EmbedModelPath string `toml:"EmbedModelPath"`
EmbedTokenizerPath string `toml:"EmbedTokenizerPath"`
EmbedDims int `toml:"EmbedDims"`
// rag settings // rag settings
RAGEnabled bool `toml:"RAGEnabled"`
RAGDir string `toml:"RAGDir"` RAGDir string `toml:"RAGDir"`
RAGBatchSize int `toml:"RAGBatchSize"` RAGBatchSize int `toml:"RAGBatchSize"`
RAGWordLimit uint32 `toml:"RAGWordLimit"` RAGWordLimit uint32 `toml:"RAGWordLimit"`
RAGOverlapWords uint32 `toml:"RAGOverlapWords"`
// deepseek // deepseek
DeepSeekChatAPI string `toml:"DeepSeekChatAPI"` DeepSeekChatAPI string `toml:"DeepSeekChatAPI"`
DeepSeekCompletionAPI string `toml:"DeepSeekCompletionAPI"` DeepSeekCompletionAPI string `toml:"DeepSeekCompletionAPI"`

View File

@@ -63,17 +63,11 @@ This document explains how to set up and configure the application using the `co
#### AutoScrollEnabled (`true`) #### AutoScrollEnabled (`true`)
- Whether to automatically scroll chat window while llm streams its response. - Whether to automatically scroll chat window while llm streams its response.
#### AutoCleanToolCallsFromCtx (`false`)
- Whether to automatically clean tool calls from the conversation context to manage token usage.
### RAG (Retrieval Augmented Generation) Settings ### RAG (Retrieval Augmented Generation) Settings
#### EmbedURL (`"http://localhost:8082/v1/embeddings"`) #### EmbedURL (`"http://localhost:8082/v1/embeddings"`)
- The endpoint for embedding API, used for RAG (Retrieval Augmented Generation) functionality. - The endpoint for embedding API, used for RAG (Retrieval Augmented Generation) functionality.
#### RAGEnabled (`false`)
- Enable or disable RAG functionality for enhanced context retrieval.
#### RAGBatchSize (`1`) #### RAGBatchSize (`1`)
- Number of documents to process in each RAG batch. - Number of documents to process in each RAG batch.

218
extra/google_tts.go Normal file
View File

@@ -0,0 +1,218 @@
//go:build extra
// +build extra
package extra
import (
"fmt"
"gf-lt/models"
"io"
"log/slog"
"os/exec"
"strings"
"sync"
google_translate_tts "github.com/GrailFinder/google-translate-tts"
"github.com/neurosnap/sentences/english"
)
// GoogleTranslateOrator turns text into speech with google_translate_tts and
// plays the result through an external ffplay process.
type GoogleTranslateOrator struct {
logger *slog.Logger
// mu is held while textBuffer and interrupt are read or modified
mu sync.Mutex
// speech generates the audio stream for a piece of text
speech *google_translate_tts.Speech
// fields for playback control
cmd *exec.Cmd
// cmdMu is held while cmd and stopCh are assigned
cmdMu sync.Mutex
// stopCh is closed by Stop to make a running Speak return
stopCh chan struct{}
// text buffer and interrupt flag
textBuffer strings.Builder
interrupt bool
// Speed is the playback tempo; Speak adds an ffplay atempo filter when it
// is above 0.1 and different from 1.0
Speed float32
}
// stoproutine handles TTSDoneChan signals in an endless loop: on every signal
// it stops the current playback and discards text that is queued but not yet
// spoken. It never returns.
func (o *GoogleTranslateOrator) stoproutine() {
	for {
		<-TTSDoneChan
		o.logger.Debug("orator got done signal")
		o.Stop()
		// Drain pending chunks without blocking. Checking len() and then
		// receiving is racy: if another receiver (e.g. readroutine) takes the
		// last chunk in between, the receive would block until the next chunk
		// arrives and silently swallow it.
	drain:
		for {
			select {
			case <-TTSTextChan:
			default:
				break drain
			}
		}
		// Reset once more: a chunk may have been buffered while draining.
		o.mu.Lock()
		o.textBuffer.Reset()
		o.interrupt = true
		o.mu.Unlock()
	}
}
// readroutine consumes streamed text chunks from TTSTextChan, speaks every
// complete sentence as soon as one is available and keeps the unfinished tail
// in textBuffer. A signal on TTSFlushChan speaks whatever text is left.
// It returns only if the sentence tokenizer cannot be created.
func (o *GoogleTranslateOrator) readroutine() {
	tokenizer, err := english.NewSentenceTokenizer(nil)
	if err != nil {
		// Sentences cannot be split without a tokenizer; report it instead
		// of continuing with an unusable value.
		o.logger.Error("failed to create sentence tokenizer", "error", err)
		return
	}
	for {
		select {
		case chunk := <-TTSTextChan:
			o.mu.Lock()
			o.interrupt = false
			if _, werr := o.textBuffer.WriteString(chunk); werr != nil {
				o.logger.Warn("failed to write to stringbuilder", "error", werr)
				o.mu.Unlock()
				continue
			}
			text := o.textBuffer.String()
			sentences := tokenizer.Tokenize(text)
			o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
			if len(sentences) <= 1 {
				// no sentence boundary yet; wait for more text
				o.mu.Unlock()
				continue
			}
			// the last element may still be growing, keep it buffered
			completeSentences := sentences[:len(sentences)-1]
			remaining := sentences[len(sentences)-1].Text
			o.textBuffer.Reset()
			o.textBuffer.WriteString(remaining)
			o.mu.Unlock()
			for _, sentence := range completeSentences {
				o.mu.Lock()
				interrupted := o.interrupt
				o.mu.Unlock()
				if interrupted {
					// Drop the rest of this batch but keep the loop alive.
					// Returning here would terminate the routine, so later
					// chunks would never be spoken unless it is restarted.
					break
				}
				cleanedText := models.CleanText(sentence.Text)
				if cleanedText == "" {
					continue
				}
				o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
				if err := o.Speak(cleanedText); err != nil {
					o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
				}
			}
		case <-TTSFlushChan:
			o.logger.Debug("got flushchan signal start")
			// llm is done, get the whole message out.
			// Non-blocking drain: a blocking receive could get stuck when
			// the channel is emptied by someone else in the meantime.
		drain:
			for {
				select {
				case chunk := <-TTSTextChan:
					o.mu.Lock()
					_, werr := o.textBuffer.WriteString(chunk)
					o.mu.Unlock()
					if werr != nil {
						o.logger.Warn("failed to write to stringbuilder", "error", werr)
					}
				default:
					break drain
				}
			}
			o.mu.Lock()
			remaining := o.textBuffer.String()
			remaining = models.CleanText(remaining)
			o.textBuffer.Reset()
			o.mu.Unlock()
			if remaining == "" {
				continue
			}
			o.logger.Debug("calling Speak with remainder", "rem", remaining)
			sentencesRem := tokenizer.Tokenize(remaining)
			for _, rs := range sentencesRem { // to avoid dumping large volume of text
				o.mu.Lock()
				interrupt := o.interrupt
				o.mu.Unlock()
				if interrupt {
					break
				}
				if err := o.Speak(rs.Text); err != nil {
					o.logger.Error("tts failed", "sentence", rs.Text, "error", err)
				}
			}
		}
	}
}
// GetLogger returns the logger stored in this orator.
func (o *GoogleTranslateOrator) GetLogger() *slog.Logger {
return o.logger
}
// Speak generates speech for text and plays it by piping the audio into an
// ffplay process. It blocks until playback ends, the audio copy fails, or
// Stop is called. A playback cut short by Stop returns nil; other failures
// are returned as errors.
func (o *GoogleTranslateOrator) Speak(text string) error {
	o.logger.Debug("fn: Speak is called", "text-len", len(text))
	// Generate MP3 data directly as an io.Reader
	reader, err := o.speech.GenerateSpeech(text)
	if err != nil {
		return fmt.Errorf("generate speech failed: %w", err)
	}
	// Build ffplay command with optional speed filter
	args := []string{"-nodisp", "-autoexit"}
	if o.Speed > 0.1 && o.Speed != 1.0 {
		// atempo range is 0.5 to 2.0; you might clamp it here
		args = append(args, "-af", fmt.Sprintf("atempo=%.2f", o.Speed))
	}
	args = append(args, "-i", "pipe:0")
	cmd := exec.Command("ffplay", args...)
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return fmt.Errorf("failed to get stdin pipe: %w", err)
	}
	// Keep local handles: Stop sets o.cmd and o.stopCh to nil under cmdMu, so
	// reading the fields later without the lock would race and could observe
	// nil, which makes the stop case of the select below block forever.
	stopCh := make(chan struct{})
	o.cmdMu.Lock()
	o.cmd = cmd
	o.stopCh = stopCh
	o.cmdMu.Unlock()
	// Unregister this playback when done so a later Stop does not act on a
	// command that has already finished (or never started).
	defer func() {
		o.cmdMu.Lock()
		if o.cmd == cmd {
			o.cmd = nil
			o.stopCh = nil
		}
		o.cmdMu.Unlock()
	}()
	if err := cmd.Start(); err != nil {
		stdin.Close() // nothing will ever read from the pipe
		return fmt.Errorf("failed to start ffplay: %w", err)
	}
	copyErr := make(chan error, 1)
	go func() {
		_, err := io.Copy(stdin, reader)
		stdin.Close()
		copyErr <- err
	}()
	done := make(chan error, 1)
	go func() {
		done <- cmd.Wait()
	}()
	// stopped reports whether Stop has closed stopCh.
	stopped := func() bool {
		select {
		case <-stopCh:
			return true
		default:
			return false
		}
	}
	// pendingCopy is set to nil after a successful copy so that the select
	// keeps listening for Stop while the player drains its input.
	var pendingCopy <-chan error = copyErr
	for {
		select {
		case <-stopCh:
			// Kill's error (e.g. process already finished) is not actionable.
			_ = cmd.Process.Kill()
			<-done
			return nil
		case cerr := <-pendingCopy:
			if cerr == nil {
				pendingCopy = nil
				continue
			}
			_ = cmd.Process.Kill()
			<-done
			if stopped() {
				// the pipe broke because Stop killed the player
				return nil
			}
			return cerr
		case werr := <-done:
			if stopped() {
				return nil
			}
			return werr
		}
	}
}
// Stop interrupts the playback started by Speak, if any, and discards the
// buffered text. It is safe to call when nothing is playing.
func (o *GoogleTranslateOrator) Stop() {
	o.cmdMu.Lock()
	defer o.cmdMu.Unlock()
	// Signal any running Speak to stop
	if o.stopCh != nil {
		select {
		case <-o.stopCh: // already closed
		default:
			close(o.stopCh)
		}
		o.stopCh = nil
	}
	// Kill the external player process if it's still running
	if o.cmd != nil && o.cmd.Process != nil {
		// Kill's error (e.g. process already finished) is not actionable.
		_ = o.cmd.Process.Kill()
		// No Wait here: Speak starts a goroutine that calls cmd.Wait for
		// every started command, which reaps the process. A second,
		// concurrent Wait on the same exec.Cmd races with that call.
		o.cmd = nil
	}
	// Also reset text buffer and interrupt flag (with o.mu)
	o.mu.Lock()
	o.textBuffer.Reset()
	o.interrupt = true
	o.mu.Unlock()
}

259
extra/kokoro.go Normal file
View File

@@ -0,0 +1,259 @@
//go:build extra
// +build extra
package extra
import (
"bytes"
"encoding/json"
"fmt"
"gf-lt/models"
"io"
"log/slog"
"net/http"
"os/exec"
"strings"
"sync"
"github.com/neurosnap/sentences/english"
)
// KokoroOrator requests speech audio from an HTTP TTS endpoint and plays it
// through an external ffplay process.
type KokoroOrator struct {
logger *slog.Logger
// mu is held while textBuffer and interrupt are read or modified
mu sync.Mutex
// URL is the TTS endpoint; requestSound fails when it is empty
URL string
// Format, Stream, Speed, Language and Voice are sent in the request payload
// ("response_format"/"download_format", "stream", "speed", "lang_code", "voice")
Format models.AudioFormat
Stream bool
Speed float32
Language string
Voice string
// fields for playback control
cmd *exec.Cmd
// cmdMu is held while cmd and stopCh are assigned
cmdMu sync.Mutex
// stopCh is closed by Stop to make a running Speak return
stopCh chan struct{}
// textBuffer, interrupt etc. remain the same
textBuffer strings.Builder
interrupt bool
}
// GetLogger returns the logger stored in this orator.
func (o *KokoroOrator) GetLogger() *slog.Logger {
return o.logger
}
// Speak requests audio for text from the TTS endpoint and plays it by piping
// the response into an ffplay process. It blocks until playback ends, the
// audio copy fails, or Stop is called. A playback cut short by Stop returns
// nil; other failures are returned as errors.
func (o *KokoroOrator) Speak(text string) error {
	o.logger.Debug("fn: Speak is called", "text-len", len(text))
	body, err := o.requestSound(text)
	if err != nil {
		return fmt.Errorf("request failed: %w", err)
	}
	defer body.Close()
	cmd := exec.Command("ffplay", "-nodisp", "-autoexit", "-i", "pipe:0")
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return fmt.Errorf("failed to get stdin pipe: %w", err)
	}
	// Keep local handles: Stop sets o.cmd and o.stopCh to nil under cmdMu, so
	// reading the fields later without the lock would race and could observe
	// nil, which makes the stop case of the select below block forever.
	stopCh := make(chan struct{})
	o.cmdMu.Lock()
	o.cmd = cmd
	o.stopCh = stopCh
	o.cmdMu.Unlock()
	// Unregister this playback when done so a later Stop does not act on a
	// command that has already finished (or never started).
	defer func() {
		o.cmdMu.Lock()
		if o.cmd == cmd {
			o.cmd = nil
			o.stopCh = nil
		}
		o.cmdMu.Unlock()
	}()
	if err := cmd.Start(); err != nil {
		stdin.Close() // nothing will ever read from the pipe
		return fmt.Errorf("failed to start ffplay: %w", err)
	}
	// Copy audio in background
	copyErr := make(chan error, 1)
	go func() {
		_, err := io.Copy(stdin, body)
		stdin.Close()
		copyErr <- err
	}()
	// Wait for player in background
	done := make(chan error, 1)
	go func() {
		done <- cmd.Wait()
	}()
	// stopped reports whether Stop has closed stopCh.
	stopped := func() bool {
		select {
		case <-stopCh:
			return true
		default:
			return false
		}
	}
	// pendingCopy is set to nil after a successful copy so that the select
	// keeps listening for Stop while the player drains its input.
	var pendingCopy <-chan error = copyErr
	for {
		select {
		case <-stopCh:
			// Stop requested: kill player and wait for it to exit.
			// Kill's error (e.g. process already finished) is not actionable.
			_ = cmd.Process.Kill()
			<-done
			return nil
		case cerr := <-pendingCopy:
			if cerr == nil {
				// Copy succeeded, now wait for playback to complete
				pendingCopy = nil
				continue
			}
			// Copy failed: kill player and wait
			_ = cmd.Process.Kill()
			<-done
			if stopped() {
				// the pipe broke because Stop killed the player
				return nil
			}
			return cerr
		case werr := <-done:
			if stopped() {
				return nil
			}
			return werr
		}
	}
}
// requestSound POSTs a JSON synthesis request for text to o.URL and returns
// the response body on HTTP 200. The caller must close the returned body.
// An empty URL, a marshalling or transport failure, or any other status code
// yields an error and a nil body.
func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) {
	if o.URL == "" {
		return nil, fmt.Errorf("TTS URL is empty")
	}
	rawPayload, err := json.Marshal(map[string]any{
		"input":           text,
		"voice":           o.Voice,
		"response_format": o.Format,
		"download_format": o.Format,
		"stream":          o.Stream,
		"speed":           o.Speed,
		// "return_download_link": true,
		"lang_code": o.Language,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to marshal payload: %w", err)
	}
	httpReq, err := http.NewRequest(http.MethodPost, o.URL, bytes.NewBuffer(rawPayload)) //nolint:noctx
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("accept", "application/json")
	httpReq.Header.Set("Content-Type", "application/json")
	httpResp, err := http.DefaultClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("request failed: %w", err)
	}
	if httpResp.StatusCode == http.StatusOK {
		return httpResp.Body, nil
	}
	// not handing the body to the caller, so release it here
	httpResp.Body.Close()
	return nil, fmt.Errorf("unexpected status code: %d", httpResp.StatusCode)
}
// stoproutine handles TTSDoneChan signals in an endless loop: on every signal
// it stops the current playback and discards text that is queued but not yet
// spoken. It never returns.
func (o *KokoroOrator) stoproutine() {
	for {
		<-TTSDoneChan
		o.logger.Debug("orator got done signal")
		// 1. Stop any ongoing playback (kills external player, closes stopCh)
		o.Stop()
		// 2. Drain any pending text chunks without blocking. Checking len()
		// and then receiving is racy: if another receiver (e.g. readroutine)
		// takes the last chunk in between, the receive would block until the
		// next chunk arrives and silently swallow it.
	drain:
		for {
			select {
			case <-TTSTextChan:
			default:
				break drain
			}
		}
		// 3. Reset internal state (a chunk may have been buffered meanwhile)
		o.mu.Lock()
		o.textBuffer.Reset()
		o.interrupt = true
		o.mu.Unlock()
	}
}
// Stop interrupts the playback started by Speak, if any, and discards the
// buffered text. It is safe to call when nothing is playing.
func (o *KokoroOrator) Stop() {
	o.cmdMu.Lock()
	defer o.cmdMu.Unlock()
	// Signal any running Speak to stop
	if o.stopCh != nil {
		select {
		case <-o.stopCh: // already closed
		default:
			close(o.stopCh)
		}
		o.stopCh = nil
	}
	// Kill the external player process if it's still running
	if o.cmd != nil && o.cmd.Process != nil {
		// Kill's error (e.g. process already finished) is not actionable.
		_ = o.cmd.Process.Kill()
		// No Wait here: Speak starts a goroutine that calls cmd.Wait for
		// every started command, which reaps the process. A second,
		// concurrent Wait on the same exec.Cmd races with that call.
		o.cmd = nil
	}
	// Also reset text buffer and interrupt flag (with o.mu)
	o.mu.Lock()
	o.textBuffer.Reset()
	o.interrupt = true
	o.mu.Unlock()
}
// readroutine is the orator's text-to-speech feed loop. It loops forever and
// reacts to two channels:
//
//   - TTSTextChan: each chunk is appended to textBuffer and the buffer is
//     split into sentences. Every sentence except the last (which may still
//     be incomplete) is cleaned and passed to Speak; the last one stays in
//     the buffer.
//   - TTSFlushChan: whatever is still queued on TTSTextChan is appended, then
//     the whole buffer is cleaned, split into sentences and spoken.
//
// When the interrupt flag is found set, the sentences of the current batch
// that have not been spoken yet are dropped and the loop goes back to waiting
// for input. Speak errors are logged and do not stop the loop.
func (o *KokoroOrator) readroutine() {
	tokenizer, tokErr := english.NewSentenceTokenizer(nil)
	if tokErr != nil {
		// Nothing below can work without a tokenizer, so report and bail out
		// instead of discarding the error.
		o.logger.Error("failed to create sentence tokenizer", "error", tokErr)
		return
	}
	for {
		select {
		case chunk := <-TTSTextChan:
			o.mu.Lock()
			o.interrupt = false
			_, err := o.textBuffer.WriteString(chunk)
			if err != nil {
				o.logger.Warn("failed to write to stringbuilder", "error", err)
				o.mu.Unlock()
				continue
			}
			text := o.textBuffer.String()
			sentences := tokenizer.Tokenize(text)
			o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
			if len(sentences) <= 1 {
				// No sentence boundary yet; keep accumulating.
				o.mu.Unlock()
				continue
			}
			completeSentences := sentences[:len(sentences)-1]
			remaining := sentences[len(sentences)-1].Text
			o.textBuffer.Reset()
			o.textBuffer.WriteString(remaining)
			o.mu.Unlock()
			for _, sentence := range completeSentences {
				o.mu.Lock()
				interrupted := o.interrupt
				o.mu.Unlock()
				if interrupted {
					// Drop the rest of this batch but keep the routine alive:
					// returning here would end the goroutine and nothing
					// would read TTSTextChan/TTSFlushChan afterwards.
					break
				}
				cleanedText := models.CleanText(sentence.Text)
				if cleanedText == "" {
					continue
				}
				o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
				if err := o.Speak(cleanedText); err != nil {
					o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
				}
			}
		case <-TTSFlushChan:
			o.logger.Debug("got flushchan signal start")
			// llm is done, get the whole message out.
			// Collect what is still queued without blocking: another
			// goroutine may empty the channel between a len() check and the
			// receive, which would leave this loop stuck.
		drain:
			for {
				select {
				case chunk, ok := <-TTSTextChan:
					if !ok {
						break drain
					}
					o.mu.Lock()
					_, err := o.textBuffer.WriteString(chunk)
					o.mu.Unlock()
					if err != nil {
						o.logger.Warn("failed to write to stringbuilder", "error", err)
					}
				default:
					break drain
				}
			}
			// flush remaining text
			o.mu.Lock()
			remaining := o.textBuffer.String()
			remaining = models.CleanText(remaining)
			o.textBuffer.Reset()
			o.mu.Unlock()
			if remaining == "" {
				continue
			}
			o.logger.Debug("calling Speak with remainder", "rem", remaining)
			sentencesRem := tokenizer.Tokenize(remaining)
			for _, rs := range sentencesRem { // to avoid dumping large volume of text
				o.mu.Lock()
				interrupt := o.interrupt
				o.mu.Unlock()
				if interrupt {
					break
				}
				if err := o.Speak(rs.Text); err != nil {
					o.logger.Error("tts failed", "sentence", rs.Text, "error", err)
				}
			}
		}
	}
}

View File

@@ -6,18 +6,10 @@ package extra
import ( import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"errors"
"fmt"
"gf-lt/config" "gf-lt/config"
"io" "io"
"log/slog" "log/slog"
"mime/multipart"
"net/http"
"regexp" "regexp"
"strings"
"syscall"
"github.com/gordonklaus/portaudio"
) )
var specialRE = regexp.MustCompile(`\[.*?\]`) var specialRE = regexp.MustCompile(`\[.*?\]`)
@@ -44,14 +36,6 @@ func NewSTT(logger *slog.Logger, cfg *config.Config) STT {
return NewWhisperServer(logger, cfg) return NewWhisperServer(logger, cfg)
} }
type WhisperServer struct {
logger *slog.Logger
ServerURL string
SampleRate int
AudioBuffer *bytes.Buffer
recording bool
}
func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer { func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
return &WhisperServer{ return &WhisperServer{
logger: logger, logger: logger,
@@ -61,69 +45,6 @@ func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
} }
} }
func (stt *WhisperServer) StartRecording() error {
if err := stt.microphoneStream(stt.SampleRate); err != nil {
return fmt.Errorf("failed to init microphone: %w", err)
}
stt.recording = true
return nil
}
func (stt *WhisperServer) StopRecording() (string, error) {
stt.recording = false
// wait loop to finish?
if stt.AudioBuffer == nil {
err := errors.New("unexpected nil AudioBuffer")
stt.logger.Error(err.Error())
return "", err
}
// Create WAV header first
body := &bytes.Buffer{}
writer := multipart.NewWriter(body)
// Add audio file part
part, err := writer.CreateFormFile("file", "recording.wav")
if err != nil {
stt.logger.Error("fn: StopRecording", "error", err)
return "", err
}
// Stream directly to multipart writer: header + raw data
dataSize := stt.AudioBuffer.Len()
stt.writeWavHeader(part, dataSize)
if _, err := io.Copy(part, stt.AudioBuffer); err != nil {
stt.logger.Error("fn: StopRecording", "error", err)
return "", err
}
// Reset buffer for next recording
stt.AudioBuffer.Reset()
// Add response format field
err = writer.WriteField("response_format", "text")
if err != nil {
stt.logger.Error("fn: StopRecording", "error", err)
return "", err
}
if writer.Close() != nil {
stt.logger.Error("fn: StopRecording", "error", err)
return "", err
}
// Send request
resp, err := http.Post(stt.ServerURL, writer.FormDataContentType(), body) //nolint:noctx
if err != nil {
stt.logger.Error("fn: StopRecording", "error", err)
return "", err
}
defer resp.Body.Close()
// Read and print response
responseTextBytes, err := io.ReadAll(resp.Body)
if err != nil {
stt.logger.Error("fn: StopRecording", "error", err)
return "", err
}
resptext := strings.TrimRight(string(responseTextBytes), "\n")
// in case there are special tokens like [_BEG_]
resptext = specialRE.ReplaceAllString(resptext, "")
return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
}
func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) { func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
header := make([]byte, 44) header := make([]byte, 44)
copy(header[0:4], "RIFF") copy(header[0:4], "RIFF")
@@ -147,56 +68,3 @@ func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
func (stt *WhisperServer) IsRecording() bool { func (stt *WhisperServer) IsRecording() bool {
return stt.recording return stt.recording
} }
func (stt *WhisperServer) microphoneStream(sampleRate int) error {
// Temporarily redirect stderr to suppress ALSA warnings during PortAudio init
origStderr, errDup := syscall.Dup(syscall.Stderr)
if errDup != nil {
return fmt.Errorf("failed to dup stderr: %w", errDup)
}
nullFD, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
if err != nil {
_ = syscall.Close(origStderr) // Close the dup'd fd if open fails
return fmt.Errorf("failed to open /dev/null: %w", err)
}
// redirect stderr
_ = syscall.Dup2(nullFD, syscall.Stderr)
// Initialize PortAudio (this is where ALSA warnings occur)
defer func() {
// Restore stderr
_ = syscall.Dup2(origStderr, syscall.Stderr)
_ = syscall.Close(origStderr)
_ = syscall.Close(nullFD)
}()
if err := portaudio.Initialize(); err != nil {
return fmt.Errorf("portaudio init failed: %w", err)
}
in := make([]int16, 64)
stream, err := portaudio.OpenDefaultStream(1, 0, float64(sampleRate), len(in), in)
if err != nil {
if paErr := portaudio.Terminate(); paErr != nil {
return fmt.Errorf("failed to open microphone: %w; terminate error: %w", err, paErr)
}
return fmt.Errorf("failed to open microphone: %w", err)
}
go func(stream *portaudio.Stream) {
if err := stream.Start(); err != nil {
stt.logger.Error("microphoneStream", "error", err)
return
}
for {
if !stt.IsRecording() {
return
}
if err := stream.Read(); err != nil {
stt.logger.Error("reading stream", "error", err)
return
}
if err := binary.Write(stt.AudioBuffer, binary.LittleEndian, in); err != nil {
stt.logger.Error("writing to buffer", "error", err)
return
}
}
}(stream)
return nil
}

View File

@@ -4,25 +4,13 @@
package extra package extra
import ( import (
"bytes"
"encoding/json"
"fmt"
"gf-lt/config" "gf-lt/config"
"gf-lt/models" "gf-lt/models"
"io"
"log/slog" "log/slog"
"net/http"
"os" "os"
"strings" "strings"
"sync"
"time"
google_translate_tts "github.com/GrailFinder/google-translate-tts" google_translate_tts "github.com/GrailFinder/google-translate-tts"
"github.com/GrailFinder/google-translate-tts/handlers"
"github.com/gopxl/beep/v2"
"github.com/gopxl/beep/v2/mp3"
"github.com/gopxl/beep/v2/speaker"
"github.com/neurosnap/sentences/english"
) )
var ( var (
@@ -39,142 +27,6 @@ type Orator interface {
GetLogger() *slog.Logger GetLogger() *slog.Logger
} }
// impl https://github.com/remsky/Kokoro-FastAPI
type KokoroOrator struct {
logger *slog.Logger
mu sync.Mutex
URL string
Format models.AudioFormat
Stream bool
Speed float32
Language string
Voice string
currentStream *beep.Ctrl // Added for playback control
currentDone chan bool
textBuffer strings.Builder
interrupt bool
// textBuffer bytes.Buffer
}
// Google Translate TTS implementation
type GoogleTranslateOrator struct {
logger *slog.Logger
mu sync.Mutex
speech *google_translate_tts.Speech
currentStream *beep.Ctrl
currentDone chan bool
textBuffer strings.Builder
interrupt bool
}
func (o *KokoroOrator) stoproutine() {
for {
<-TTSDoneChan
o.logger.Debug("orator got done signal")
o.Stop()
// drain the channel
for len(TTSTextChan) > 0 {
<-TTSTextChan
}
o.mu.Lock()
o.textBuffer.Reset()
if o.currentDone != nil {
select {
case o.currentDone <- true:
default:
// Channel might be closed, ignore
}
}
o.interrupt = true
o.mu.Unlock()
}
}
func (o *KokoroOrator) readroutine() {
tokenizer, _ := english.NewSentenceTokenizer(nil)
for {
select {
case chunk := <-TTSTextChan:
o.mu.Lock()
o.interrupt = false
_, err := o.textBuffer.WriteString(chunk)
if err != nil {
o.logger.Warn("failed to write to stringbuilder", "error", err)
o.mu.Unlock()
continue
}
text := o.textBuffer.String()
sentences := tokenizer.Tokenize(text)
o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
if len(sentences) <= 1 {
o.mu.Unlock()
continue
}
completeSentences := sentences[:len(sentences)-1]
remaining := sentences[len(sentences)-1].Text
o.textBuffer.Reset()
o.textBuffer.WriteString(remaining)
o.mu.Unlock()
for _, sentence := range completeSentences {
o.mu.Lock()
interrupted := o.interrupt
o.mu.Unlock()
if interrupted {
return
}
cleanedText := models.CleanText(sentence.Text)
if cleanedText == "" {
continue
}
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
if err := o.Speak(cleanedText); err != nil {
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
}
}
case <-TTSFlushChan:
o.logger.Debug("got flushchan signal start")
// lln is done get the whole message out
if len(TTSTextChan) > 0 { // otherwise might get stuck
for chunk := range TTSTextChan {
o.mu.Lock()
_, err := o.textBuffer.WriteString(chunk)
o.mu.Unlock()
if err != nil {
o.logger.Warn("failed to write to stringbuilder", "error", err)
continue
}
if len(TTSTextChan) == 0 {
break
}
}
}
// flush remaining text
o.mu.Lock()
remaining := o.textBuffer.String()
remaining = models.CleanText(remaining)
o.textBuffer.Reset()
o.mu.Unlock()
if remaining == "" {
continue
}
o.logger.Debug("calling Speak with remainder", "rem", remaining)
sentencesRem := tokenizer.Tokenize(remaining)
for _, rs := range sentencesRem { // to avoid dumping large volume of text
o.mu.Lock()
interrupt := o.interrupt
o.mu.Unlock()
if interrupt {
break
}
if err := o.Speak(rs.Text); err != nil {
o.logger.Error("tts failed", "sentence", rs, "error", err)
}
}
}
}
}
func NewOrator(log *slog.Logger, cfg *config.Config) Orator { func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
provider := cfg.TTS_PROVIDER provider := cfg.TTS_PROVIDER
if provider == "" { if provider == "" {
@@ -204,270 +56,14 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
Language: language, Language: language,
Proxy: "", // Proxy not supported Proxy: "", // Proxy not supported
Speed: cfg.TTS_SPEED, Speed: cfg.TTS_SPEED,
Handler: &handlers.Beep{},
} }
orator := &GoogleTranslateOrator{ orator := &GoogleTranslateOrator{
logger: log, logger: log,
speech: speech, speech: speech,
Speed: cfg.TTS_SPEED,
} }
go orator.readroutine() go orator.readroutine()
go orator.stoproutine() go orator.stoproutine()
return orator return orator
} }
} }
func (o *KokoroOrator) GetLogger() *slog.Logger {
return o.logger
}
func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) {
if o.URL == "" {
return nil, fmt.Errorf("TTS URL is empty")
}
payload := map[string]interface{}{
"input": text,
"voice": o.Voice,
"response_format": o.Format,
"download_format": o.Format,
"stream": o.Stream,
"speed": o.Speed,
// "return_download_link": true,
"lang_code": o.Language,
}
payloadBytes, err := json.Marshal(payload)
if err != nil {
return nil, fmt.Errorf("failed to marshal payload: %w", err)
}
req, err := http.NewRequest("POST", o.URL, bytes.NewBuffer(payloadBytes)) //nolint:noctx
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("accept", "application/json")
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("request failed: %w", err)
}
if resp.StatusCode != http.StatusOK {
defer resp.Body.Close()
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
return resp.Body, nil
}
func (o *KokoroOrator) Speak(text string) error {
o.logger.Debug("fn: Speak is called", "text-len", len(text))
body, err := o.requestSound(text)
if err != nil {
o.logger.Error("request failed", "error", err)
return fmt.Errorf("request failed: %w", err)
}
defer body.Close()
// Decode the mp3 audio from response body
streamer, format, err := mp3.Decode(body)
if err != nil {
o.logger.Error("mp3 decode failed", "error", err)
return fmt.Errorf("mp3 decode failed: %w", err)
}
defer streamer.Close()
// here it spams with errors that speaker cannot be initialized more than once, but how would we deal with many audio records then?
if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil {
o.logger.Debug("failed to init speaker", "error", err)
}
done := make(chan bool)
o.mu.Lock()
o.currentDone = done
o.currentStream = &beep.Ctrl{Streamer: beep.Seq(streamer, beep.Callback(func() {
o.mu.Lock()
close(done)
o.currentStream = nil
o.currentDone = nil
o.mu.Unlock()
})), Paused: false}
o.mu.Unlock()
speaker.Play(o.currentStream)
<-done
return nil
}
func (o *KokoroOrator) Stop() {
// speaker.Clear()
o.logger.Debug("attempted to stop orator", "orator", o)
speaker.Lock()
defer speaker.Unlock()
o.mu.Lock()
defer o.mu.Unlock()
if o.currentStream != nil {
// o.currentStream.Paused = true
o.currentStream.Streamer = nil
}
}
func (o *GoogleTranslateOrator) stoproutine() {
for {
<-TTSDoneChan
o.logger.Debug("orator got done signal")
o.Stop()
// drain the channel
for len(TTSTextChan) > 0 {
<-TTSTextChan
}
o.mu.Lock()
o.textBuffer.Reset()
if o.currentDone != nil {
select {
case o.currentDone <- true:
default:
// Channel might be closed, ignore
}
}
o.interrupt = true
o.mu.Unlock()
}
}
func (o *GoogleTranslateOrator) readroutine() {
tokenizer, _ := english.NewSentenceTokenizer(nil)
for {
select {
case chunk := <-TTSTextChan:
o.mu.Lock()
o.interrupt = false
_, err := o.textBuffer.WriteString(chunk)
if err != nil {
o.logger.Warn("failed to write to stringbuilder", "error", err)
o.mu.Unlock()
continue
}
text := o.textBuffer.String()
sentences := tokenizer.Tokenize(text)
o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
if len(sentences) <= 1 {
o.mu.Unlock()
continue
}
completeSentences := sentences[:len(sentences)-1]
remaining := sentences[len(sentences)-1].Text
o.textBuffer.Reset()
o.textBuffer.WriteString(remaining)
o.mu.Unlock()
for _, sentence := range completeSentences {
o.mu.Lock()
interrupted := o.interrupt
o.mu.Unlock()
if interrupted {
return
}
cleanedText := models.CleanText(sentence.Text)
if cleanedText == "" {
continue
}
o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
if err := o.Speak(cleanedText); err != nil {
o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
}
}
case <-TTSFlushChan:
o.logger.Debug("got flushchan signal start")
// lln is done get the whole message out
if len(TTSTextChan) > 0 { // otherwise might get stuck
for chunk := range TTSTextChan {
o.mu.Lock()
_, err := o.textBuffer.WriteString(chunk)
o.mu.Unlock()
if err != nil {
o.logger.Warn("failed to write to stringbuilder", "error", err)
continue
}
if len(TTSTextChan) == 0 {
break
}
}
}
o.mu.Lock()
remaining := o.textBuffer.String()
remaining = models.CleanText(remaining)
o.textBuffer.Reset()
o.mu.Unlock()
if remaining == "" {
continue
}
o.logger.Debug("calling Speak with remainder", "rem", remaining)
sentencesRem := tokenizer.Tokenize(remaining)
for _, rs := range sentencesRem { // to avoid dumping large volume of text
o.mu.Lock()
interrupt := o.interrupt
o.mu.Unlock()
if interrupt {
break
}
if err := o.Speak(rs.Text); err != nil {
o.logger.Error("tts failed", "sentence", rs.Text, "error", err)
}
}
}
}
}
func (o *GoogleTranslateOrator) GetLogger() *slog.Logger {
return o.logger
}
func (o *GoogleTranslateOrator) Speak(text string) error {
o.logger.Debug("fn: Speak is called", "text-len", len(text))
// Generate MP3 data using google-translate-tts
reader, err := o.speech.GenerateSpeech(text)
if err != nil {
o.logger.Error("generate speech failed", "error", err)
return fmt.Errorf("generate speech failed: %w", err)
}
// Decode the mp3 audio from reader (wrap with NopCloser for io.ReadCloser)
streamer, format, err := mp3.Decode(io.NopCloser(reader))
if err != nil {
o.logger.Error("mp3 decode failed", "error", err)
return fmt.Errorf("mp3 decode failed: %w", err)
}
defer streamer.Close()
playbackStreamer := beep.Streamer(streamer)
speed := o.speech.Speed
if speed <= 0 {
speed = 1.0
}
if speed != 1.0 {
playbackStreamer = beep.ResampleRatio(3, float64(speed), streamer)
}
// Initialize speaker with the format's sample rate
if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil {
o.logger.Debug("failed to init speaker", "error", err)
}
done := make(chan bool)
o.mu.Lock()
o.currentDone = done
o.currentStream = &beep.Ctrl{Streamer: beep.Seq(playbackStreamer, beep.Callback(func() {
o.mu.Lock()
close(done)
o.currentStream = nil
o.currentDone = nil
o.mu.Unlock()
})), Paused: false}
o.mu.Unlock()
speaker.Play(o.currentStream)
<-done // wait for playback to complete
return nil
}
func (o *GoogleTranslateOrator) Stop() {
o.logger.Debug("attempted to stop google translate orator")
speaker.Lock()
defer speaker.Unlock()
o.mu.Lock()
defer o.mu.Unlock()
if o.currentStream != nil {
o.currentStream.Streamer = nil
}
// Also stop the speech handler if possible
if o.speech != nil {
_ = o.speech.Stop()
}
}

View File

@@ -9,15 +9,13 @@ import (
"errors" "errors"
"fmt" "fmt"
"gf-lt/config" "gf-lt/config"
"io"
"log/slog" "log/slog"
"os" "os"
"os/exec" "os/exec"
"strings" "strings"
"sync" "sync"
"syscall" "syscall"
"time"
"github.com/gordonklaus/portaudio"
) )
type WhisperBinary struct { type WhisperBinary struct {
@@ -25,11 +23,143 @@ type WhisperBinary struct {
whisperPath string whisperPath string
modelPath string modelPath string
lang string lang string
ctx context.Context // Per-recording fields (protected by mu)
cancel context.CancelFunc
mu sync.Mutex mu sync.Mutex
recording bool recording bool
audioBuffer []int16 tempFile string
ctx context.Context
cancel context.CancelFunc
cmd *exec.Cmd
cmdMu sync.Mutex
}
// StartRecording launches an ffmpeg process that captures the default ALSA
// input as 16 kHz mono 16-bit PCM into a fresh temporary WAV file.
//
// It returns an error if a recording is already running, or if the temp file,
// the stderr pipe or the ffmpeg process cannot be set up. ffmpeg's stderr is
// forwarded to the debug log by a background goroutine.
func (w *WhisperBinary) StartRecording() error {
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.recording {
		return errors.New("recording is already in progress")
	}
	// Each recording runs under its own cancellable context.
	w.ctx, w.cancel = context.WithCancel(context.Background())
	abort := w.cancel
	// Reserve a file name; ffmpeg itself writes the contents.
	out, err := os.CreateTemp("", "recording_*.wav")
	if err != nil {
		abort()
		return fmt.Errorf("failed to create temp file: %w", err)
	}
	out.Close()
	w.tempFile = out.Name()
	ffmpeg := exec.CommandContext(w.ctx, "ffmpeg",
		"-f", "alsa", // or "pulse" if preferred
		"-i", "default",
		"-acodec", "pcm_s16le",
		"-ar", "16000",
		"-ac", "1",
		"-y", // overwrite output file
		w.tempFile,
	)
	// ffmpeg reports its diagnostics on stderr; keep them for debugging.
	diagnostics, err := ffmpeg.StderrPipe()
	if err != nil {
		abort()
		os.Remove(w.tempFile)
		return fmt.Errorf("failed to create stderr pipe: %w", err)
	}
	go func() {
		chunk := make([]byte, 1024)
		for {
			got, readErr := diagnostics.Read(chunk)
			if got > 0 {
				w.logger.Debug("ffmpeg stderr", "output", string(chunk[:got]))
			}
			if readErr != nil {
				return
			}
		}
	}()
	w.cmdMu.Lock()
	w.cmd = ffmpeg
	w.cmdMu.Unlock()
	if err := ffmpeg.Start(); err != nil {
		abort()
		os.Remove(w.tempFile)
		return fmt.Errorf("failed to start ffmpeg: %w", err)
	}
	w.recording = true
	w.logger.Debug("Recording started", "file", w.tempFile)
	return nil
}
// StopRecording ends the running ffmpeg capture, runs the whisper binary on
// the recorded WAV file and returns the cleaned-up transcription.
//
// It returns an error when no recording is in progress, when the recording
// file is missing or shorter than a WAV header, or when the whisper binary
// fails (its stderr is included in the error). The temporary file is removed
// before returning once it has been validated as set.
//
// w.mu is held for the whole call, including the whisper run, so
// IsRecording and StartRecording block until transcription has finished.
func (w *WhisperBinary) StopRecording() (string, error) {
	w.mu.Lock()
	defer w.mu.Unlock()
	if !w.recording {
		return "", errors.New("not currently recording")
	}
	w.recording = false
	// Gracefully stop ffmpeg
	w.cmdMu.Lock()
	if w.cmd != nil && w.cmd.Process != nil {
		w.logger.Debug("Sending SIGTERM to ffmpeg")
		w.cmd.Process.Signal(syscall.SIGTERM)
		// Wait for process to exit (up to 2 seconds)
		// The channel is buffered so the waiting goroutine can always deliver
		// its result and exit.
		done := make(chan error, 1)
		go func() {
			done <- w.cmd.Wait()
		}()
		select {
		case <-done:
			w.logger.Debug("ffmpeg exited after SIGTERM")
		case <-time.After(2 * time.Second):
			w.logger.Warn("ffmpeg did not exit, sending SIGKILL")
			w.cmd.Process.Kill()
			<-done
		}
	}
	w.cmdMu.Unlock()
	// Cancel the per-recording context to release its resources.
	if w.cancel != nil {
		w.cancel()
	}
	// Validate temp file
	if w.tempFile == "" {
		return "", errors.New("no recording file")
	}
	defer os.Remove(w.tempFile)
	info, err := os.Stat(w.tempFile)
	if err != nil {
		return "", fmt.Errorf("failed to stat temp file: %w", err)
	}
	if info.Size() < 44 { // WAV header is 44 bytes
		// Log ffmpeg stderr? Already captured in debug logs.
		return "", fmt.Errorf("recording file too small (%d bytes), possibly no audio captured", info.Size())
	}
	// Run whisper.cpp binary
	cmd := exec.Command(w.whisperPath, "-m", w.modelPath, "-l", w.lang, w.tempFile)
	var outBuf, errBuf bytes.Buffer
	cmd.Stdout = &outBuf
	cmd.Stderr = &errBuf
	if err := cmd.Run(); err != nil {
		w.logger.Error("whisper binary failed",
			"error", err,
			"stderr", errBuf.String(),
			"file_size", info.Size())
		return "", fmt.Errorf("whisper binary failed: %w (stderr: %s)", err, errBuf.String())
	}
	// Post-process the output: drop trailing newlines, remove bracketed
	// tokens matched by specialRE, and strip the space that follows a newline.
	result := strings.TrimRight(outBuf.String(), "\n")
	result = specialRE.ReplaceAllString(result, "")
	return strings.TrimSpace(strings.ReplaceAll(result, "\n ", "\n")), nil
}
// IsRecording returns true if a recording is in progress.
func (w *WhisperBinary) IsRecording() bool {
w.mu.Lock()
defer w.mu.Unlock()
return w.recording
} }
func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary { func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
@@ -44,283 +174,3 @@ func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
cancel: cancel, cancel: cancel,
} }
} }
func (w *WhisperBinary) StartRecording() error {
w.mu.Lock()
defer w.mu.Unlock()
if w.recording {
return errors.New("recording is already in progress")
}
// If context is cancelled, create a new one for the next recording session
if w.ctx.Err() != nil {
w.logger.Debug("Context cancelled, creating new context")
w.ctx, w.cancel = context.WithCancel(context.Background())
}
// Temporarily redirect stderr to suppress ALSA warnings during PortAudio init
origStderr, errDup := syscall.Dup(syscall.Stderr)
if errDup != nil {
return fmt.Errorf("failed to dup stderr: %w", errDup)
}
nullFD, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
if err != nil {
_ = syscall.Close(origStderr) // Close the dup'd fd if open fails
return fmt.Errorf("failed to open /dev/null: %w", err)
}
// redirect stderr
_ = syscall.Dup2(nullFD, syscall.Stderr)
// Initialize PortAudio (this is where ALSA warnings occur)
portaudioErr := portaudio.Initialize()
defer func() {
// Restore stderr
_ = syscall.Dup2(origStderr, syscall.Stderr)
_ = syscall.Close(origStderr)
_ = syscall.Close(nullFD)
}()
if portaudioErr != nil {
return fmt.Errorf("portaudio init failed: %w", portaudioErr)
}
// Initialize audio buffer
w.audioBuffer = make([]int16, 0)
in := make([]int16, 1024) // buffer size
stream, err := portaudio.OpenDefaultStream(1, 0, 16000.0, len(in), in)
if err != nil {
if paErr := portaudio.Terminate(); paErr != nil {
return fmt.Errorf("failed to open microphone: %w; terminate error: %w", err, paErr)
}
return fmt.Errorf("failed to open microphone: %w", err)
}
go w.recordAudio(stream, in)
w.recording = true
w.logger.Debug("Recording started")
return nil
}
func (w *WhisperBinary) recordAudio(stream *portaudio.Stream, in []int16) {
defer func() {
w.logger.Debug("recordAudio defer function called")
_ = stream.Stop() // Stop the stream
_ = portaudio.Terminate() // ignoring error as we're shutting down
w.logger.Debug("recordAudio terminated")
}()
w.logger.Debug("Starting audio stream")
if err := stream.Start(); err != nil {
w.logger.Error("Failed to start audio stream", "error", err)
return
}
w.logger.Debug("Audio stream started, entering recording loop")
for {
select {
case <-w.ctx.Done():
w.logger.Debug("Context done, exiting recording loop")
return
default:
// Check recording status with minimal lock time
w.mu.Lock()
recording := w.recording
w.mu.Unlock()
if !recording {
w.logger.Debug("Recording flag is false, exiting recording loop")
return
}
if err := stream.Read(); err != nil {
w.logger.Error("Error reading from stream", "error", err)
return
}
// Append samples to buffer - only acquire lock when necessary
w.mu.Lock()
if w.audioBuffer == nil {
w.audioBuffer = make([]int16, 0)
}
// Make a copy of the input buffer to avoid overwriting
tempBuffer := make([]int16, len(in))
copy(tempBuffer, in)
w.audioBuffer = append(w.audioBuffer, tempBuffer...)
w.mu.Unlock()
}
}
}
func (w *WhisperBinary) StopRecording() (string, error) {
w.logger.Debug("StopRecording called")
w.mu.Lock()
if !w.recording {
w.mu.Unlock()
return "", errors.New("not currently recording")
}
w.logger.Debug("Setting recording to false and cancelling context")
w.recording = false
w.cancel() // This will stop the recording goroutine
w.mu.Unlock()
// // Small delay to allow the recording goroutine to react to context cancellation
// time.Sleep(20 * time.Millisecond)
// Save the recorded audio to a temporary file
tempFile, err := w.saveAudioToTempFile()
if err != nil {
w.logger.Error("Error saving audio to temp file", "error", err)
return "", fmt.Errorf("failed to save audio to temp file: %w", err)
}
w.logger.Debug("Saved audio to temp file", "file", tempFile)
// Run the whisper binary with a separate context to avoid cancellation during transcription
cmd := exec.Command(w.whisperPath, "-m", w.modelPath, "-l", w.lang, tempFile, "2>/dev/null")
var outBuf bytes.Buffer
cmd.Stdout = &outBuf
// Redirect stderr to suppress ALSA warnings and other stderr output
cmd.Stderr = io.Discard // Suppress stderr output from whisper binary
w.logger.Debug("Running whisper binary command")
if err := cmd.Run(); err != nil {
// Clean up audio buffer
w.mu.Lock()
w.audioBuffer = nil
w.mu.Unlock()
// Since we're suppressing stderr, we'll just log that the command failed
w.logger.Error("Error running whisper binary", "error", err)
return "", fmt.Errorf("whisper binary failed: %w", err)
}
result := outBuf.String()
w.logger.Debug("Whisper binary completed", "result", result)
// Clean up audio buffer
w.mu.Lock()
w.audioBuffer = nil
w.mu.Unlock()
// Clean up the temporary file after transcription
w.logger.Debug("StopRecording completed")
os.Remove(tempFile)
result = strings.TrimRight(result, "\n")
// in case there are special tokens like [_BEG_]
result = specialRE.ReplaceAllString(result, "")
return strings.TrimSpace(strings.ReplaceAll(result, "\n ", "\n")), nil
}
// saveAudioToTempFile saves the recorded audio data to a temporary WAV file
func (w *WhisperBinary) saveAudioToTempFile() (string, error) {
w.logger.Debug("saveAudioToTempFile called")
// Create temporary WAV file
tempFile, err := os.CreateTemp("", "recording_*.wav")
if err != nil {
w.logger.Error("Failed to create temp file", "error", err)
return "", fmt.Errorf("failed to create temp file: %w", err)
}
w.logger.Debug("Created temp file", "file", tempFile.Name())
defer tempFile.Close()
// Write WAV header and data
w.logger.Debug("About to write WAV file", "file", tempFile.Name())
err = w.writeWAVFile(tempFile.Name())
if err != nil {
w.logger.Error("Error writing WAV file", "error", err)
return "", fmt.Errorf("failed to write WAV file: %w", err)
}
w.logger.Debug("WAV file written successfully", "file", tempFile.Name())
return tempFile.Name(), nil
}
// writeWAVFile creates a WAV file from the recorded audio data
func (w *WhisperBinary) writeWAVFile(filename string) error {
w.logger.Debug("writeWAVFile called", "filename", filename)
// Open file for writing
file, err := os.Create(filename)
if err != nil {
w.logger.Error("Error creating file", "error", err)
return err
}
defer file.Close()
w.logger.Debug("About to acquire mutex in writeWAVFile")
w.mu.Lock()
w.logger.Debug("Locked mutex, copying audio buffer")
audioData := make([]int16, len(w.audioBuffer))
copy(audioData, w.audioBuffer)
w.mu.Unlock()
w.logger.Debug("Unlocked mutex", "audio_data_length", len(audioData))
if len(audioData) == 0 {
w.logger.Warn("No audio data to write")
return errors.New("no audio data to write")
}
// Calculate data size (number of samples * size of int16)
dataSize := len(audioData) * 2 // 2 bytes per int16 sample
w.logger.Debug("Calculated data size", "size", dataSize)
// Write WAV header with the correct data size
header := w.createWAVHeader(16000, 1, 16, dataSize)
_, err = file.Write(header)
if err != nil {
w.logger.Error("Error writing WAV header", "error", err)
return err
}
w.logger.Debug("WAV header written successfully")
// Write audio data
w.logger.Debug("About to write audio data samples")
for i, sample := range audioData {
// Write little-endian 16-bit sample
_, err := file.Write([]byte{byte(sample), byte(sample >> 8)})
if err != nil {
w.logger.Error("Error writing sample", "index", i, "error", err)
return err
}
// Log progress every 10000 samples to avoid too much output
if i%10000 == 0 {
w.logger.Debug("Written samples", "count", i)
}
}
w.logger.Debug("All audio data written successfully")
return nil
}
// createWAVHeader creates a WAV file header
func (w *WhisperBinary) createWAVHeader(sampleRate, channels, bitsPerSample int, dataSize int) []byte {
header := make([]byte, 44)
copy(header[0:4], "RIFF")
// Total file size will be updated later
copy(header[8:12], "WAVE")
copy(header[12:16], "fmt ")
// fmt chunk size (16 for PCM)
header[16] = 16
header[17] = 0
header[18] = 0
header[19] = 0
// Audio format (1 = PCM)
header[20] = 1
header[21] = 0
// Number of channels
header[22] = byte(channels)
header[23] = 0
// Sample rate
header[24] = byte(sampleRate)
header[25] = byte(sampleRate >> 8)
header[26] = byte(sampleRate >> 16)
header[27] = byte(sampleRate >> 24)
// Byte rate
byteRate := sampleRate * channels * bitsPerSample / 8
header[28] = byte(byteRate)
header[29] = byte(byteRate >> 8)
header[30] = byte(byteRate >> 16)
header[31] = byte(byteRate >> 24)
// Block align
blockAlign := channels * bitsPerSample / 8
header[32] = byte(blockAlign)
header[33] = 0
// Bits per sample
header[34] = byte(bitsPerSample)
header[35] = 0
// "data" subchunk
copy(header[36:40], "data")
// Data size
header[40] = byte(dataSize)
header[41] = byte(dataSize >> 8)
header[42] = byte(dataSize >> 16)
header[43] = byte(dataSize >> 24)
return header
}
func (w *WhisperBinary) IsRecording() bool {
w.mu.Lock()
defer w.mu.Unlock()
return w.recording
}

156
extra/whisper_server.go Normal file
View File

@@ -0,0 +1,156 @@
//go:build extra
// +build extra
package extra
import (
"bytes"
"errors"
"fmt"
"io"
"log/slog"
"mime/multipart"
"net/http"
"os/exec"
"strings"
"sync"
)
// WhisperServer is a speech-to-text backend that records microphone audio
// through an external ffmpeg process into AudioBuffer.
// NOTE(review): ServerURL is presumably the whisper HTTP endpoint the
// recording is uploaded to when it stops — confirm against StopRecording.
//
// Two mutexes guard its state: mu covers the recording flag and AudioBuffer,
// cmdMu covers the ffmpeg command and its stop channel.
type WhisperServer struct {
	logger      *slog.Logger  // destination for diagnostics
	ServerURL   string        // address of the transcription server
	SampleRate  int           // capture sample rate passed to ffmpeg ("-ar")
	AudioBuffer *bytes.Buffer // raw PCM captured so far
	recording   bool          // protected by mu
	mu          sync.Mutex    // protects recording & AudioBuffer
	cmd         *exec.Cmd     // protected by cmdMu
	stopCh      chan struct{} // protected by cmdMu
	cmdMu       sync.Mutex    // protects cmd and stopCh
}
// StartRecording launches ffmpeg to capture mono signed 16-bit little-endian
// PCM from the default ALSA device at stt.SampleRate, and streams ffmpeg's
// stdout into stt.AudioBuffer from a background goroutine.
//
// It is a no-op returning nil when a recording is already in progress. An
// error is returned when the stdout pipe cannot be created or ffmpeg cannot be
// started; in that case no state on stt is modified.
func (stt *WhisperServer) StartRecording() error {
	stt.mu.Lock()
	defer stt.mu.Unlock()
	if stt.recording {
		return nil
	}
	// Build ffmpeg command for microphone capture
	args := []string{
		"-f", "alsa",
		"-i", "default",
		"-acodec", "pcm_s16le",
		"-ar", fmt.Sprint(stt.SampleRate),
		"-ac", "1",
		"-f", "s16le",
		"-",
	}
	cmd := exec.Command("ffmpeg", args...)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("failed to get stdout pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("failed to start ffmpeg: %w", err)
	}
	// Publish the process handle only once it is actually running, so a failed
	// start never leaves a dead command behind for StopRecording.
	stopCh := make(chan struct{})
	stt.cmdMu.Lock()
	stt.cmd = cmd
	stt.stopCh = stopCh
	stt.cmdMu.Unlock()
	if stt.AudioBuffer == nil {
		stt.AudioBuffer = &bytes.Buffer{}
	}
	stt.AudioBuffer.Reset()
	stt.recording = true
	// Read PCM data in a goroutine. It uses its own copy of the stop channel
	// rather than the stt.stopCh field: the field is replaced by the next
	// StartRecording, and reading it here without cmdMu would be a data race
	// that could also make this goroutine observe the wrong recording's channel.
	go func() {
		buf := make([]byte, 4096)
		for {
			select {
			case <-stopCh:
				return
			default:
			}
			n, err := stdout.Read(buf)
			if n > 0 {
				stt.mu.Lock()
				select {
				case <-stopCh:
					// The recording ended while we waited for the lock; this
					// chunk must not leak into the next recording's buffer.
				default:
					stt.AudioBuffer.Write(buf[:n])
				}
				stt.mu.Unlock()
			}
			if err != nil {
				select {
				case <-stopCh:
					// Expected: the pipe was closed when ffmpeg was stopped.
				default:
					if !errors.Is(err, io.EOF) {
						stt.logger.Error("recording read error", "error", err)
					}
				}
				return
			}
		}
	}()
	return nil
}
// StopRecording ends the current recording, uploads the captured audio to
// stt.ServerURL as a multipart WAV file and returns the transcription text.
//
// It returns an error when no recording is in progress, when AudioBuffer is
// nil, when the multipart body cannot be built, when the HTTP request fails,
// or when the server answers with a non-2xx status. stt.mu is held for the
// whole call, including the upload.
func (stt *WhisperServer) StopRecording() (string, error) {
	stt.mu.Lock()
	defer stt.mu.Unlock()
	if !stt.recording {
		return "", errors.New("not recording")
	}
	stt.recording = false
	// Stop ffmpeg
	stt.cmdMu.Lock()
	if stt.cmd != nil && stt.cmd.Process != nil {
		// Best effort: the process may already have exited, and Wait reports
		// the kill signal as an error, so neither result is actionable.
		_ = stt.cmd.Process.Kill()
		_ = stt.cmd.Wait()
	}
	if stt.stopCh != nil {
		close(stt.stopCh)
	}
	stt.cmdMu.Unlock()
	if stt.AudioBuffer == nil {
		err := errors.New("unexpected nil AudioBuffer")
		stt.logger.Error(err.Error())
		return "", err
	}
	// Build the multipart request body.
	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)
	// Add audio file part
	part, err := writer.CreateFormFile("file", "recording.wav")
	if err != nil {
		stt.logger.Error("fn: StopRecording", "error", err)
		return "", err
	}
	// Stream directly to multipart writer: header + raw data
	dataSize := stt.AudioBuffer.Len()
	stt.writeWavHeader(part, dataSize)
	if _, err := io.Copy(part, stt.AudioBuffer); err != nil {
		stt.logger.Error("fn: StopRecording", "error", err)
		return "", err
	}
	// Reset buffer for next recording
	stt.AudioBuffer.Reset()
	// Add response format field
	if err := writer.WriteField("response_format", "text"); err != nil {
		stt.logger.Error("fn: StopRecording", "error", err)
		return "", err
	}
	// Close writes the trailing boundary; its own error must be the one that is
	// reported, not the (nil) error left over from the previous call.
	if err := writer.Close(); err != nil {
		stt.logger.Error("fn: StopRecording", "error", err)
		return "", err
	}
	// Send request
	resp, err := http.Post(stt.ServerURL, writer.FormDataContentType(), body) //nolint:noctx
	if err != nil {
		stt.logger.Error("fn: StopRecording", "error", err)
		return "", err
	}
	defer resp.Body.Close()
	responseTextBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		stt.logger.Error("fn: StopRecording", "error", err)
		return "", err
	}
	// An error page must not be handed back as if it were a transcription.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		err := fmt.Errorf("whisper server returned %s: %s", resp.Status, strings.TrimSpace(string(responseTextBytes)))
		stt.logger.Error("fn: StopRecording", "error", err)
		return "", err
	}
	resptext := strings.TrimRight(string(responseTextBytes), "\n")
	// in case there are special tokens like [_BEG_]
	resptext = specialRE.ReplaceAllString(resptext, "")
	return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
}

17
go.mod
View File

@@ -4,39 +4,40 @@ go 1.25.1
require ( require (
github.com/BurntSushi/toml v1.5.0 github.com/BurntSushi/toml v1.5.0
github.com/GrailFinder/google-translate-tts v0.1.3 github.com/GrailFinder/google-translate-tts v0.1.4
github.com/GrailFinder/searchagent v0.2.0 github.com/GrailFinder/searchagent v0.2.0
github.com/PuerkitoBio/goquery v1.11.0 github.com/PuerkitoBio/goquery v1.11.0
github.com/deckarep/golang-set/v2 v2.8.0
github.com/gdamore/tcell/v2 v2.13.2 github.com/gdamore/tcell/v2 v2.13.2
github.com/glebarez/go-sqlite v1.22.0 github.com/glebarez/go-sqlite v1.22.0
github.com/gopxl/beep/v2 v2.1.1
github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b
github.com/jmoiron/sqlx v1.4.0 github.com/jmoiron/sqlx v1.4.0
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728
github.com/neurosnap/sentences v1.1.2 github.com/neurosnap/sentences v1.1.2
github.com/playwright-community/playwright-go v0.5700.1 github.com/playwright-community/playwright-go v0.5700.1
github.com/rivo/tview v0.42.0 github.com/rivo/tview v0.42.0
github.com/sugarme/tokenizer v0.3.0
github.com/yalue/onnxruntime_go v1.27.0
github.com/yuin/goldmark v1.4.13 github.com/yuin/goldmark v1.4.13
) )
require ( require (
github.com/andybalholm/cascadia v1.3.3 // indirect github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/deckarep/golang-set/v2 v2.8.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect
github.com/ebitengine/oto/v3 v3.4.0 // indirect github.com/emirpasic/gods v1.18.1 // indirect
github.com/ebitengine/purego v0.9.1 // indirect
github.com/gdamore/encoding v1.0.1 // indirect github.com/gdamore/encoding v1.0.1 // indirect
github.com/go-jose/go-jose/v3 v3.0.4 // indirect github.com/go-jose/go-jose/v3 v3.0.4 // indirect
github.com/go-stack/stack v1.8.1 // indirect github.com/go-stack/stack v1.8.1 // indirect
github.com/google/uuid v1.6.0 // indirect github.com/google/uuid v1.6.0 // indirect
github.com/hajimehoshi/go-mp3 v0.3.4 // indirect github.com/hajimehoshi/go-mp3 v0.3.4 // indirect
github.com/hajimehoshi/oto/v2 v2.3.1 // indirect
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/ncruces/go-strftime v1.0.0 // indirect github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/pkg/errors v0.9.1 // indirect github.com/patrickmn/go-cache v2.1.0+incompatible // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.4.7 // indirect github.com/rivo/uniseg v0.4.7 // indirect
github.com/schollz/progressbar/v2 v2.15.0 // indirect
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c // indirect
golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect
golang.org/x/net v0.48.0 // indirect golang.org/x/net v0.48.0 // indirect
golang.org/x/sys v0.39.0 // indirect golang.org/x/sys v0.39.0 // indirect

30
go.sum
View File

@@ -2,8 +2,8 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg= github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/GrailFinder/google-translate-tts v0.1.3 h1:Mww9tNzTWjjSh+OCbTPl/+21oMPKcUecXZfU7nTB/lA= github.com/GrailFinder/google-translate-tts v0.1.4 h1:NJoPZUGfBrmouQMN19MUcNPNUx4tmf4a8OZRME4E4Mg=
github.com/GrailFinder/google-translate-tts v0.1.3/go.mod h1:YIOLKR7sObazdUCrSex3u9OVBovU55eYgWa25vsQJ18= github.com/GrailFinder/google-translate-tts v0.1.4/go.mod h1:YIOLKR7sObazdUCrSex3u9OVBovU55eYgWa25vsQJ18=
github.com/GrailFinder/searchagent v0.2.0 h1:U2GVjLh/9xZt0xX9OcYk9Q2fMkyzyTiADPUmUisRdtQ= github.com/GrailFinder/searchagent v0.2.0 h1:U2GVjLh/9xZt0xX9OcYk9Q2fMkyzyTiADPUmUisRdtQ=
github.com/GrailFinder/searchagent v0.2.0/go.mod h1:d66tn5+22LI8IGJREUsRBT60P0sFdgQgvQRqyvgItrs= github.com/GrailFinder/searchagent v0.2.0/go.mod h1:d66tn5+22LI8IGJREUsRBT60P0sFdgQgvQRqyvgItrs=
github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw= github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw=
@@ -17,10 +17,8 @@ github.com/deckarep/golang-set/v2 v2.8.0 h1:swm0rlPCmdWn9mESxKOjWk8hXSqoxOp+Zlfu
github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/bQ= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/ebitengine/oto/v3 v3.4.0/go.mod h1:IOleLVD0m+CMak3mRVwsYY8vTctQgOM0iiL6S7Ar7eI= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/gdamore/encoding v1.0.1 h1:YzKZckdBL6jVt2Gc+5p82qhrGiqMdG/eNs6Wy0u3Uhw= github.com/gdamore/encoding v1.0.1 h1:YzKZckdBL6jVt2Gc+5p82qhrGiqMdG/eNs6Wy0u3Uhw=
github.com/gdamore/encoding v1.0.1/go.mod h1:0Z0cMFinngz9kS1QfMjCP8TY7em3bZYeeklsSDPivEo= github.com/gdamore/encoding v1.0.1/go.mod h1:0Z0cMFinngz9kS1QfMjCP8TY7em3bZYeeklsSDPivEo=
github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx2g= github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx2g=
@@ -39,13 +37,8 @@ github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17k
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gopxl/beep/v2 v2.1.1 h1:6FYIYMm2qPAdWkjX+7xwKrViS1x0Po5kDMdRkq8NVbU=
github.com/gopxl/beep/v2 v2.1.1/go.mod h1:ZAm9TGQ9lvpoiFLd4zf5B1IuyxZhgRACMId1XJbaW0E=
github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b h1:WEuQWBxelOGHA6z9lABqaMLMrfwVyMdN3UgRLT+YUPo=
github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b/go.mod h1:esZFQEUwqC+l76f2R8bIWSwXMaPbp79PppwZ1eJhFco=
github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68= github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68=
github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo= github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo=
github.com/hajimehoshi/oto/v2 v2.3.1 h1:qrLKpNus2UfD674oxckKjNJmesp9hMh7u7QCrStB3Rc=
github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo= github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
@@ -61,12 +54,14 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7ZoUw= github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7ZoUw=
github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ= github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U= github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U=
github.com/playwright-community/playwright-go v0.5700.1/go.mod h1:MlSn1dZrx8rszbCxY6x3qK89ZesJUYVx21B2JnkoNF0= github.com/playwright-community/playwright-go v0.5700.1/go.mod h1:MlSn1dZrx8rszbCxY6x3qK89ZesJUYVx21B2JnkoNF0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -77,10 +72,19 @@ github.com/rivo/tview v0.42.0 h1:b/ftp+RxtDsHSaynXTbJb+/n/BxDEi+W3UfF5jILK6c=
github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY= github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/schollz/progressbar/v2 v2.15.0 h1:dVzHQ8fHRmtPjD3K10jT3Qgn/+H+92jhPrhmxIJfDz8=
github.com/schollz/progressbar/v2 v2.15.0/go.mod h1:UdPq3prGkfQ7MOzZKlDRpYKcFqEMczbD7YmbPgpzKMI=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c h1:pwb4kNSHb4K89ymCaN+5lPH/MwnfSVg4rzGDh4d+iy4=
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c/go.mod h1:2gwkXLWbDGUQWeL3RtpCmcY4mzCtU13kb9UsAg9xMaw=
github.com/sugarme/tokenizer v0.3.0 h1:FE8DYbNSz/kSbgEo9l/RjgYHkIJYEdskumitFQBE9FE=
github.com/sugarme/tokenizer v0.3.0/go.mod h1:VJ+DLK5ZEZwzvODOWwY0cw+B1dabTd3nCB5HuFCItCc=
github.com/yalue/onnxruntime_go v1.27.0 h1:c1YSgDNtpf0WGtxj3YeRIb8VC5LmM1J+Ve3uHdteC1U=
github.com/yalue/onnxruntime_go v1.27.0/go.mod h1:b4X26A8pekNb1ACJ58wAXgNKeUCGEAQ9dmACut9Sm/4=
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=

View File

@@ -5,7 +5,6 @@ import (
"gf-lt/models" "gf-lt/models"
"gf-lt/pngmeta" "gf-lt/pngmeta"
"image" "image"
"net/url"
"os" "os"
"os/exec" "os/exec"
"path" "path"
@@ -13,6 +12,7 @@ import (
"slices" "slices"
"strconv" "strconv"
"strings" "strings"
"sync/atomic"
"time" "time"
"unicode" "unicode"
@@ -20,7 +20,8 @@ import (
) )
// Cached model color - updated by background goroutine // Cached model color - updated by background goroutine
var cachedModelColor string = "orange" // var cachedModelColor string = "orange"
var cachedModelColor atomic.Value
// startModelColorUpdater starts a background goroutine that periodically updates // startModelColorUpdater starts a background goroutine that periodically updates
// the cached model color. Only runs HTTP requests for local llama.cpp APIs. // the cached model color. Only runs HTTP requests for local llama.cpp APIs.
@@ -39,20 +40,20 @@ func startModelColorUpdater() {
// updateCachedModelColor updates the global cachedModelColor variable // updateCachedModelColor updates the global cachedModelColor variable
func updateCachedModelColor() { func updateCachedModelColor() {
if !isLocalLlamacpp() { if !isLocalLlamacpp() {
cachedModelColor = "orange" cachedModelColor.Store("orange")
return return
} }
// Check if model is loaded // Check if model is loaded
loaded, err := isModelLoaded(chatBody.Model) loaded, err := isModelLoaded(chatBody.Model)
if err != nil { if err != nil {
// On error, assume not loaded (red) // On error, assume not loaded (red)
cachedModelColor = "red" cachedModelColor.Store("red")
return return
} }
if loaded { if loaded {
cachedModelColor = "green" cachedModelColor.Store("green")
} else { } else {
cachedModelColor = "red" cachedModelColor.Store("red")
} }
} }
@@ -108,7 +109,7 @@ func refreshChatDisplay() {
textView.SetText(displayText) textView.SetText(displayText)
colorText() colorText()
updateStatusLine() updateStatusLine()
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
} }
@@ -323,19 +324,17 @@ func strInSlice(s string, sl []string) bool {
// isLocalLlamacpp checks if the current API is a local llama.cpp instance. // isLocalLlamacpp checks if the current API is a local llama.cpp instance.
func isLocalLlamacpp() bool { func isLocalLlamacpp() bool {
u, err := url.Parse(cfg.CurrentAPI) if strings.Contains(cfg.CurrentAPI, "openrouter") || strings.Contains(cfg.CurrentAPI, "deepseek") {
if err != nil {
return false return false
} }
host := u.Hostname() return true
return host == "localhost" || host == "127.0.0.1" || host == "::1"
} }
// getModelColor returns the cached color tag for the model name. // getModelColor returns the cached color tag for the model name.
// The cached value is updated by a background goroutine every 5 seconds. // The cached value is updated by a background goroutine every 5 seconds.
// For non-local models, returns orange. For local llama.cpp models, returns green if loaded, red if not. // For non-local models, returns orange. For local llama.cpp models, returns green if loaded, red if not.
func getModelColor() string { func getModelColor() string {
return cachedModelColor return cachedModelColor.Load().(string)
} }
func makeStatusLine() string { func makeStatusLine() string {
@@ -521,7 +520,7 @@ func updateFlexLayout() {
if shellMode { if shellMode {
flex.AddItem(shellInput, 0, 10, false) flex.AddItem(shellInput, 0, 10, false)
} else { } else {
flex.AddItem(textArea, 0, 10, false) flex.AddItem(bottomFlex, 0, 10, true)
} }
if positionVisible { if positionVisible {
flex.AddItem(statusLineWidget, 0, 2, false) flex.AddItem(statusLineWidget, 0, 2, false)
@@ -542,7 +541,7 @@ func executeCommandAndDisplay(cmdText string) {
cmdText = strings.TrimSpace(cmdText) cmdText = strings.TrimSpace(cmdText)
if cmdText == "" { if cmdText == "" {
fmt.Fprintf(textView, "\n[red]Error: No command provided[-:-:-]\n") fmt.Fprintf(textView, "\n[red]Error: No command provided[-:-:-]\n")
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
colorText() colorText()
@@ -574,7 +573,7 @@ func executeCommandAndDisplay(cmdText string) {
Content: "$ " + cmdText + "\n\n" + outputContent, Content: "$ " + cmdText + "\n\n" + outputContent,
} }
chatBody.Messages = append(chatBody.Messages, combinedMsg) chatBody.Messages = append(chatBody.Messages, combinedMsg)
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
colorText() colorText()
@@ -589,7 +588,7 @@ func executeCommandAndDisplay(cmdText string) {
Content: "$ " + cmdText + "\n\n" + outputContent, Content: "$ " + cmdText + "\n\n" + outputContent,
} }
chatBody.Messages = append(chatBody.Messages, combinedMsg) chatBody.Messages = append(chatBody.Messages, combinedMsg)
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
colorText() colorText()
@@ -637,7 +636,7 @@ func executeCommandAndDisplay(cmdText string) {
} }
chatBody.Messages = append(chatBody.Messages, combinedMsg) chatBody.Messages = append(chatBody.Messages, combinedMsg)
// Scroll to end and update colors // Scroll to end and update colors
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
colorText() colorText()
@@ -967,3 +966,52 @@ func extractDisplayPath(p, bp string) string {
} }
return p return p
} }
// getValidKnowToRecipient picks the character that should answer a privately
// addressed message.
//
// It returns ("", false) when character-specific context is disabled, when msg
// is nil, when msg.KnownTo names exactly the full set of chat roles (a public
// message), or when the first eligible entry is the user (cfg.UserRole or
// cfg.WriteNextMsgAs), meaning it is the user's turn. Entries equal to the
// sender (msg.Role) or to cfg.ToolRole are skipped. Otherwise the first
// remaining entry, in sorted order, is returned with true.
//
// Neither msg.KnownTo nor the slice returned by listChatRoles is modified:
// sorting is done on copies.
func getValidKnowToRecipient(msg *models.RoleMsg) (string, bool) {
	if cfg == nil || !cfg.CharSpecificContextEnabled || msg == nil {
		return "", false
	}
	// Sort copies so the comparison is order-independent without reordering
	// data owned by the caller.
	roles := slices.Clone(listChatRoles())
	knownTo := slices.Clone(msg.KnownTo)
	slices.Sort(roles)
	slices.Sort(knownTo)
	// case where all roles are in the tag => public message
	if slices.Equal(roles, knownTo) {
		logger.Info("got msg with tag mentioning every role")
		return "", false
	}
	// Walk the sorted list so the chosen recipient does not depend on the
	// order in which the tag listed the characters.
	for _, recipient := range knownTo {
		if recipient == msg.Role || recipient == cfg.ToolRole {
			// weird cases, skip
			continue
		}
		// If user is in KnownTo, stop processing - it's the user's turn
		if recipient == cfg.UserRole || recipient == cfg.WriteNextMsgAs {
			return "", false
		}
		return recipient, true
	}
	return "", false
}
// triggerPrivateMessageResponses asks the addressee of a private message to
// reply. When getValidKnowToRecipient yields no recipient nothing happens;
// otherwise a header for the upcoming reply is written to textView and a
// resumed chat round for that character is queued on chatRoundChan.
func triggerPrivateMessageResponses(msg *models.RoleMsg) {
	target, found := getValidKnowToRecipient(msg)
	if !(found && target != "") {
		return
	}
	// The request carries only the "<name>:\n" prefix, so the model continues
	// the conversation speaking as the recipient.
	req := &models.ChatRoundReq{
		Role:    target,
		Resume:  true,
		UserMsg: target + ":\n",
	}
	// Message index and role icon for the reply that is about to stream in.
	fmt.Fprintf(textView, "\n[-:-:b](%d) ", len(chatBody.Messages))
	fmt.Fprint(textView, roleToIcon(target))
	fmt.Fprint(textView, "[-:-:-]\n")
	chatRoundChan <- req
}

9
llm.go
View File

@@ -62,11 +62,11 @@ type ChunkParser interface {
func choseChunkParser() { func choseChunkParser() {
chunkParser = LCPCompletion{} chunkParser = LCPCompletion{}
switch cfg.CurrentAPI { switch cfg.CurrentAPI {
case "http://localhost:8080/completion": case "http://localhost:8080/completion", "http://127.0.0.1:8080/completion":
chunkParser = LCPCompletion{} chunkParser = LCPCompletion{}
logger.Debug("chosen lcpcompletion", "link", cfg.CurrentAPI) logger.Debug("chosen lcpcompletion", "link", cfg.CurrentAPI)
return return
case "http://localhost:8080/v1/chat/completions": case "http://localhost:8080/v1/chat/completions", "http://127.0.0.1:8080/v1/chat/completions":
chunkParser = LCPChat{} chunkParser = LCPChat{}
logger.Debug("chosen lcpchat", "link", cfg.CurrentAPI) logger.Debug("chosen lcpchat", "link", cfg.CurrentAPI)
return return
@@ -87,6 +87,11 @@ func choseChunkParser() {
logger.Debug("chosen openrouterchat", "link", cfg.CurrentAPI) logger.Debug("chosen openrouterchat", "link", cfg.CurrentAPI)
return return
default: default:
logger.Warn("unexpected case, assuming llama.cpp on non default address", "link", cfg.CurrentAPI)
if strings.Contains(cfg.CurrentAPI, "chat") {
chunkParser = LCPChat{}
return
}
chunkParser = LCPCompletion{} chunkParser = LCPCompletion{}
} }
} }

View File

@@ -1,13 +1,15 @@
package main package main
import ( import (
"sync/atomic"
"github.com/rivo/tview" "github.com/rivo/tview"
) )
var ( var (
boolColors = map[bool]string{true: "green", false: "red"} boolColors = map[bool]string{true: "green", false: "red"}
botRespMode = false botRespMode atomic.Bool
toolRunningMode = false toolRunningMode atomic.Bool
editMode = false editMode = false
roleEditMode = false roleEditMode = false
injectRole = true injectRole = true

View File

@@ -115,18 +115,12 @@ func makePropsTable(props map[string]float32) *tview.Table {
row++ row++
} }
// Add checkboxes // Add checkboxes
addCheckboxRow("RAG use", cfg.RAGEnabled, func(checked bool) {
cfg.RAGEnabled = checked
})
addCheckboxRow("Inject role", injectRole, func(checked bool) { addCheckboxRow("Inject role", injectRole, func(checked bool) {
injectRole = checked injectRole = checked
}) })
addCheckboxRow("TTS Enabled", cfg.TTS_ENABLED, func(checked bool) { addCheckboxRow("TTS Enabled", cfg.TTS_ENABLED, func(checked bool) {
cfg.TTS_ENABLED = checked cfg.TTS_ENABLED = checked
}) })
addCheckboxRow("Auto clean tool calls from context", cfg.AutoCleanToolCallsFromCtx, func(checked bool) {
cfg.AutoCleanToolCallsFromCtx = checked
})
addCheckboxRow("Enable Mouse", cfg.EnableMouse, func(checked bool) { addCheckboxRow("Enable Mouse", cfg.EnableMouse, func(checked bool) {
cfg.EnableMouse = checked cfg.EnableMouse = checked
// Reconfigure the app's mouse setting // Reconfigure the app's mouse setting

View File

@@ -9,6 +9,13 @@ import (
"gf-lt/models" "gf-lt/models"
"log/slog" "log/slog"
"net/http" "net/http"
"os"
"sync"
"time"
"github.com/sugarme/tokenizer"
"github.com/sugarme/tokenizer/pretrained"
"github.com/yalue/onnxruntime_go"
) )
// Embedder defines the interface for embedding text // Embedder defines the interface for embedding text
@@ -27,7 +34,9 @@ type APIEmbedder struct {
func NewAPIEmbedder(l *slog.Logger, cfg *config.Config) *APIEmbedder { func NewAPIEmbedder(l *slog.Logger, cfg *config.Config) *APIEmbedder {
return &APIEmbedder{ return &APIEmbedder{
logger: l, logger: l,
client: &http.Client{}, client: &http.Client{
Timeout: 30 * time.Second,
},
cfg: cfg, cfg: cfg,
} }
} }
@@ -134,11 +143,302 @@ func (a *APIEmbedder) EmbedSlice(lines []string) ([][]float32, error) {
return embeddings, nil return embeddings, nil
} }
// TODO: ONNXEmbedder implementation would go here
// This would require:
// 1. Loading ONNX models locally // 1. Loading ONNX models locally
// 2. Using a Go ONNX runtime (like gorgonia/onnx or similar) // 2. Using a Go ONNX runtime (like gorgonia/onnx or similar)
// 3. Converting text to embeddings without external API calls // 3. Converting text to embeddings without external API calls
// type ONNXEmbedder struct {
// For now, we'll focus on the API implementation which is already working in the current system, session *onnxruntime_go.DynamicAdvancedSession
// and can be extended later when we have ONNX runtime integration tokenizer *tokenizer.Tokenizer
tokenizerPath string
dims int
logger *slog.Logger
mu sync.Mutex
modelPath string
}
// Process-wide ONNX runtime state.
var (
	// onnxInitOnce makes sure the runtime environment is initialised only once.
	onnxInitOnce sync.Once
	// onnxReady records whether that one-time initialisation succeeded.
	onnxReady bool
	// onnxLibPath is the ONNX runtime shared library chosen by NewONNXEmbedder.
	onnxLibPath string
	// cudaLibPath is the CUDA provider library, or "" when none was found.
	cudaLibPath string
)

// onnxLibPaths lists the locations probed, in order, for the ONNX runtime
// shared library.
var onnxLibPaths = []string{
	"/usr/lib/libonnxruntime.so",
	"/usr/lib/libonnxruntime.so.1.24.2",
	"/usr/local/lib/libonnxruntime.so",
	"/usr/lib/x86_64-linux-gnu/libonnxruntime.so",
	"/opt/onnxruntime/lib/libonnxruntime.so",
}

// cudaLibPaths lists the locations probed, in order, for the CUDA execution
// provider library.
var cudaLibPaths = []string{
	"/usr/lib/libonnxruntime_providers_cuda.so",
	"/usr/local/lib/libonnxruntime_providers_cuda.so",
	"/opt/onnxruntime/lib/libonnxruntime_providers_cuda.so",
}
// findONNXLibrary returns the first entry of onnxLibPaths that exists on disk,
// or "" when none of them can be stat'ed.
func findONNXLibrary() string {
	for i := range onnxLibPaths {
		candidate := onnxLibPaths[i]
		if _, statErr := os.Stat(candidate); statErr != nil {
			continue
		}
		return candidate
	}
	return ""
}
// findCUDALibrary returns the first entry of cudaLibPaths that exists on disk,
// or "" when none of them can be stat'ed.
func findCUDALibrary() string {
	for i := range cudaLibPaths {
		candidate := cudaLibPaths[i]
		if _, statErr := os.Stat(candidate); statErr != nil {
			continue
		}
		return candidate
	}
	return ""
}
// NewONNXEmbedder validates the inputs for a local ONNX embedder and returns
// it without loading the model; the tokenizer and session are created lazily
// by ensureInitialized.
//
// modelPath and tokenizerPath must exist on disk. dims is the width of the
// embedding vector the model produces. logger may be nil, in which case
// slog.Default() is used.
//
// An error is returned when either file is missing or when no ONNX runtime
// shared library is found. A missing CUDA provider library is not an error:
// it is logged as a warning and inference falls back to CPU.
//
// As a side effect the package-level onnxLibPath and cudaLibPath are set to
// the libraries that were found.
func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Logger) (*ONNXEmbedder, error) {
	if logger == nil {
		// Never store a nil logger: later logging calls would panic.
		logger = slog.Default()
	}
	// Check if model and tokenizer files exist
	if _, err := os.Stat(modelPath); err != nil {
		return nil, fmt.Errorf("ONNX model not found: %w", err)
	}
	if _, err := os.Stat(tokenizerPath); err != nil {
		return nil, fmt.Errorf("tokenizer not found: %w", err)
	}
	// Find ONNX library
	onnxLibPath = findONNXLibrary()
	if onnxLibPath == "" {
		return nil, errors.New("ONNX runtime library not found in standard locations")
	}
	// Find CUDA provider library (optional)
	cudaLibPath = findCUDALibrary()
	if cudaLibPath == "" {
		// Use the logger instead of stdout so the message ends up with the
		// rest of the application's logs.
		logger.Warn("CUDA provider library not found, will use CPU")
	}
	emb := &ONNXEmbedder{
		tokenizerPath: tokenizerPath,
		dims:          dims,
		logger:        logger,
		modelPath:     modelPath,
	}
	return emb, nil
}
// ensureInitialized lazily prepares the embedder for inference: it loads the
// tokenizer from e.tokenizerPath, initialises the ONNX runtime environment
// once per process, and creates the inference session for the model with
// inputs "input_ids" and "attention_mask" and output "sentence_embedding".
//
// The CUDA execution provider is attempted when a CUDA provider library was
// found; any failure while configuring it is logged and the session is
// created without it.
//
// It returns nil when a session already exists, and an error when the
// tokenizer cannot be loaded, the runtime environment failed to initialise,
// or the session cannot be created.
func (e *ONNXEmbedder) ensureInitialized() error {
	// Take the lock before looking at e.session: the field is written under
	// this mutex (here and in Destroy), so reading it unlocked would be a
	// data race.
	e.mu.Lock()
	defer e.mu.Unlock()
	if e.session != nil {
		return nil
	}
	// Load tokenizer lazily
	if e.tokenizer == nil {
		tok, err := pretrained.FromFile(e.tokenizerPath)
		if err != nil {
			return fmt.Errorf("failed to load tokenizer: %w", err)
		}
		e.tokenizer = tok
	}
	// The runtime environment is process-wide, so it is set up only once. A
	// failure is remembered through onnxReady and is not retried.
	onnxInitOnce.Do(func() {
		onnxruntime_go.SetSharedLibraryPath(onnxLibPath)
		if err := onnxruntime_go.InitializeEnvironment(); err != nil {
			e.logger.Error("failed to initialize ONNX runtime", "error", err)
			onnxReady = false
			return
		}
		// Register CUDA provider if available
		if cudaLibPath != "" {
			if err := onnxruntime_go.RegisterExecutionProviderLibrary("CUDA", cudaLibPath); err != nil {
				e.logger.Warn("failed to register CUDA provider", "error", err)
			}
		}
		onnxReady = true
	})
	if !onnxReady {
		return errors.New("ONNX runtime not ready")
	}
	// Create session options
	opts, err := onnxruntime_go.NewSessionOptions()
	if err != nil {
		return fmt.Errorf("failed to create session options: %w", err)
	}
	defer func() {
		// Cleanup only; a failure here is not actionable.
		_ = opts.Destroy()
	}()
	// Try to add CUDA provider; every failure below degrades to CPU.
	useCUDA := cudaLibPath != ""
	if useCUDA {
		cudaOpts, err := onnxruntime_go.NewCUDAProviderOptions()
		if err != nil {
			e.logger.Warn("failed to create CUDA provider options, falling back to CPU", "error", err)
			useCUDA = false
		} else {
			defer func() {
				_ = cudaOpts.Destroy()
			}()
			if err := cudaOpts.Update(map[string]string{"device_id": "0"}); err != nil {
				e.logger.Warn("failed to update CUDA options, falling back to CPU", "error", err)
				useCUDA = false
			} else if err := opts.AppendExecutionProviderCUDA(cudaOpts); err != nil {
				e.logger.Warn("failed to append CUDA provider, falling back to CPU", "error", err)
				useCUDA = false
			}
		}
	}
	if useCUDA {
		e.logger.Info("Using CUDA for ONNX inference")
	} else {
		e.logger.Info("Using CPU for ONNX inference")
	}
	// Create session with options
	session, err := onnxruntime_go.NewDynamicAdvancedSession(
		e.getModelPath(),
		[]string{"input_ids", "attention_mask"},
		[]string{"sentence_embedding"},
		opts,
	)
	if err != nil {
		return fmt.Errorf("failed to create ONNX session: %w", err)
	}
	e.session = session
	return nil
}
// getModelPath returns the path of the ONNX model file stored on the embedder
// (the modelPath field). It is the path handed to the ONNX session when the
// session is created.
func (e *ONNXEmbedder) getModelPath() string {
return e.modelPath
}
// Destroy releases the ONNX session held by the embedder, if there is one.
// It takes the embedder mutex for the whole call. When no session exists the
// call is a no-op. If the session cannot be destroyed the error is returned
// wrapped and the session field is left untouched.
func (e *ONNXEmbedder) Destroy() error {
	e.mu.Lock()
	defer e.mu.Unlock()
	// Nothing to release.
	if e.session == nil {
		return nil
	}
	if err := e.session.Destroy(); err != nil {
		return fmt.Errorf("failed to destroy ONNX session: %w", err)
	}
	// Clear the field so a later call sees that no session is loaded.
	e.session = nil
	e.logger.Info("ONNX session destroyed, VRAM freed")
	return nil
}
// Embed computes the embedding of a single text.
//
// The text is tokenized, the token ids are fed to the ONNX session as a
// batch of one sequence together with an all-ones attention mask, and the
// contents of the output tensor are returned as a freshly allocated slice.
// Any failure (initialization, tokenization, tensor creation, inference) is
// returned as an error with a nil result.
func (e *ONNXEmbedder) Embed(text string) ([]float32, error) {
	if err := e.ensureInitialized(); err != nil {
		return nil, err
	}
	// Tokenize the input.
	enc, err := e.tokenizer.EncodeSingle(text)
	if err != nil {
		return nil, fmt.Errorf("tokenization failed: %w", err)
	}
	// Widen token ids to int64 and mark every position as a real token.
	tokenCount := len(enc.Ids)
	tokenIDs := make([]int64, tokenCount)
	mask := make([]int64, tokenCount)
	for pos := range enc.Ids {
		tokenIDs[pos] = int64(enc.Ids[pos])
		mask[pos] = 1
	}
	// Both inputs have shape [1, tokenCount].
	idsTensor, err := onnxruntime_go.NewTensor[int64](onnxruntime_go.NewShape(1, int64(tokenCount)), tokenIDs)
	if err != nil {
		return nil, fmt.Errorf("failed to create input_ids tensor: %w", err)
	}
	defer func() { _ = idsTensor.Destroy() }()
	attnTensor, err := onnxruntime_go.NewTensor[int64](onnxruntime_go.NewShape(1, int64(tokenCount)), mask)
	if err != nil {
		return nil, fmt.Errorf("failed to create attention_mask tensor: %w", err)
	}
	defer func() { _ = attnTensor.Destroy() }()
	// The output holds one vector of e.dims floats.
	outTensor, err := onnxruntime_go.NewEmptyTensor[float32](onnxruntime_go.NewShape(1, int64(e.dims)))
	if err != nil {
		return nil, fmt.Errorf("failed to create output tensor: %w", err)
	}
	defer func() { _ = outTensor.Destroy() }()
	inputs := []onnxruntime_go.Value{idsTensor, attnTensor}
	outputs := []onnxruntime_go.Value{outTensor}
	if err = e.session.Run(inputs, outputs); err != nil {
		return nil, fmt.Errorf("inference failed: %w", err)
	}
	// Copy the data out because the tensor is destroyed when this function returns.
	raw := outTensor.GetData()
	result := make([]float32, len(raw))
	copy(result, raw)
	return result, nil
}
// EmbedSlice computes embeddings for a batch of texts in a single inference
// call and returns one vector of e.dims floats per input, in input order.
//
// Every text is tokenized; sequences shorter than the longest one are
// right-padded with zero ids and a zero attention mask. An empty input
// returns an empty result without touching the model. Tokenization, tensor
// creation and inference failures are returned as wrapped errors; previously
// tensor creation errors were discarded, which led to nil tensors being
// destroyed and passed to the session.
func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) {
	// A zero-sized batch cannot be expressed as a tensor shape; answer directly.
	if len(texts) == 0 {
		return [][]float32{}, nil
	}
	if err := e.ensureInitialized(); err != nil {
		return nil, err
	}
	encodings := make([]*tokenizer.Encoding, len(texts))
	maxLen := 0
	for i, txt := range texts {
		enc, err := e.tokenizer.EncodeSingle(txt)
		if err != nil {
			return nil, fmt.Errorf("tokenization failed for text %d: %w", i, err)
		}
		encodings[i] = enc
		if l := len(enc.Ids); l > maxLen {
			maxLen = l
		}
	}
	batchSize := len(texts)
	// Row-major [batchSize, maxLen] buffers; untouched positions stay zero,
	// which serves as padding (id 0, mask 0).
	inputIDs := make([]int64, batchSize*maxLen)
	attentionMask := make([]int64, batchSize*maxLen)
	for i, enc := range encodings {
		offset := i * maxLen
		for j, id := range enc.Ids {
			inputIDs[offset+j] = int64(id)
			attentionMask[offset+j] = 1
		}
	}
	inputTensor, err := onnxruntime_go.NewTensor[int64](
		onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)),
		inputIDs,
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create input_ids tensor: %w", err)
	}
	defer func() { _ = inputTensor.Destroy() }()
	maskTensor, err := onnxruntime_go.NewTensor[int64](
		onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)),
		attentionMask,
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create attention_mask tensor: %w", err)
	}
	defer func() { _ = maskTensor.Destroy() }()
	outputTensor, err := onnxruntime_go.NewEmptyTensor[float32](
		onnxruntime_go.NewShape(int64(batchSize), int64(e.dims)),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create output tensor: %w", err)
	}
	defer func() { _ = outputTensor.Destroy() }()
	err = e.session.Run(
		[]onnxruntime_go.Value{inputTensor, maskTensor},
		[]onnxruntime_go.Value{outputTensor},
	)
	if err != nil {
		return nil, fmt.Errorf("inference failed: %w", err)
	}
	data := outputTensor.GetData()
	// Guard the slicing below against an output that is shorter than expected.
	if len(data) < batchSize*e.dims {
		return nil, fmt.Errorf("unexpected output size: got %d values, want %d", len(data), batchSize*e.dims)
	}
	// Copy each row out because the tensor is destroyed when this function returns.
	embeddings := make([][]float32, batchSize)
	for i := 0; i < batchSize; i++ {
		start := i * e.dims
		emb := make([]float32, e.dims)
		copy(emb, data[start:start+e.dims])
		embeddings[i] = emb
	}
	return embeddings, nil
}

File diff suppressed because it is too large Load Diff

409
rag/rag_integration_test.go Normal file
View File

@@ -0,0 +1,409 @@
package rag
import (
	"fmt"
	"gf-lt/config"
	"gf-lt/models"
	"gf-lt/storage"
	"io"
	"log/slog"
	"testing"

	_ "github.com/glebarez/go-sqlite"
	"github.com/jmoiron/sqlx"
)
// mockEmbedder is a test double that produces all-zero vectors of length dim
// regardless of the input text.
type mockEmbedder struct {
	dim int
}
// Embed ignores text and returns a new zero vector of length m.dim.
func (m *mockEmbedder) Embed(text string) ([]float32, error) {
	return make([]float32, m.dim), nil
}
// EmbedSlice returns one new zero vector of length m.dim per input text.
func (m *mockEmbedder) EmbedSlice(texts []string) ([][]float32, error) {
	out := make([][]float32, 0, len(texts))
	for range texts {
		out = append(out, make([]float32, m.dim))
	}
	return out, nil
}
// dummyStore is a stub implementation of storage.FullRepo for tests.
// It wraps a database handle and exposes it through DB(); every other method
// is a no-op that returns zero values (or echoes its argument back).
type dummyStore struct {
	db *sqlx.DB
}
// DB returns the wrapped database handle.
func (s dummyStore) DB() *sqlx.DB {
	return s.db
}
// Chat history stubs.
func (s dummyStore) ListChats() ([]models.Chat, error) {
	return nil, nil
}
func (s dummyStore) GetChatByID(id uint32) (*models.Chat, error) {
	return nil, nil
}
func (s dummyStore) GetChatByChar(char string) ([]models.Chat, error) {
	return nil, nil
}
func (s dummyStore) GetLastChat() (*models.Chat, error) {
	return nil, nil
}
func (s dummyStore) GetLastChatByAgent(agent string) (*models.Chat, error) {
	return nil, nil
}
// UpsertChat returns the chat it was given without storing it.
func (s dummyStore) UpsertChat(chat *models.Chat) (*models.Chat, error) {
	return chat, nil
}
func (s dummyStore) RemoveChat(id uint32) error {
	return nil
}
func (s dummyStore) ChatGetMaxID() (uint32, error) {
	return 0, nil
}
// Memory stubs.
// Memorise returns the memory it was given without storing it.
func (s dummyStore) Memorise(m *models.Memory) (*models.Memory, error) {
	return m, nil
}
func (s dummyStore) Recall(agent, topic string) (string, error) {
	return "", nil
}
func (s dummyStore) RecallTopics(agent string) ([]string, error) {
	return nil, nil
}
// Vector repository stubs (present only to satisfy the interface).
func (s dummyStore) WriteVector(row *models.VectorRow) error {
	return nil
}
func (s dummyStore) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) {
	return nil, nil
}
func (s dummyStore) ListFiles() ([]string, error) {
	return nil, nil
}
func (s dummyStore) RemoveEmbByFileName(filename string) error {
	return nil
}
// Compile-time check that dummyStore satisfies storage.FullRepo.
var _ storage.FullRepo = dummyStore{}
// setupTestRAG builds a RAG instance for tests.
//
// It opens an in-memory SQLite database, creates the embeddings_768 table and
// the fts_embeddings FTS5 table, constructs a RAG via New with a 768-dimension
// config, swaps the embedder for a mockEmbedder, and writes the given chunks
// through the RAG's vector storage. Chunks whose Embeddings length is not 768
// are mutated in place to carry a zero vector of that length.
//
// The database is closed automatically when the test finishes. Any setup
// failure is returned as a wrapped error.
func setupTestRAG(t *testing.T, chunks []*models.VectorRow) (*RAG, error) {
	t.Helper()
	db, err := sqlx.Open("sqlite", ":memory:")
	if err != nil {
		return nil, fmt.Errorf("open inmemory db: %w", err)
	}
	// Release the database once the test (including subtests) is done,
	// also on the early error returns below.
	t.Cleanup(func() { _ = db.Close() })
	// Create the required tables (embeddings_768 and fts_embeddings).
	_, err = db.Exec(`
CREATE TABLE embeddings_768 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
embeddings BLOB NOT NULL,
slug TEXT NOT NULL,
raw_text TEXT NOT NULL,
filename TEXT NOT NULL DEFAULT ''
);
`)
	if err != nil {
		return nil, fmt.Errorf("create embeddings table: %w", err)
	}
	_, err = db.Exec(`
CREATE VIRTUAL TABLE fts_embeddings USING fts5(
slug UNINDEXED,
raw_text,
filename UNINDEXED,
embedding_size UNINDEXED,
tokenize='porter unicode61'
);
`)
	if err != nil {
		return nil, fmt.Errorf("create FTS table: %w", err)
	}
	// Logger that discards output. The writer must be non-nil: a nil writer
	// makes the handler panic as soon as an Error-level record is emitted.
	logger := slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{Level: slog.LevelError}))
	store := dummyStore{db: db}
	// Config with embedding dimension 768, matching the embeddings_768 table.
	cfg := &config.Config{
		EmbedDims:       768,
		RAGWordLimit:    250,
		RAGOverlapWords: 25,
		RAGBatchSize:    1,
	}
	// New creates an embedder based on config; it is replaced right after
	// through the SetEmbedderForTesting hook.
	rag, err := New(logger, store, cfg)
	if err != nil {
		return nil, fmt.Errorf("create RAG: %w", err)
	}
	rag.SetEmbedderForTesting(&mockEmbedder{dim: cfg.EmbedDims})
	if len(chunks) == 0 {
		return rag, nil
	}
	// Ensure each chunk has embeddings of the configured dimension (zero vector).
	for _, chunk := range chunks {
		if len(chunk.Embeddings) != cfg.EmbedDims {
			chunk.Embeddings = make([]float32, cfg.EmbedDims)
		}
	}
	// Insert the provided chunks using the storage directly.
	if err = rag.storage.WriteVectors(chunks); err != nil {
		return nil, fmt.Errorf("write test chunks: %w", err)
	}
	return rag, nil
}
// createTestChunks builds the fixture rows used by the search tests.
// The returned slice contains, in order: the target chunk
// (slug kjv_bible.epub_1786_0), fourteen Leviticus "noise" chunks (several of
// which mention "bald"), and two unrelated chunks from ghost_7.txt.
// Every row is returned with nil Embeddings.
func createTestChunks() []*models.VectorRow {
// Target chunk: 2 Kings 2:23-24 containing "bald head" and "two she bears".
targetRaw := `And he said, Ye shall not send.
2:17 And when they urged him till he was ashamed, he said, Send. They sent
therefore fifty men; and they sought three days, but found him not.
2:18 And when they came again to him, (for he tarried at Jericho,) he said unto
them, Did I not say unto you, Go not? 2:19 And the men of the city said unto
Elisha, Behold, I pray thee, the situation of this city is pleasant, as my lord
seeth: but the water is naught, and the ground barren.
2:20 And he said, Bring me a new cruse, and put salt therein. And they brought
it to him.
2:21 And he went forth unto the spring of the waters, and cast the salt in
there, and said, Thus saith the LORD, I have healed these waters; there shall
not be from thence any more death or barren land.
2:22 So the waters were healed unto this day, according to the saying of Elisha
which he spake.
2:23 And he went up from thence unto Bethel: and as he was going up by the way,
there came forth little children out of the city, and mocked him, and said unto
him, Go up, thou bald head; go up, thou bald head.
2:24 And he turned back, and looked on them, and cursed them in the name of the
LORD. And there came forth two she bears out of the wood, and tare forty and
two children of them.`
// Noise chunk 1: Leviticus 11 passage containing "bald locust".
noise1Raw := `11:12 Whatsoever hath no fins nor scales in the waters, that shall be an
abomination unto you.
11:13 And these are they which ye shall have in abomination among the fowls;
they shall not be eaten, they are an abomination: the eagle, and the ossifrage,
and the ospray, 11:14 And the vulture, and the kite after his kind; 11:15 Every
raven after his kind; 11:16 And the owl, and the night hawk, and the cuckow,
and the hawk after his kind, 11:17 And the little owl, and the cormorant, and
the great owl, 11:18 And the swan, and the pelican, and the gier eagle, 11:19
And the stork, the heron after her kind, and the lapwing, and the bat.
11:20 All fowls that creep, going upon all four, shall be an abomination unto
you.
11:21 Yet these may ye eat of every flying creeping thing that goeth upon all
four, which have legs above their feet, to leap withal upon the earth; 11:22
Even these of them ye may eat; the locust after his kind, and the bald locust
after his kind, and the beetle after his kind, and the grasshopper after his
kind.
11:23 But all other flying creeping things, which have four feet, shall be an
abomination unto you.
11:24 And for these ye shall be unclean: whosoever toucheth the carcase of them
shall be unclean until the even.`
// Noise chunk 2: the same Leviticus 11 passage starting one verse later
// (an overlapping chunk), also containing "bald locust".
noise2Raw := `11:13 And these are they which ye shall have in abomination among the fowls;
they shall not be eaten, they are an abomination: the eagle, and the ossifrage,
and the ospray, 11:14 And the vulture, and the kite after his kind; 11:15 Every
raven after his kind; 11:16 And the owl, and the night hawk, and the cuckow,
and the hawk after his kind, 11:17 And the little owl, and the cormorant, and
the great owl, 11:18 And the swan, and the pelican, and the gier eagle, 11:19
And the stork, the heron after her kind, and the lapwing, and the bat.
11:20 All fowls that creep, going upon all four, shall be an abomination unto
you.
11:21 Yet these may ye eat of every flying creeping thing that goeth upon all
four, which have legs above their feet, to leap withal upon the earth; 11:22
Even these of them ye may eat; the locust after his kind, and the bald locust
after his kind, and the beetle after his kind, and the grasshopper after his
kind.
11:23 But all other flying creeping things, which have four feet, shall be an
abomination unto you.
11:24 And for these ye shall be unclean: whosoever toucheth the carcase of them
shall be unclean until the even.`
// Additional Leviticus noise chunks. Fourteen slugs are paired with fourteen
// texts below (the first two texts are the noise chunks defined above).
// NOTE(review): an earlier comment spoke of simulating 28 bald-related
// chunks; only 14 are created here, and not all of them contain "bald".
leviticusSlugs := []string{
"kjv_bible.epub_564_0",
"kjv_bible.epub_565_0",
"kjv_bible.epub_579_0",
"kjv_bible.epub_580_0",
"kjv_bible.epub_581_0",
"kjv_bible.epub_582_0",
"kjv_bible.epub_583_0",
"kjv_bible.epub_584_0",
"kjv_bible.epub_585_0",
"kjv_bible.epub_586_0",
"kjv_bible.epub_587_0",
"kjv_bible.epub_588_0",
"kjv_bible.epub_589_0",
"kjv_bible.epub_590_0",
}
leviticusTexts := []string{
noise1Raw,
noise2Raw,
`13:40 And the man whose hair is fallen off his head, he is bald; yet is he
clean.
13:41 And he that hath his hair fallen off from the part of his head toward his
face, he is forehead bald; yet is he clean.`,
`13:42 And if there be in the bald head, or bald forehead, a white reddish sore;
it is a leprosy sprung up in his bald head, or his bald forehead.`,
`13:43 Then the priest shall look upon it: and, behold, if the rising of the
sore be white reddish in his bald head, or in his bald forehead, as the leprosy
appearedh in the skin of the flesh;`,
`13:44 He is a leprous man, he is unclean: the priest shall pronounce him utterly
unclean; his plague is in his head.`,
`13:45 And the leper in whom the plague is, his clothes shall be rent, and his
head bare, and he shall put a covering upon his upper lip, and shall cry,
Unclean, unclean.`,
`13:46 All the days wherein the plague shall be in him he shall be defiled; he
is unclean: he shall dwell alone; without the camp shall his habitation be.`,
`13:47 The garment also that the plague of leprosy is in, whether it be a woollen
garment, or a linen garment;`,
`13:48 Whether it be in the warp, or woof; of linen, or of woollen; whether in a
skin, or in any thing made of skin;`,
`13:49 And if the plague be greenish or reddish in the garment, or in the skin,
either in the warp, or in the woof, or in any thing of skin; it is a plague of
leprosy, and shall be shewed unto the priest:`,
`13:50 And the priest shall look upon the plague, and shut up it that hath the
plague seven days:`,
`13:51 And he shall look on the plague on the seventh day: if the plague be spread
in the garment, either in the warp, or in the woof, or in a skin, or in any work
that is made of skin; the plague is a fretting leprosy; it is unclean.`,
`13:52 He shall therefore burn that garment, whether warp or woof, in woollen or
in linen, or any thing of skin, wherein the plague is: for it is a fretting
leprosy; it shall be burnt in the fire.`,
}
// Unrelated chunk 1 (slug ghost_7.txt_777_0): prose with no overlap with the queries.
unrelated1Raw := `Doesnt he have any pride as a hunter?!
I didnt see what other choice I had. I would just have to grovel and be ready to flee at any given moment.
The Hidden Curse clan house was in the central region of the imperial capital. It was a high-class area with extraordinary property values that hosted the residences of people like Lord Gladis. This district was near the Imperial Castle, though “near” was a
relative term as it was still a few kilometers away.
The clan house was made of brick and conformed to an older style of architecture.`
// Unrelated chunk 2 (slug ghost_7.txt_778_0): overlaps with unrelated chunk 1.
unrelated2Raw := `I would just have to grovel and be ready to flee at any given moment.
The Hidden Curse clan house was in the central region of the imperial capital. It was a high-class area with extraordinary property values that hosted the residences of people like Lord Gladis. This district was near the Imperial Castle, though “near” was a
relative term as it was still a few kilometers away.
The clan house was made of brick and conformed to an older style of architecture. Nearly everyone knew about this mansion and its clock tower. It stood tall over the neighboring mansions and rumor had it that you could see the whole capital from the top. It
spoke to this clans renown and history that they were able to get away with building something that dwarfed the mansions of the nobility.`
chunks := []*models.VectorRow{
{
Slug: "kjv_bible.epub_1786_0",
RawText: targetRaw,
FileName: "kjv_bible.epub",
Embeddings: nil, // left nil here; callers are expected to fill in a vector before storing
},
}
// Add Leviticus noise chunks; the modulo keeps the index in range should the
// slug list ever grow longer than the text list.
for i, slug := range leviticusSlugs {
text := leviticusTexts[i%len(leviticusTexts)]
chunks = append(chunks, &models.VectorRow{
Slug: slug,
RawText: text,
FileName: "kjv_bible.epub",
Embeddings: nil,
})
}
// Add the two unrelated chunks last.
chunks = append(chunks,
&models.VectorRow{
Slug: "ghost_7.txt_777_0",
RawText: unrelated1Raw,
FileName: "ghost_7.txt",
Embeddings: nil,
},
&models.VectorRow{
Slug: "ghost_7.txt_778_0",
RawText: unrelated2Raw,
FileName: "ghost_7.txt",
Embeddings: nil,
},
)
return chunks
}
// assertTargetInTopN reports whether the target chunk (slug
// kjv_bible.epub_1786_0) appears among the first topN entries of results.
// Despite its name it does not fail the test itself; callers act on the
// returned boolean.
func assertTargetInTopN(t *testing.T, results []models.VectorRow, topN int) bool {
	t.Helper()
	// Only the first topN rows are inspected, or all of them if there are fewer.
	limit := topN
	if limit > len(results) {
		limit = len(results)
	}
	for idx := 0; idx < limit; idx++ {
		if results[idx].Slug == "kjv_bible.epub_1786_0" {
			return true
		}
	}
	return false
}
func TestBiblicalQuery(t *testing.T) {
chunks := createTestChunks()
rag, err := setupTestRAG(t, chunks)
if err != nil {
t.Fatalf("setup failed: %v", err)
}
query := "bald prophet and two she bears"
results, err := rag.Search(query, 10)
if err != nil {
t.Fatalf("search failed: %v", err)
}
// The target chunk should be in the top results.
if !assertTargetInTopN(t, results, 5) {
t.Errorf("target chunk not found in top 5 results for query %q", query)
t.Logf("results slugs: %v", func() []string {
slugs := make([]string, len(results))
for i, r := range results {
slugs[i] = r.Slug
}
return slugs
}())
}
}
func TestQueryVariations(t *testing.T) {
chunks := createTestChunks()
rag, err := setupTestRAG(t, chunks)
if err != nil {
t.Fatalf("setup failed: %v", err)
}
tests := []struct {
name string
query string
topN int
}{
{"she bears", "she bears", 5},
{"bald head", "bald head", 5},
{"two she bears out of the wood", "two she bears out of the wood", 5},
{"bald prophet", "bald prophet", 10},
{"go up thou bald head", "\"go up thou bald head\"", 5},
{"two she bears", "\"two she bears\"", 5},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
results, err := rag.Search(tt.query, 10)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if !assertTargetInTopN(t, results, tt.topN) {
t.Errorf("target chunk not found in top %d results for query %q", tt.topN, tt.query)
t.Logf("results slugs: %v", func() []string {
slugs := make([]string, len(results))
for i, r := range results {
slugs[i] = r.Slug
}
return slugs
}())
}
})
}
}

131
rag/rag_real_test.go Normal file
View File

@@ -0,0 +1,131 @@
package rag
import (
"gf-lt/config"
"gf-lt/storage"
"log/slog"
"os"
"path/filepath"
"testing"
)
// TestRealBiblicalQuery runs the query "bald prophet and two she bears"
// through a RAG instance backed by the real ONNX embedder and the project
// database, and expects the target chunk (kjv_bible.epub_1786_0) among the
// 30 requested results.
//
// The test is skipped in -short mode and when the model file, the tokenizer
// file or the database file is missing.
func TestRealBiblicalQuery(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping real embedder test in short mode")
	}
	// Check that the embedder model, tokenizer and database exist.
	modelPath := filepath.Join("..", "onnx", "embedgemma", "model_q4.onnx")
	if _, err := os.Stat(modelPath); os.IsNotExist(err) {
		t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath)
	}
	tokenizerPath := filepath.Join("..", "onnx", "embedgemma", "tokenizer.json")
	if _, err := os.Stat(tokenizerPath); os.IsNotExist(err) {
		t.Skipf("tokenizer not found at %s; skipping real embedder test", tokenizerPath)
	}
	dbPath := filepath.Join("..", "gflt.db")
	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
		t.Skipf("database not found at %s; skipping real embedder test", dbPath)
	}
	cfg := &config.Config{
		EmbedModelPath:     modelPath,
		EmbedTokenizerPath: tokenizerPath,
		EmbedDims:          768,
		RAGWordLimit:       250,
		RAGOverlapWords:    25,
		RAGBatchSize:       1,
	}
	// Error-level logs go to stderr. The handler needs a non-nil writer:
	// with nil, the first Error-level record would panic inside slog.
	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))
	store := storage.NewProviderSQL(dbPath, logger)
	if store == nil {
		t.Fatal("failed to create storage provider")
	}
	rag, err := New(logger, store, cfg)
	if err != nil {
		t.Fatalf("failed to create RAG instance: %v", err)
	}
	t.Cleanup(func() { rag.Destroy() })
	query := "bald prophet and two she bears"
	results, err := rag.Search(query, 30)
	if err != nil {
		t.Fatalf("search failed: %v", err)
	}
	found := false
	for i, row := range results {
		if row.Slug == "kjv_bible.epub_1786_0" {
			found = true
			t.Logf("target chunk found at rank %d", i+1)
			break
		}
	}
	if !found {
		t.Errorf("target chunk not found in search results for query %q", query)
		t.Logf("results slugs:")
		for i, r := range results {
			t.Logf("%d: %s", i+1, r.Slug)
		}
	}
}
// TestRealQueryVariations runs several phrasings of the query (plain and
// quoted) through a RAG instance backed by the real ONNX embedder and the
// project database. Each subtest requests ten results and expects the target
// chunk (kjv_bible.epub_1786_0) among them.
//
// The test is skipped in -short mode and when the model file, the tokenizer
// file or the database file is missing.
func TestRealQueryVariations(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping real embedder test in short mode")
	}
	modelPath := filepath.Join("..", "onnx", "embedgemma", "model_q4.onnx")
	if _, err := os.Stat(modelPath); os.IsNotExist(err) {
		t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath)
	}
	tokenizerPath := filepath.Join("..", "onnx", "embedgemma", "tokenizer.json")
	if _, err := os.Stat(tokenizerPath); os.IsNotExist(err) {
		t.Skipf("tokenizer not found at %s; skipping real embedder test", tokenizerPath)
	}
	dbPath := filepath.Join("..", "gflt.db")
	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
		t.Skipf("database not found at %s; skipping real embedder test", dbPath)
	}
	cfg := &config.Config{
		EmbedModelPath:     modelPath,
		EmbedTokenizerPath: tokenizerPath,
		EmbedDims:          768,
		RAGWordLimit:       250,
		RAGOverlapWords:    25,
		RAGBatchSize:       1,
	}
	// Error-level logs go to stderr. The handler needs a non-nil writer:
	// with nil, the first Error-level record would panic inside slog.
	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))
	store := storage.NewProviderSQL(dbPath, logger)
	if store == nil {
		t.Fatal("failed to create storage provider")
	}
	rag, err := New(logger, store, cfg)
	if err != nil {
		t.Fatalf("failed to create RAG instance: %v", err)
	}
	t.Cleanup(func() { rag.Destroy() })
	tests := []struct {
		name  string
		query string
	}{
		{"she bears", "she bears"},
		{"bald head", "bald head"},
		{"two she bears out of the wood", "two she bears out of the wood"},
		{"bald prophet", "bald prophet"},
		{"go up thou bald head", "\"go up thou bald head\""},
		{"two she bears", "\"two she bears\""},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			results, err := rag.Search(tt.query, 10)
			if err != nil {
				t.Fatalf("search failed: %v", err)
			}
			found := false
			for _, row := range results {
				if row.Slug == "kjv_bible.epub_1786_0" {
					found = true
					break
				}
			}
			if !found {
				t.Errorf("target chunk not found for query %q", tt.query)
				// List what was returned instead, in rank order.
				for i, r := range results {
					t.Logf("%d: %s", i+1, r.Slug)
				}
			}
		})
	}
}

155
rag/rag_test.go Normal file
View File

@@ -0,0 +1,155 @@
package rag
import (
"testing"
)
// TestDetectPhrases checks that detectPhrases returns exactly the expected
// phrases, in order, for a handful of queries. An expected empty list matches
// any zero-length result.
func TestDetectPhrases(t *testing.T) {
	type phraseCase struct {
		query  string
		expect []string
	}
	cases := []phraseCase{
		{"bald prophet and two she bears", []string{"bald prophet", "two she", "two she bears", "she bears"}},
		{"she bears", []string{"she bears"}},
		{"the quick brown fox", []string{"quick brown", "quick brown fox", "brown fox"}},
		// Query made up of stop words ("in" and "the"): no phrases expected.
		{"in the house", []string{}},
		// Query too short to form a phrase.
		{"a", []string{}},
	}
	// sameStrings reports whether both slices have equal length and elements.
	sameStrings := func(a, b []string) bool {
		if len(a) != len(b) {
			return false
		}
		for i := range a {
			if a[i] != b[i] {
				return false
			}
		}
		return true
	}
	for _, tc := range cases {
		if got := detectPhrases(tc.query); !sameStrings(got, tc.expect) {
			t.Errorf("detectPhrases(%q) = %v, want %v", tc.query, got, tc.expect)
		}
	}
}
// TestCountPhraseMatches checks the number of query phrases that
// countPhraseMatches reports as present in a text.
func TestCountPhraseMatches(t *testing.T) {
	type matchCase struct {
		text   string
		query  string
		expect int
	}
	cases := []matchCase{
		{"two she bears came out of the wood", "she bears", 1},
		// Only "she bears" is expected to match.
		{"bald head and she bears", "bald prophet and two she bears", 1},
		{"no match here", "she bears", 0},
		// Both "she bears" and "bald prophet" are expected to match.
		{"she bears and bald prophet", "bald prophet she bears", 2},
	}
	for _, tc := range cases {
		got := countPhraseMatches(tc.text, tc.query)
		if got == tc.expect {
			continue
		}
		t.Errorf("countPhraseMatches(%q, %q) = %d, want %d", tc.text, tc.query, got, tc.expect)
	}
}
// TestAreSlugsAdjacent checks areSlugsAdjacent on pairs of slugs: neighbours
// in either order, slugs two apart, slugs from different files, and slugs
// differing by one in the last or in the second-to-last index.
func TestAreSlugsAdjacent(t *testing.T) {
	type pairCase struct {
		slug1  string
		slug2  string
		expect bool
	}
	cases := []pairCase{
		{"kjv_bible.epub_1786_0", "kjv_bible.epub_1787_0", true},
		{"kjv_bible.epub_1787_0", "kjv_bible.epub_1786_0", true},
		{"kjv_bible.epub_1786_0", "kjv_bible.epub_1788_0", false},
		{"otherfile.txt_1_0", "kjv_bible.epub_1786_0", false},
		{"file_1_0", "file_1_1", true},
		// Different batch: sequential batches with the same chunk index
		// are expected to count as adjacent.
		{"file_1_0", "file_2_0", true},
	}
	for _, tc := range cases {
		got := areSlugsAdjacent(tc.slug1, tc.slug2)
		if got == tc.expect {
			continue
		}
		t.Errorf("areSlugsAdjacent(%q, %q) = %v, want %v", tc.slug1, tc.slug2, got, tc.expect)
	}
}
// TestParseSlugIndices checks the (batch, chunk, ok) triple returned by
// parseSlugIndices. The numeric expectations are compared only when parsing
// is expected to succeed and does succeed.
func TestParseSlugIndices(t *testing.T) {
	type slugCase struct {
		slug      string
		wantBatch int
		wantChunk int
		wantOk    bool
	}
	cases := []slugCase{
		{slug: "kjv_bible.epub_1786_0", wantBatch: 1786, wantChunk: 0, wantOk: true},
		{slug: "file_1_5", wantBatch: 1, wantChunk: 5, wantOk: true},
		{slug: "no_underscore", wantBatch: 0, wantChunk: 0, wantOk: false},
		{slug: "file_abc_def", wantBatch: 0, wantChunk: 0, wantOk: false},
		// Expected to fail; the numeric fields are not checked for this case.
		{slug: "file_123_456_extra", wantBatch: 456, wantChunk: 0, wantOk: false},
	}
	for _, tc := range cases {
		batch, chunk, ok := parseSlugIndices(tc.slug)
		switch {
		case ok != tc.wantOk:
			t.Errorf("parseSlugIndices(%q) ok = %v, want %v", tc.slug, ok, tc.wantOk)
		case ok && (batch != tc.wantBatch || chunk != tc.wantChunk):
			t.Errorf("parseSlugIndices(%q) = (%d, %d), want (%d, %d)", tc.slug, batch, chunk, tc.wantBatch, tc.wantChunk)
		}
	}
}

View File

@@ -1,6 +1,7 @@
package rag package rag
import ( import (
"database/sql"
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"gf-lt/models" "gf-lt/models"
@@ -62,6 +63,17 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error {
if err != nil { if err != nil {
return err return err
} }
embeddingSize := len(row.Embeddings)
// Start transaction
tx, err := vs.sqlxDB.Beginx()
if err != nil {
return err
}
defer func() {
if err != nil {
_ = tx.Rollback()
}
}()
// Serialize the embeddings to binary // Serialize the embeddings to binary
serializedEmbeddings := SerializeVector(row.Embeddings) serializedEmbeddings := SerializeVector(row.Embeddings)
@@ -69,10 +81,102 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error {
"INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES (?, ?, ?, ?)", "INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES (?, ?, ?, ?)",
tableName, tableName,
) )
if _, err := vs.sqlxDB.Exec(query, serializedEmbeddings, row.Slug, row.RawText, row.FileName); err != nil { if _, err := tx.Exec(query, serializedEmbeddings, row.Slug, row.RawText, row.FileName); err != nil {
vs.logger.Error("failed to write vector", "error", err, "slug", row.Slug) vs.logger.Error("failed to write vector", "error", err, "slug", row.Slug)
return err return err
} }
// Insert into FTS table
ftsQuery := `INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES (?, ?, ?, ?)`
if _, err := tx.Exec(ftsQuery, row.Slug, row.RawText, row.FileName, embeddingSize); err != nil {
vs.logger.Error("failed to write to FTS table", "error", err, "slug", row.Slug)
return err
}
err = tx.Commit()
if err != nil {
vs.logger.Error("failed to commit transaction", "error", err)
return err
}
return nil
}
// WriteVectors stores multiple embedding vectors in a single transaction
func (vs *VectorStorage) WriteVectors(rows []*models.VectorRow) error {
if len(rows) == 0 {
return nil
}
// SQLite has limit of 999 parameters per statement, each row uses 4 parameters
const maxBatchSize = 200 // 200 * 4 = 800 < 999
if len(rows) > maxBatchSize {
// Process in chunks
for i := 0; i < len(rows); i += maxBatchSize {
end := i + maxBatchSize
if end > len(rows) {
end = len(rows)
}
if err := vs.WriteVectors(rows[i:end]); err != nil {
return err
}
}
return nil
}
// All rows should have same embedding size (same model)
firstSize := len(rows[0].Embeddings)
for i, row := range rows {
if len(row.Embeddings) != firstSize {
return fmt.Errorf("embedding size mismatch: row %d has size %d, expected %d", i, len(row.Embeddings), firstSize)
}
}
tableName, err := vs.getTableName(rows[0].Embeddings)
if err != nil {
return err
}
// Start transaction
tx, err := vs.sqlxDB.Beginx()
if err != nil {
return err
}
defer func() {
if err != nil {
_ = tx.Rollback()
}
}()
// Build batch insert for embeddings table
embeddingPlaceholders := make([]string, 0, len(rows))
embeddingArgs := make([]any, 0, len(rows)*4)
for _, row := range rows {
embeddingPlaceholders = append(embeddingPlaceholders, "(?, ?, ?, ?)")
embeddingArgs = append(embeddingArgs, SerializeVector(row.Embeddings), row.Slug, row.RawText, row.FileName)
}
embeddingQuery := fmt.Sprintf(
"INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES %s",
tableName,
strings.Join(embeddingPlaceholders, ", "),
)
if _, err := tx.Exec(embeddingQuery, embeddingArgs...); err != nil {
vs.logger.Error("failed to write vectors batch", "error", err, "batch_size", len(rows))
return err
}
// Build batch insert for FTS table
ftsPlaceholders := make([]string, 0, len(rows))
ftsArgs := make([]any, 0, len(rows)*4)
embeddingSize := len(rows[0].Embeddings)
for _, row := range rows {
ftsPlaceholders = append(ftsPlaceholders, "(?, ?, ?, ?)")
ftsArgs = append(ftsArgs, row.Slug, row.RawText, row.FileName, embeddingSize)
}
ftsQuery := "INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES " +
strings.Join(ftsPlaceholders, ", ")
if _, err := tx.Exec(ftsQuery, ftsArgs...); err != nil {
vs.logger.Error("failed to write FTS batch", "error", err, "batch_size", len(rows))
return err
}
err = tx.Commit()
if err != nil {
vs.logger.Error("failed to commit transaction", "error", err)
return err
}
vs.logger.Debug("wrote vectors batch", "batch_size", len(rows))
return nil return nil
} }
@@ -98,30 +202,25 @@ func (vs *VectorStorage) getTableName(emb []float32) (string, error) {
} }
// SearchClosest finds vectors closest to the query vector using efficient cosine similarity calculation // SearchClosest finds vectors closest to the query vector using efficient cosine similarity calculation
func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, error) { func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.VectorRow, error) {
if limit <= 0 {
limit = 10
}
tableName, err := vs.getTableName(query) tableName, err := vs.getTableName(query)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// For better performance, instead of loading all vectors at once,
// we'll implement batching and potentially add L2 distance-based pre-filtering
// since cosine similarity is related to L2 distance for normalized vectors
querySQL := "SELECT embeddings, slug, raw_text, filename FROM " + tableName querySQL := "SELECT embeddings, slug, raw_text, filename FROM " + tableName
rows, err := vs.sqlxDB.Query(querySQL) rows, err := vs.sqlxDB.Query(querySQL)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer rows.Close() defer rows.Close()
// Use a min-heap or simple slice to keep track of top 3 closest vectors
type SearchResult struct { type SearchResult struct {
vector models.VectorRow vector models.VectorRow
distance float32 distance float32
} }
var topResults []SearchResult var topResults []SearchResult
// Process vectors one by one to avoid loading everything into memory
for rows.Next() { for rows.Next() {
var ( var (
embeddingsBlob []byte embeddingsBlob []byte
@@ -132,12 +231,9 @@ func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, err
vs.logger.Error("failed to scan row", "error", err) vs.logger.Error("failed to scan row", "error", err)
continue continue
} }
storedEmbeddings := DeserializeVector(embeddingsBlob) storedEmbeddings := DeserializeVector(embeddingsBlob)
// Calculate cosine similarity (returns value between -1 and 1, where 1 is most similar)
similarity := cosineSimilarity(query, storedEmbeddings) similarity := cosineSimilarity(query, storedEmbeddings)
distance := 1 - similarity // Convert to distance where 0 is most similar distance := 1 - similarity
result := SearchResult{ result := SearchResult{
vector: models.VectorRow{ vector: models.VectorRow{
@@ -149,20 +245,14 @@ func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, err
distance: distance, distance: distance,
} }
// Add to top results and maintain only top 3
topResults = append(topResults, result) topResults = append(topResults, result)
// Sort and keep only top 3
sort.Slice(topResults, func(i, j int) bool { sort.Slice(topResults, func(i, j int) bool {
return topResults[i].distance < topResults[j].distance return topResults[i].distance < topResults[j].distance
}) })
if len(topResults) > limit {
if len(topResults) > 3 { topResults = topResults[:limit]
topResults = topResults[:3] // Keep only closest 3
} }
} }
// Convert back to VectorRow slice
results := make([]models.VectorRow, 0, len(topResults)) results := make([]models.VectorRow, 0, len(topResults))
for _, result := range topResults { for _, result := range topResults {
result.vector.Distance = result.distance result.vector.Distance = result.distance
@@ -171,6 +261,98 @@ func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, err
return results, nil return results, nil
} }
// GetVectorBySlug retrieves a vector row by its slug.
//
// The slug is looked up in each embeddings_<size> table in turn and the first
// match is returned with its embeddings deserialized. A table that simply has
// no matching row is skipped silently; any other lookup failure is logged
// before moving on, so that real database errors are not mistaken for a
// missing slug. If no table yields a row, an error is returned.
func (vs *VectorStorage) GetVectorBySlug(slug string) (*models.VectorRow, error) {
	embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120}
	for _, size := range embeddingSizes {
		table := fmt.Sprintf("embeddings_%d", size)
		query := fmt.Sprintf("SELECT embeddings, slug, raw_text, filename FROM %s WHERE slug = ?", table)
		var (
			embeddingsBlob                   []byte
			retrievedSlug, rawText, fileName string
		)
		err := vs.sqlxDB.QueryRow(query, slug).Scan(&embeddingsBlob, &retrievedSlug, &rawText, &fileName)
		if err != nil {
			// sql.ErrNoRows only means the slug is not in this table; anything
			// else is unexpected and worth surfacing in the log.
			if err != sql.ErrNoRows {
				vs.logger.Error("failed to look up vector by slug",
					"table", table, "slug", slug, "error", err)
			}
			continue
		}
		return &models.VectorRow{
			Embeddings: DeserializeVector(embeddingsBlob),
			Slug:       retrievedSlug,
			RawText:    rawText,
			FileName:   fileName,
		}, nil
	}
	return nil, fmt.Errorf("vector with slug %s not found", slug)
}
// SearchKeyword runs a full-text search against the fts_embeddings FTS5 table
// and returns at most limit rows ordered by their bm25 score (ascending, so
// the best matches come first).
//
// The query is first passed to MATCH as given. When that yields nothing and
// the query consists of several space-separated terms without an explicit
// " OR ", the terms are joined with OR and the search is retried. A failure of
// the retry is not reported: the (empty) result of the first attempt is
// returned instead.
func (vs *VectorStorage) SearchKeyword(query string, limit int) ([]models.VectorRow, error) {
	const ftsQuery = `SELECT slug, raw_text, filename, bm25(fts_embeddings) as score
FROM fts_embeddings
WHERE fts_embeddings MATCH ?
ORDER BY score
LIMIT ?`
	// First attempt: the query exactly as the caller wrote it.
	primaryRows, err := vs.sqlxDB.Query(ftsQuery, query, limit)
	if err != nil {
		return nil, fmt.Errorf("FTS search failed: %w", err)
	}
	results, err := vs.scanRows(primaryRows)
	primaryRows.Close()
	if err != nil {
		return nil, err
	}
	if len(results) > 0 {
		return results, nil
	}
	// The OR fallback only makes sense for multi-term queries that do not
	// already use OR themselves.
	alreadyHasOr := strings.Contains(strings.ToUpper(query), " OR ")
	if !strings.Contains(query, " ") || alreadyHasOr {
		return results, nil
	}
	terms := strings.Fields(query)
	if len(terms) <= 1 {
		return results, nil
	}
	// Second attempt: term1 OR term2 OR term3 ...
	fallbackRows, err := vs.sqlxDB.Query(ftsQuery, strings.Join(terms, " OR "), limit)
	if err != nil {
		// Best effort: keep the original empty result rather than failing.
		return results, nil
	}
	orResults, err := vs.scanRows(fallbackRows)
	fallbackRows.Close()
	if err != nil {
		return results, nil
	}
	return orResults, nil
}
// scanRows converts the rows of an FTS query (slug, raw_text, filename, score)
// into a VectorRow slice.
//
// The bm25 score is stored unchanged in Distance, so lower (more negative)
// values mean better matches. Rows that fail to scan are logged and skipped.
// The caller remains responsible for closing rows.
//
// An error is returned when iteration itself stopped because of a failure
// (rows.Err), so that a truncated result is never mistaken for a complete one.
func (vs *VectorStorage) scanRows(rows *sql.Rows) ([]models.VectorRow, error) {
	var results []models.VectorRow
	for rows.Next() {
		var slug, rawText, fileName string
		var score float64
		if err := rows.Scan(&slug, &rawText, &fileName, &score); err != nil {
			vs.logger.Error("failed to scan FTS row", "error", err)
			continue
		}
		results = append(results, models.VectorRow{
			Slug:     slug,
			RawText:  rawText,
			FileName: fileName,
			// bm25 score used as-is: negative, more negative is better.
			Distance: float32(score),
		})
	}
	// rows.Next returns false both at the end of the result set and on error;
	// only rows.Err tells the two apart.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating FTS rows: %w", err)
	}
	return results, nil
}
// ListFiles returns a list of all loaded files // ListFiles returns a list of all loaded files
func (vs *VectorStorage) ListFiles() ([]string, error) { func (vs *VectorStorage) ListFiles() ([]string, error) {
fileLists := make([][]string, 0) fileLists := make([][]string, 0)
@@ -215,6 +397,10 @@ func (vs *VectorStorage) ListFiles() ([]string, error) {
// RemoveEmbByFileName removes all embeddings associated with a specific filename // RemoveEmbByFileName removes all embeddings associated with a specific filename
func (vs *VectorStorage) RemoveEmbByFileName(filename string) error { func (vs *VectorStorage) RemoveEmbByFileName(filename string) error {
var errors []string var errors []string
// Delete from FTS table first
if _, err := vs.sqlxDB.Exec("DELETE FROM fts_embeddings WHERE filename = ?", filename); err != nil {
errors = append(errors, err.Error())
}
embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120} embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120}
for _, size := range embeddingSizes { for _, size := range embeddingSizes {
table := fmt.Sprintf("embeddings_%d", size) table := fmt.Sprintf("embeddings_%d", size)

View File

@@ -0,0 +1,2 @@
-- Drop the FTS5 virtual table used for full-text search.
-- IF EXISTS keeps the statement from failing when the table is already absent.
DROP TABLE IF EXISTS fts_embeddings;

View File

@@ -0,0 +1,15 @@
-- Create the FTS5 virtual table used for full-text (keyword) search.
-- Only raw_text is part of the full-text index; slug, filename and
-- embedding_size are declared UNINDEXED and are stored alongside it.
CREATE VIRTUAL TABLE IF NOT EXISTS fts_embeddings USING fts5(
slug UNINDEXED,
raw_text,
filename UNINDEXED,
embedding_size UNINDEXED,
tokenize='porter unicode61' -- Use porter stemmer and unicode61 tokenizer
);
-- No triggers are created here: rows are inserted into and deleted from the
-- FTS table programmatically for simplicity. Triggers on the embeddings tables
-- could be added in this file if that ever becomes necessary.
-- No additional indexes are needed either; an FTS5 virtual table manages its
-- own index.

View File

@@ -0,0 +1,2 @@
-- Remove every row from the FTS table; the table itself is kept.
DELETE FROM fts_embeddings;

View File

@@ -0,0 +1,26 @@
-- Populate the FTS table with the text of all existing embeddings.
-- The table is emptied first, so re-running this script does not create
-- duplicate rows.
DELETE FROM fts_embeddings;
-- One INSERT per embeddings_<size> table; the literal in each SELECT records
-- which size table the row was copied from (embedding_size column).
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
SELECT slug, raw_text, filename, 384 FROM embeddings_384;
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
SELECT slug, raw_text, filename, 768 FROM embeddings_768;
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
SELECT slug, raw_text, filename, 1024 FROM embeddings_1024;
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
SELECT slug, raw_text, filename, 1536 FROM embeddings_1536;
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
SELECT slug, raw_text, filename, 2048 FROM embeddings_2048;
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
SELECT slug, raw_text, filename, 3072 FROM embeddings_3072;
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
SELECT slug, raw_text, filename, 4096 FROM embeddings_4096;
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
SELECT slug, raw_text, filename, 5120 FROM embeddings_5120;

View File

@@ -102,6 +102,22 @@ func NewProviderSQL(dbPath string, logger *slog.Logger) FullRepo {
logger.Error("failed to open db connection", "error", err) logger.Error("failed to open db connection", "error", err)
return nil return nil
} }
// Enable WAL mode for better concurrency and performance
if _, err := db.Exec("PRAGMA journal_mode = WAL;"); err != nil {
logger.Warn("failed to enable WAL mode", "error", err)
}
if _, err := db.Exec("PRAGMA synchronous = NORMAL;"); err != nil {
logger.Warn("failed to set synchronous mode", "error", err)
}
// Increase cache size for better performance
if _, err := db.Exec("PRAGMA cache_size = -2000;"); err != nil {
logger.Warn("failed to set cache size", "error", err)
}
// Log actual journal mode for debugging
var journalMode string
if err := db.QueryRow("PRAGMA journal_mode;").Scan(&journalMode); err == nil {
logger.Debug("SQLite journal mode", "mode", journalMode)
}
p := ProviderSQL{db: db, logger: logger} p := ProviderSQL{db: db, logger: logger}
if err := p.Migrate(); err != nil { if err := p.Migrate(); err != nil {
logger.Error("migration failed, app cannot start", "error", err) logger.Error("migration failed, app cannot start", "error", err)

View File

@@ -4,6 +4,7 @@ import (
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"gf-lt/models" "gf-lt/models"
"sort"
"unsafe" "unsafe"
"github.com/jmoiron/sqlx" "github.com/jmoiron/sqlx"
@@ -11,7 +12,7 @@ import (
type VectorRepo interface { type VectorRepo interface {
WriteVector(*models.VectorRow) error WriteVector(*models.VectorRow) error
SearchClosest(q []float32) ([]models.VectorRow, error) SearchClosest(q []float32, limit int) ([]models.VectorRow, error)
ListFiles() ([]string, error) ListFiles() ([]string, error)
RemoveEmbByFileName(filename string) error RemoveEmbByFileName(filename string) error
DB() *sqlx.DB DB() *sqlx.DB
@@ -79,7 +80,7 @@ func (p ProviderSQL) WriteVector(row *models.VectorRow) error {
return err return err
} }
func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) { func (p ProviderSQL) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) {
tableName, err := fetchTableName(q) tableName, err := fetchTableName(q)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -94,7 +95,7 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
vector models.VectorRow vector models.VectorRow
distance float32 distance float32
} }
var topResults []SearchResult var allResults []SearchResult
for rows.Next() { for rows.Next() {
var ( var (
embeddingsBlob []byte embeddingsBlob []byte
@@ -119,28 +120,19 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
}, },
distance: distance, distance: distance,
} }
allResults = append(allResults, result)
// Add to top results and maintain only top results
topResults = append(topResults, result)
// Sort and keep only top results
// We'll keep the top 3 closest vectors
if len(topResults) > 3 {
// Simple sort and truncate to maintain only 3 best matches
for i := 0; i < len(topResults); i++ {
for j := i + 1; j < len(topResults); j++ {
if topResults[i].distance > topResults[j].distance {
topResults[i], topResults[j] = topResults[j], topResults[i]
} }
// Sort by distance
sort.Slice(allResults, func(i, j int) bool {
return allResults[i].distance < allResults[j].distance
})
// Truncate to limit
if len(allResults) > limit {
allResults = allResults[:limit]
} }
}
topResults = topResults[:3]
}
}
// Convert back to VectorRow slice // Convert back to VectorRow slice
results := make([]models.VectorRow, len(topResults)) results := make([]models.VectorRow, len(allResults))
for i, result := range topResults { for i, result := range allResults {
result.vector.Distance = result.distance result.vector.Distance = result.distance
results[i] = result.vector results[i] = result.vector
} }

View File

@@ -1,7 +0,0 @@
{
"sys_prompt": "A game of cluedo. Players are {{user}}, {{char}}, {{char2}};\n\nrooms: hall, lounge, dinning room kitchen, ballroom, conservatory, billiard room, library, study;\nweapons: candlestick, dagger, lead pipe, revolver, rope, spanner;\npeople: miss Scarlett, colonel Mustard, mrs. White, reverend Green, mrs. Peacock, professor Plum;\n\nA murder happened in a mansion with 9 rooms. Victim is dr. Black.\nPlayers goal is to find out who commited a murder, in what room and with what weapon.\nWeapons, people and rooms not involved in murder are distributed between players (as cards) by tool agent.\nThe objective of the game is to deduce the details of the murder. There are six characters, six murder weapons, and nine rooms, leaving the players with 324 possibilities. As soon as a player enters a room, they may make a suggestion as to the details, naming a suspect, the room they are in, and the weapon. For example: \"I suspect Professor Plum, in the Dining Room, with the candlestick\".\nOnce a player makes a suggestion, the others are called upon to disprove it.\nBefore the player's move, tool agent will remind that players their cards. There are two types of moves: making a suggestion (suggestion_move) and disproving other player suggestion (evidence_move);\nIn this version player wins when the correct details are named in the suggestion_move.\n\n<example_game>\n{{user}}:\nlet's start a game of cluedo!\ntool: cards of {{char}} are 'LEAD PIPE', 'BALLROOM', 'CONSERVATORY', 'STUDY', 'Mrs. White'; suggestion_move;\n{{char}}:\n(putting miss Scarlet into the Hall with the Revolver) \"I suspect miss Scarlett, in the Hall, with the revolver.\"\ntool: cards of {{char2}} are 'SPANNER', 'DAGGER', 'Professor Plum', 'LIBRARY', 'Mrs. Peacock'; evidence_move;\n{{char2}}:\n\"No objections.\" (no cards matching the suspicion of {{char}})\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; evidence_move;\n{{user}}:\n\"I object. 
Miss Scarlett is innocent.\" (shows card with 'Miss Scarlett')\ntool: cards of {{char2}} are 'SPANNER', 'DAGGER', 'Professor Plum', 'LIBRARY', 'Mrs. Peacock'; suggestion_move;\n{{char2}}:\n*So it was not Miss Scarlett, good to know.*\n(moves Mrs. White to the Billiard Room) \"It might have been Mrs. White, in the Billiard Room, with the Revolver.\"\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; evidence_move;\n{{user}}:\n(no matching cards for the assumption of {{char2}}) \"Sounds possible to me.\"\ntool: cards of {{char}} are 'LEAD PIPE', 'BALLROOM', 'CONSERVATORY', 'STUDY', 'Mrs. White'; evidence_move;\n{{char}}:\n(shows Mrs. White card) \"No. Was not Mrs. White\"\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; suggestion_move;\n{{user}}:\n*So not Mrs. White...* (moves Reverend Green into the Billiard Room) \"I suspect Reverend Green, in the Billiard Room, with the Revolver.\"\ntool: Correct. It was Reverend Green in the Billiard Room, with the revolver. {{user}} wins.\n</example_game>",
"role": "CluedoPlayer",
"role2": "CluedoEnjoyer",
"filepath": "sysprompts/cluedo.json",
"first_msg": "Hey guys! Want to play cluedo?"
}

View File

@@ -243,11 +243,9 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
for _, f := range loadedFiles { for _, f := range loadedFiles {
loadedSet[f] = true loadedSet[f] = true
} }
// Build merged list: files from ragdir + orphaned files from DB // Build merged list: files from ragdir + orphaned files from DB
ragFiles := make([]ragFileInfo, 0, len(fileList)+len(loadedFiles)) ragFiles := make([]ragFileInfo, 0, len(fileList)+len(loadedFiles))
seen := make(map[string]bool) seen := make(map[string]bool)
// Add files from ragdir // Add files from ragdir
for _, f := range fileList { for _, f := range fileList {
ragFiles = append(ragFiles, ragFileInfo{ ragFiles = append(ragFiles, ragFileInfo{
@@ -258,7 +256,6 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
}) })
seen[f] = true seen[f] = true
} }
// Add orphaned files (in DB but not in ragdir) // Add orphaned files (in DB but not in ragdir)
for _, f := range loadedFiles { for _, f := range loadedFiles {
if !seen[f] { if !seen[f] {
@@ -275,7 +272,7 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
fileTable := tview.NewTable(). fileTable := tview.NewTable().
SetBorders(true) SetBorders(true)
longStatusView := tview.NewTextView() longStatusView := tview.NewTextView()
longStatusView.SetText("status text") longStatusView.SetText("press x to exit")
longStatusView.SetBorder(true).SetTitle("status") longStatusView.SetBorder(true).SetTitle("status")
longStatusView.SetChangedFunc(func() { longStatusView.SetChangedFunc(func() {
app.Draw() app.Draw()
@@ -376,7 +373,6 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
} }
errCh := make(chan error, 1) // why? errCh := make(chan error, 1) // why?
go func() { go func() {
defer pages.RemovePage(RAGPage)
for { for {
select { select {
case err := <-errCh: case err := <-errCh:
@@ -417,7 +413,6 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
} }
return return
} }
// defer pages.RemovePage(RAGPage)
tc := fileTable.GetCell(row, column) tc := fileTable.GetCell(row, column)
tc.SetTextColor(tcell.ColorRed) tc.SetTextColor(tcell.ColorRed)
fileTable.SetSelectable(false, false) fileTable.SetSelectable(false, false)
@@ -430,7 +425,6 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
f := ragFiles[row-1] f := ragFiles[row-1]
// Handle "-" case (orphaned file with no delete option) // Handle "-" case (orphaned file with no delete option)
if tc.Text == "-" { if tc.Text == "-" {
pages.RemovePage(RAGPage)
return return
} }
switch tc.Text { switch tc.Text {
@@ -441,14 +435,14 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
if err := ragger.LoadRAG(fpath); err != nil { if err := ragger.LoadRAG(fpath); err != nil {
logger.Error("failed to embed file", "chat", fpath, "error", err) logger.Error("failed to embed file", "chat", fpath, "error", err)
showToast("RAG", "failed to embed file; error: "+err.Error()) showToast("RAG", "failed to embed file; error: "+err.Error())
app.QueueUpdate(func() {
pages.RemovePage(RAGPage)
})
return return
} }
showToast("RAG", "file loaded successfully") showToast("RAG", "file loaded successfully")
app.QueueUpdate(func() { app.QueueUpdate(func() {
pages.RemovePage(RAGPage) pages.RemovePage(RAGPage)
loadedFiles, _ := ragger.ListLoaded()
chatRAGTable := makeRAGTable(fileList, loadedFiles)
pages.AddPage(RAGPage, chatRAGTable, true, true)
}) })
}() }()
return return
@@ -458,14 +452,14 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
if err := ragger.RemoveFile(f.name); err != nil { if err := ragger.RemoveFile(f.name); err != nil {
logger.Error("failed to unload file from RAG", "filename", f.name, "error", err) logger.Error("failed to unload file from RAG", "filename", f.name, "error", err)
showToast("RAG", "failed to unload file; error: "+err.Error()) showToast("RAG", "failed to unload file; error: "+err.Error())
app.QueueUpdate(func() {
pages.RemovePage(RAGPage)
})
return return
} }
showToast("RAG", "file unloaded successfully") showToast("RAG", "file unloaded successfully")
app.QueueUpdate(func() { app.QueueUpdate(func() {
pages.RemovePage(RAGPage) pages.RemovePage(RAGPage)
loadedFiles, _ := ragger.ListLoaded()
chatRAGTable := makeRAGTable(fileList, loadedFiles)
pages.AddPage(RAGPage, chatRAGTable, true, true)
}) })
}() }()
return return
@@ -476,6 +470,21 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
return return
} }
showToast("chat deleted", fpath+" was deleted") showToast("chat deleted", fpath+" was deleted")
go func() {
app.QueueUpdate(func() {
pages.RemovePage(RAGPage)
newFileList, _ := os.ReadDir(cfg.RAGDir)
loadedFiles, _ := ragger.ListLoaded()
var newFiles []string
for _, f := range newFileList {
if !f.IsDir() {
newFiles = append(newFiles, f.Name())
}
}
chatRAGTable := makeRAGTable(newFiles, loadedFiles)
pages.AddPage(RAGPage, chatRAGTable, true, true)
})
}()
return return
default: default:
pages.RemovePage(RAGPage) pages.RemovePage(RAGPage)

View File

@@ -207,7 +207,7 @@ var (
modelHasVision bool modelHasVision bool
) )
func init() { func initTools() {
sysMap[basicCard.ID] = basicCard sysMap[basicCard.ID] = basicCard
roleToID["assistant"] = basicCard.ID roleToID["assistant"] = basicCard.ID
sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "") sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "")
@@ -348,13 +348,13 @@ func ragsearch(args map[string]string) []byte {
} }
limitS, ok := args["limit"] limitS, ok := args["limit"]
if !ok || limitS == "" { if !ok || limitS == "" {
limitS = "3" limitS = "10"
} }
limit, err := strconv.Atoi(limitS) limit, err := strconv.Atoi(limitS)
if err != nil || limit == 0 { if err != nil || limit == 0 {
logger.Warn("ragsearch limit; passed bad value; setting to default (3)", logger.Warn("ragsearch limit; passed bad value; setting to default (3)",
"limit_arg", limitS, "error", err) "limit_arg", limitS, "error", err)
limit = 3 limit = 10
} }
ragInstance := rag.GetInstance() ragInstance := rag.GetInstance()
if ragInstance == nil { if ragInstance == nil {
@@ -2261,56 +2261,3 @@ var baseTools = []models.Tool{
}, },
}, },
} }
// init appends the window tools (list_windows, capture_window and
// capture_window_and_view) to baseTools. Nothing is registered unless
// windowToolsAvailable is set.
func init() {
	if !windowToolsAvailable {
		return
	}
	// windowParams builds the parameter schema shared by both capture tools:
	// a single required "window" string. A fresh value is built on every call
	// so the tools do not share the underlying map or slice.
	windowParams := func() models.ToolFuncParams {
		return models.ToolFuncParams{
			Type:     "object",
			Required: []string{"window"},
			Properties: map[string]models.ToolArgProps{
				"window": {
					Type:        "string",
					Description: "window ID or window name (partial match)",
				},
			},
		}
	}
	listWindows := models.Tool{
		Type: "function",
		Function: models.ToolFunc{
			Name:        "list_windows",
			Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
			Parameters: models.ToolFuncParams{
				Type:       "object",
				Required:   []string{},
				Properties: map[string]models.ToolArgProps{},
			},
		},
	}
	captureWindow := models.Tool{
		Type: "function",
		Function: models.ToolFunc{
			Name:        "capture_window",
			Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
			Parameters:  windowParams(),
		},
	}
	captureWindowAndView := models.Tool{
		Type: "function",
		Function: models.ToolFunc{
			Name:        "capture_window_and_view",
			Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
			Parameters:  windowParams(),
		},
	}
	baseTools = append(baseTools, listWindows, captureWindow, captureWindowAndView)
}

99
tui.go
View File

@@ -29,6 +29,8 @@ var (
statusLineWidget *tview.TextView statusLineWidget *tview.TextView
helpView *tview.TextView helpView *tview.TextView
flex *tview.Flex flex *tview.Flex
bottomFlex *tview.Flex
notificationWidget *tview.TextView
imgView *tview.Image imgView *tview.Image
defaultImage = "sysprompts/llama.png" defaultImage = "sysprompts/llama.png"
indexPickWindow *tview.InputField indexPickWindow *tview.InputField
@@ -36,10 +38,10 @@ var (
roleEditWindow *tview.InputField roleEditWindow *tview.InputField
shellInput *tview.InputField shellInput *tview.InputField
confirmModal *tview.Modal confirmModal *tview.Modal
toastTimer *time.Timer
confirmPageName = "confirm" confirmPageName = "confirm"
fullscreenMode bool fullscreenMode bool
positionVisible bool = true positionVisible bool = true
scrollToEndEnabled bool = true
// pages // pages
historyPage = "historyPage" historyPage = "historyPage"
agentPage = "agentPage" agentPage = "agentPage"
@@ -48,7 +50,6 @@ var (
helpPage = "helpPage" helpPage = "helpPage"
renamePage = "renamePage" renamePage = "renamePage"
RAGPage = "RAGPage" RAGPage = "RAGPage"
RAGLoadedPage = "RAGLoadedPage"
propsPage = "propsPage" propsPage = "propsPage"
codeBlockPage = "codeBlockPage" codeBlockPage = "codeBlockPage"
imgPage = "imgPage" imgPage = "imgPage"
@@ -137,8 +138,8 @@ func setShellMode(enabled bool) {
}() }()
} }
// showToast displays a temporary message in the topright corner. // showToast displays a temporary notification in the bottom-right corner.
// It autohides after 3 seconds and disappears when clicked. // It auto-hides after 3 seconds.
func showToast(title, message string) { func showToast(title, message string) {
sanitize := func(s string, maxLen int) string { sanitize := func(s string, maxLen int) string {
sanitized := strings.Map(func(r rune) rune { sanitized := strings.Map(func(r rune) rune {
@@ -154,6 +155,11 @@ func showToast(title, message string) {
} }
title = sanitize(title, 50) title = sanitize(title, 50)
message = sanitize(message, 197) message = sanitize(message, 197)
if toastTimer != nil {
toastTimer.Stop()
}
// show blocking notification to not mess up flex
if fullscreenMode {
notification := tview.NewTextView(). notification := tview.NewTextView().
SetTextAlign(tview.AlignCenter). SetTextAlign(tview.AlignCenter).
SetDynamicColors(true). SetDynamicColors(true).
@@ -176,17 +182,47 @@ func showToast(title, message string) {
// Generate a unique page name (e.g., using timestamp) to allow multiple toasts. // Generate a unique page name (e.g., using timestamp) to allow multiple toasts.
pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano()) pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano())
pages.AddPage(pageName, background, true, true) pages.AddPage(pageName, background, true, true)
// Autodismiss after 3 seconds. // Autodismiss after 2 seconds, since blocking is more annoying
time.AfterFunc(3*time.Second, func() { time.AfterFunc(2*time.Second, func() {
app.QueueUpdateDraw(func() { app.QueueUpdateDraw(func() {
if pages.HasPage(pageName) { if pages.HasPage(pageName) {
pages.RemovePage(pageName) pages.RemovePage(pageName)
} }
}) })
}) })
return
}
notificationWidget.SetTitle(title)
notificationWidget.SetText(fmt.Sprintf("[yellow]%s[-]", message))
go func() {
app.QueueUpdateDraw(func() {
flex.RemoveItem(bottomFlex)
flex.RemoveItem(statusLineWidget)
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
AddItem(textArea, 0, 1, true).
AddItem(notificationWidget, 40, 1, false)
flex.AddItem(bottomFlex, 0, 10, true)
if positionVisible {
flex.AddItem(statusLineWidget, 0, 2, false)
}
})
}()
toastTimer = time.AfterFunc(3*time.Second, func() {
app.QueueUpdateDraw(func() {
flex.RemoveItem(bottomFlex)
flex.RemoveItem(statusLineWidget)
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
AddItem(textArea, 0, 1, true).
AddItem(notificationWidget, 0, 0, false)
flex.AddItem(bottomFlex, 0, 10, true)
if positionVisible {
flex.AddItem(statusLineWidget, 0, 2, false)
}
})
})
} }
func init() { func initTUI() {
// Start background goroutine to update model color cache // Start background goroutine to update model color cache
startModelColorUpdater() startModelColorUpdater()
tview.Styles = colorschemes["default"] tview.Styles = colorschemes["default"]
@@ -235,7 +271,7 @@ func init() {
shellHistoryPos = -1 shellHistoryPos = -1
} }
// Handle Tab key for @ file completion // Handle Tab key for @ file completion
if event.Key() == tcell.KeyTab { if event.Key() == tcell.KeyTab && shellMode {
currentText := shellInput.GetText() currentText := shellInput.GetText()
atIndex := strings.LastIndex(currentText, "@") atIndex := strings.LastIndex(currentText, "@")
if atIndex >= 0 { if atIndex >= 0 {
@@ -286,12 +322,26 @@ func init() {
SetDynamicColors(true). SetDynamicColors(true).
SetRegions(true). SetRegions(true).
SetChangedFunc(func() { SetChangedFunc(func() {
// INFO:
// https://github.com/rivo/tview/wiki/Concurrency#event-handlers
// although already called by default per tview specs
// calling it explicitly makes text streaming to look more smooth
app.Draw() app.Draw()
}) })
notificationWidget = tview.NewTextView().
SetTextAlign(tview.AlignCenter).
SetDynamicColors(true).
SetRegions(true).
SetChangedFunc(func() {
})
notificationWidget.SetBorder(true).SetTitle("notification")
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
AddItem(textArea, 0, 1, true).
AddItem(notificationWidget, 0, 0, false)
// //
flex = tview.NewFlex().SetDirection(tview.FlexRow). flex = tview.NewFlex().SetDirection(tview.FlexRow).
AddItem(textView, 0, 40, false). AddItem(textView, 0, 40, false).
AddItem(textArea, 0, 10, true) // Restore original height AddItem(bottomFlex, 0, 10, true)
if positionVisible { if positionVisible {
flex.AddItem(statusLineWidget, 0, 2, false) flex.AddItem(statusLineWidget, 0, 2, false)
} }
@@ -360,10 +410,14 @@ func init() {
// y += h / 2 // y += h / 2
// return x, y, w, h // return x, y, w, h
// }) // })
notificationWidget.SetDrawFunc(func(screen tcell.Screen, x, y, w, h int) (int, int, int, int) {
y += h / 2
return x, y, w, h
})
// Initially set up flex without search bar // Initially set up flex without search bar
flex = tview.NewFlex().SetDirection(tview.FlexRow). flex = tview.NewFlex().SetDirection(tview.FlexRow).
AddItem(textView, 0, 40, false). AddItem(textView, 0, 40, false).
AddItem(textArea, 0, 10, true) // Restore original height AddItem(bottomFlex, 0, 10, true)
if positionVisible { if positionVisible {
flex.AddItem(statusLineWidget, 0, 2, false) flex.AddItem(statusLineWidget, 0, 2, false)
} }
@@ -578,7 +632,7 @@ func init() {
updateStatusLine() updateStatusLine()
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys)) textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
colorText() colorText()
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
// init sysmap // init sysmap
@@ -607,9 +661,9 @@ func init() {
} }
if event.Key() == tcell.KeyRune && event.Rune() == '2' && event.Modifiers()&tcell.ModAlt != 0 { if event.Key() == tcell.KeyRune && event.Rune() == '2' && event.Modifiers()&tcell.ModAlt != 0 {
// toggle auto-scrolling // toggle auto-scrolling
scrollToEndEnabled = !scrollToEndEnabled cfg.AutoScrollEnabled = !cfg.AutoScrollEnabled
status := "disabled" status := "disabled"
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
status = "enabled" status = "enabled"
} }
showToast("autoscroll", "Auto-scrolling "+status) showToast("autoscroll", "Auto-scrolling "+status)
@@ -676,7 +730,7 @@ func init() {
updateStatusLine() updateStatusLine()
return nil return nil
} }
if event.Key() == tcell.KeyF2 && !botRespMode { if event.Key() == tcell.KeyF2 && !botRespMode.Load() {
// regen last msg // regen last msg
if len(chatBody.Messages) == 0 { if len(chatBody.Messages) == 0 {
showToast("info", "no messages to regenerate") showToast("info", "no messages to regenerate")
@@ -693,7 +747,7 @@ func init() {
chatRoundChan <- &models.ChatRoundReq{Role: cfg.UserRole, Regen: true} chatRoundChan <- &models.ChatRoundReq{Role: cfg.UserRole, Regen: true}
return nil return nil
} }
if event.Key() == tcell.KeyF3 && !botRespMode { if event.Key() == tcell.KeyF3 && !botRespMode.Load() {
// delete last msg // delete last msg
// check textarea text; if it ends with bot icon delete only icon: // check textarea text; if it ends with bot icon delete only icon:
text := textView.GetText(true) text := textView.GetText(true)
@@ -749,9 +803,9 @@ func init() {
return nil return nil
} }
if event.Key() == tcell.KeyF6 { if event.Key() == tcell.KeyF6 {
interruptResp = true interruptResp.Store(true)
botRespMode = false botRespMode.Store(false)
toolRunningMode = false toolRunningMode.Store(false)
return nil return nil
} }
if event.Key() == tcell.KeyF7 { if event.Key() == tcell.KeyF7 {
@@ -1046,7 +1100,7 @@ func init() {
return nil return nil
} }
// cannot send msg in editMode or botRespMode // cannot send msg in editMode or botRespMode
if event.Key() == tcell.KeyEscape && !editMode && !botRespMode { if event.Key() == tcell.KeyEscape && !editMode && !botRespMode.Load() {
if shellMode { if shellMode {
cmdText := shellInput.GetText() cmdText := shellInput.GetText()
if cmdText != "" { if cmdText != "" {
@@ -1083,7 +1137,7 @@ func init() {
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n", fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
nl, len(chatBody.Messages), persona, msgText) nl, len(chatBody.Messages), persona, msgText)
textArea.SetText("", true) textArea.SetText("", true)
if scrollToEndEnabled { if cfg.AutoScrollEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
colorText() colorText()
@@ -1095,7 +1149,7 @@ func init() {
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText} chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
return nil return nil
} }
if event.Key() == tcell.KeyTab { if event.Key() == tcell.KeyTab && !shellMode {
currentF := app.GetFocus() currentF := app.GetFocus()
if currentF == textArea { if currentF == textArea {
currentText := textArea.GetText() currentText := textArea.GetText()
@@ -1112,9 +1166,10 @@ func init() {
app.SetFocus(focusSwitcher[currentF]) app.SetFocus(focusSwitcher[currentF])
return nil return nil
} }
if isASCII(string(event.Rune())) && !botRespMode { if isASCII(string(event.Rune())) && !botRespMode.Load() {
return event return event
} }
return event return event
}) })
go updateModelLists()
} }