Compare commits
1 Commits
fix/datara
...
feat/agent
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2687f38d00 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -3,8 +3,6 @@
|
|||||||
testlog
|
testlog
|
||||||
history/
|
history/
|
||||||
*.db
|
*.db
|
||||||
*.db-shm
|
|
||||||
*.db-wal
|
|
||||||
config.toml
|
config.toml
|
||||||
sysprompts/*
|
sysprompts/*
|
||||||
!sysprompts/alice_bob_carl.json
|
!sysprompts/alice_bob_carl.json
|
||||||
@@ -17,4 +15,3 @@ gflt
|
|||||||
chat_exports/*.json
|
chat_exports/*.json
|
||||||
ragimport
|
ragimport
|
||||||
.env
|
.env
|
||||||
onnx/
|
|
||||||
|
|||||||
101
Makefile
101
Makefile
@@ -1,4 +1,4 @@
|
|||||||
.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve fetch-onnx install-onnx-deps
|
.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve
|
||||||
|
|
||||||
run: setconfig
|
run: setconfig
|
||||||
go build -tags extra -o gf-lt && ./gf-lt
|
go build -tags extra -o gf-lt && ./gf-lt
|
||||||
@@ -30,105 +30,6 @@ lint: ## Run linters. Use make install-linters first.
|
|||||||
lintall: lint
|
lintall: lint
|
||||||
noblanks ./...
|
noblanks ./...
|
||||||
|
|
||||||
fetch-onnx:
|
|
||||||
mkdir -p onnx/embedgemma && curl -o onnx/embedgemma/config.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/config.json && curl -o onnx/embedgemma/tokenizer.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/tokenizer.json && curl -o onnx/embedgemma/model_q4.onnx -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx && curl -o onnx/embedgemma/model_q4.onnx_data -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx_data?download=true
|
|
||||||
|
|
||||||
install-onnx-deps: ## Install ONNX Runtime with CUDA support (or CPU fallback)
|
|
||||||
@echo "=== ONNX Runtime Installer ===" && \
|
|
||||||
echo "" && \
|
|
||||||
echo "Checking for existing ONNX Runtime..." && \
|
|
||||||
if ldconfig -p 2>/dev/null | grep -q libonnxruntime.so.1; then \
|
|
||||||
echo "ONNX Runtime is already installed:" && \
|
|
||||||
ldconfig -p 2>/dev/null | grep libonnxruntime && \
|
|
||||||
echo "" && \
|
|
||||||
echo "Skipping installation. To reinstall, remove existing libs first:" && \
|
|
||||||
echo " sudo rm -f /usr/local/lib/libonnxruntime*.so*" && \
|
|
||||||
exit 0; \
|
|
||||||
fi && \
|
|
||||||
echo "No ONNX Runtime found. Proceeding with installation..." && \
|
|
||||||
echo "" && \
|
|
||||||
echo "Detecting CUDA version..." && \
|
|
||||||
HAS_CUDA=0 && \
|
|
||||||
if command -v nvidia-smi >/dev/null 2>&1; then \
|
|
||||||
CUDA_INFO=$$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1) && \
|
|
||||||
if [ -n "$$CUDA_INFO" ]; then \
|
|
||||||
echo "Found NVIDIA GPU with driver: $$CUDA_INFO" && \
|
|
||||||
HAS_CUDA=1; \
|
|
||||||
else \
|
|
||||||
echo "NVIDIA driver found but could not detect CUDA version"; \
|
|
||||||
fi; \
|
|
||||||
else \
|
|
||||||
echo "No NVIDIA GPU detected (nvidia-smi not found)"; \
|
|
||||||
fi && \
|
|
||||||
echo "" && \
|
|
||||||
echo "Determining ONNX Runtime version..." && \
|
|
||||||
ARCH=$$(uname -m) && \
|
|
||||||
if [ "$$ARCH" = "x86_64" ]; then \
|
|
||||||
ONNX_ARCH="x64"; \
|
|
||||||
elif [ "$$ARCH" = "aarch64" ] || [ "$$ARCH" = "arm64" ]; then \
|
|
||||||
ONNX_ARCH="aarch64"; \
|
|
||||||
else \
|
|
||||||
echo "Unsupported architecture: $$ARCH" && \
|
|
||||||
exit 1; \
|
|
||||||
fi && \
|
|
||||||
echo "Detected architecture: $$ARCH (ONNX runtime: $$ONNX_ARCH)" && \
|
|
||||||
if [ "$$HAS_CUDA" = "1" ]; then \
|
|
||||||
echo "Installing ONNX Runtime with CUDA support..."; \
|
|
||||||
ONNX_VERSION="1.24.2"; \
|
|
||||||
else \
|
|
||||||
echo "Installing ONNX Runtime (CPU version)..."; \
|
|
||||||
ONNX_VERSION="1.24.2"; \
|
|
||||||
fi && \
|
|
||||||
FILENAME="onnxruntime-linux-$${ONNX_ARCH}-${ONNX_VERSION}.tgz" && \
|
|
||||||
URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \
|
|
||||||
echo "Downloading $${URL}..." && \
|
|
||||||
mkdir -p /tmp/onnx-install && \
|
|
||||||
curl -L -o /tmp/onnx-install/$${FILENAME} "$${URL}" || { \
|
|
||||||
echo "Failed to download ONNX Runtime v$${ONNX_VERSION}. Trying v1.18.0..." && \
|
|
||||||
ONNX_VERSION="1.18.0" && \
|
|
||||||
FILENAME="onnxruntime-linux-$${ONNX_ARCH}-${ONNX_VERSION}.tgz" && \
|
|
||||||
URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \
|
|
||||||
curl -L -o /tmp/onnx-install/$${FILENAME} "$${URL}" || { \
|
|
||||||
echo "ERROR: Failed to download ONNX Runtime from GitHub" && \
|
|
||||||
echo "" && \
|
|
||||||
echo "Please install manually:" && \
|
|
||||||
echo " 1. Go to https://github.com/microsoft/onnxruntime/releases" && \
|
|
||||||
echo " 2. Download onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \
|
|
||||||
echo " 3. Extract and copy to /usr/local/lib:" && \
|
|
||||||
echo " tar -xzf onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \
|
|
||||||
echo " sudo cp -r onnxruntime-linux-$${ONNX_ARCH}-VERSION/lib/* /usr/local/lib/" && \
|
|
||||||
echo " sudo ldconfig" && \
|
|
||||||
exit 1; \
|
|
||||||
}; \
|
|
||||||
} && \
|
|
||||||
echo "Extracting..." && \
|
|
||||||
cd /tmp/onnx-install && tar -xzf $${FILENAME} && \
|
|
||||||
echo "Installing to /usr/local/lib..." && \
|
|
||||||
ONNX_DIR=$$(find /tmp/onnx-install -maxdepth 1 -type d -name "onnxruntime-linux-*") && \
|
|
||||||
if [ -d "$${ONNX_DIR}/lib" ]; then \
|
|
||||||
cp -r $${ONNX_DIR}/lib/* /usr/local/lib/ 2>/dev/null || sudo cp -r $${ONNX_DIR}/lib/* /usr/local/lib/; \
|
|
||||||
else \
|
|
||||||
echo "ERROR: Could not find lib directory in extracted archive" && \
|
|
||||||
exit 1; \
|
|
||||||
fi && \
|
|
||||||
echo "Updating library cache..." && \
|
|
||||||
sudo ldconfig 2>/dev/null || ldconfig && \
|
|
||||||
echo "" && \
|
|
||||||
echo "=== Installation complete! ===" && \
|
|
||||||
echo "" && \
|
|
||||||
echo "Installed libraries:" && \
|
|
||||||
ldconfig -p | grep libonnxruntime || echo "(libraries may require logout/relogin to appear)" && \
|
|
||||||
echo "" && \
|
|
||||||
if [ "$$HAS_CUDA" = "1" ]; then \
|
|
||||||
echo "NOTE: CUDA-enabled ONNX Runtime installed."; \
|
|
||||||
echo "Ensure you also have CUDA libraries installed:"; \
|
|
||||||
echo " - libcudnn, libcublas, libcurand"; \
|
|
||||||
else \
|
|
||||||
echo "NOTE: CPU-only ONNX Runtime installed."; \
|
|
||||||
echo "For GPU support, install CUDA and re-run this script."; \
|
|
||||||
fi && \
|
|
||||||
rm -rf /tmp/onnx-install
|
|
||||||
|
|
||||||
# Whisper STT Setup (in batteries directory)
|
# Whisper STT Setup (in batteries directory)
|
||||||
setup-whisper: build-whisper download-whisper-model
|
setup-whisper: build-whisper download-whisper-model
|
||||||
|
|
||||||
|
|||||||
@@ -38,8 +38,3 @@ func RegisterA(toolNames []string, a AgenterA) {
|
|||||||
func Get(toolName string) AgenterB {
|
func Get(toolName string) AgenterB {
|
||||||
return RegistryB[toolName]
|
return RegistryB[toolName]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Register is a convenience wrapper for RegisterB.
|
|
||||||
func Register(toolName string, a AgenterB) {
|
|
||||||
RegisterB(toolName, a)
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -32,10 +32,10 @@ func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool)
|
|||||||
type AgentClient struct {
|
type AgentClient struct {
|
||||||
cfg *config.Config
|
cfg *config.Config
|
||||||
getToken func() string
|
getToken func() string
|
||||||
log slog.Logger
|
log *slog.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewAgentClient(cfg *config.Config, log slog.Logger, gt func() string) *AgentClient {
|
func NewAgentClient(cfg *config.Config, log *slog.Logger, gt func() string) *AgentClient {
|
||||||
return &AgentClient{
|
return &AgentClient{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
getToken: gt,
|
getToken: gt,
|
||||||
@@ -44,7 +44,7 @@ func NewAgentClient(cfg *config.Config, log slog.Logger, gt func() string) *Agen
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (ag *AgentClient) Log() *slog.Logger {
|
func (ag *AgentClient) Log() *slog.Logger {
|
||||||
return &ag.log
|
return ag.log
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ag *AgentClient) FormMsg(sysprompt, msg string) (io.Reader, error) {
|
func (ag *AgentClient) FormMsg(sysprompt, msg string) (io.Reader, error) {
|
||||||
@@ -63,11 +63,9 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
|
|||||||
{Role: "system", Content: sysprompt},
|
{Role: "system", Content: sysprompt},
|
||||||
{Role: "user", Content: msg},
|
{Role: "user", Content: msg},
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine API type
|
// Determine API type
|
||||||
isCompletion, isChat, isDeepSeek, isOpenRouter := detectAPI(api)
|
isCompletion, isChat, isDeepSeek, isOpenRouter := detectAPI(api)
|
||||||
ag.log.Debug("agent building request", "api", api, "isCompletion", isCompletion, "isChat", isChat, "isDeepSeek", isDeepSeek, "isOpenRouter", isOpenRouter)
|
ag.log.Debug("agent building request", "api", api, "isCompletion", isCompletion, "isChat", isChat, "isDeepSeek", isDeepSeek, "isOpenRouter", isOpenRouter)
|
||||||
|
|
||||||
// Build prompt for completion endpoints
|
// Build prompt for completion endpoints
|
||||||
if isCompletion {
|
if isCompletion {
|
||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
@@ -76,7 +74,6 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
|
|||||||
sb.WriteString("\n")
|
sb.WriteString("\n")
|
||||||
}
|
}
|
||||||
prompt := strings.TrimSpace(sb.String())
|
prompt := strings.TrimSpace(sb.String())
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case isDeepSeek:
|
case isDeepSeek:
|
||||||
// DeepSeek completion
|
// DeepSeek completion
|
||||||
@@ -95,7 +92,6 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
|
|||||||
return json.Marshal(req)
|
return json.Marshal(req)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Chat completions endpoints
|
// Chat completions endpoints
|
||||||
if isChat || !isCompletion {
|
if isChat || !isCompletion {
|
||||||
chatBody := &models.ChatBody{
|
chatBody := &models.ChatBody{
|
||||||
@@ -103,7 +99,6 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
|
|||||||
Stream: false, // Agents don't need streaming
|
Stream: false, // Agents don't need streaming
|
||||||
Messages: messages,
|
Messages: messages,
|
||||||
}
|
}
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case isDeepSeek:
|
case isDeepSeek:
|
||||||
// DeepSeek chat
|
// DeepSeek chat
|
||||||
@@ -122,7 +117,6 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
|
|||||||
return json.Marshal(req)
|
return json.Marshal(req)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback (should not reach here)
|
// Fallback (should not reach here)
|
||||||
ag.log.Warn("unknown API, using default chat completions format", "api", api)
|
ag.log.Warn("unknown API, using default chat completions format", "api", api)
|
||||||
chatBody := &models.ChatBody{
|
chatBody := &models.ChatBody{
|
||||||
@@ -165,7 +159,6 @@ func (ag *AgentClient) LLMRequest(body io.Reader) ([]byte, error) {
|
|||||||
ag.log.Error("agent LLM request failed", "status", resp.StatusCode, "response", string(responseBytes[:min(len(responseBytes), 1000)]))
|
ag.log.Error("agent LLM request failed", "status", resp.StatusCode, "response", string(responseBytes[:min(len(responseBytes), 1000)]))
|
||||||
return responseBytes, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(responseBytes[:min(len(responseBytes), 200)]))
|
return responseBytes, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(responseBytes[:min(len(responseBytes), 200)]))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse response and extract text content
|
// Parse response and extract text content
|
||||||
text, err := extractTextFromResponse(responseBytes)
|
text, err := extractTextFromResponse(responseBytes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -179,17 +172,16 @@ func (ag *AgentClient) LLMRequest(body io.Reader) ([]byte, error) {
|
|||||||
// extractTextFromResponse parses common LLM response formats and extracts the text content.
|
// extractTextFromResponse parses common LLM response formats and extracts the text content.
|
||||||
func extractTextFromResponse(data []byte) (string, error) {
|
func extractTextFromResponse(data []byte) (string, error) {
|
||||||
// Try to parse as generic JSON first
|
// Try to parse as generic JSON first
|
||||||
var genericResp map[string]interface{}
|
var genericResp map[string]any
|
||||||
if err := json.Unmarshal(data, &genericResp); err != nil {
|
if err := json.Unmarshal(data, &genericResp); err != nil {
|
||||||
// Not JSON, return as string
|
// Not JSON, return as string
|
||||||
return string(data), nil
|
return string(data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for OpenAI chat completion format
|
// Check for OpenAI chat completion format
|
||||||
if choices, ok := genericResp["choices"].([]interface{}); ok && len(choices) > 0 {
|
if choices, ok := genericResp["choices"].([]any); ok && len(choices) > 0 {
|
||||||
if firstChoice, ok := choices[0].(map[string]interface{}); ok {
|
if firstChoice, ok := choices[0].(map[string]any); ok {
|
||||||
// Chat completion: choices[0].message.content
|
// Chat completion: choices[0].message.content
|
||||||
if message, ok := firstChoice["message"].(map[string]interface{}); ok {
|
if message, ok := firstChoice["message"].(map[string]any); ok {
|
||||||
if content, ok := message["content"].(string); ok {
|
if content, ok := message["content"].(string); ok {
|
||||||
return content, nil
|
return content, nil
|
||||||
}
|
}
|
||||||
@@ -199,19 +191,17 @@ func extractTextFromResponse(data []byte) (string, error) {
|
|||||||
return text, nil
|
return text, nil
|
||||||
}
|
}
|
||||||
// Delta format for streaming (should not happen with stream: false)
|
// Delta format for streaming (should not happen with stream: false)
|
||||||
if delta, ok := firstChoice["delta"].(map[string]interface{}); ok {
|
if delta, ok := firstChoice["delta"].(map[string]any); ok {
|
||||||
if content, ok := delta["content"].(string); ok {
|
if content, ok := delta["content"].(string); ok {
|
||||||
return content, nil
|
return content, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for llama.cpp completion format
|
// Check for llama.cpp completion format
|
||||||
if content, ok := genericResp["content"].(string); ok {
|
if content, ok := genericResp["content"].(string); ok {
|
||||||
return content, nil
|
return content, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unknown format, return pretty-printed JSON
|
// Unknown format, return pretty-printed JSON
|
||||||
prettyJSON, err := json.MarshalIndent(genericResp, "", " ")
|
prettyJSON, err := json.MarshalIndent(genericResp, "", " ")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -219,10 +209,3 @@ func extractTextFromResponse(data []byte) (string, error) {
|
|||||||
}
|
}
|
||||||
return string(prettyJSON), nil
|
return string(prettyJSON), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func min(a, b int) int {
|
|
||||||
if a < b {
|
|
||||||
return a
|
|
||||||
}
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|||||||
172
bot.go
172
bot.go
@@ -22,7 +22,7 @@ import (
|
|||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync/atomic"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ var (
|
|||||||
chunkChan = make(chan string, 10)
|
chunkChan = make(chan string, 10)
|
||||||
openAIToolChan = make(chan string, 10)
|
openAIToolChan = make(chan string, 10)
|
||||||
streamDone = make(chan bool, 1)
|
streamDone = make(chan bool, 1)
|
||||||
chatBody *models.SafeChatBody
|
chatBody *models.ChatBody
|
||||||
store storage.FullRepo
|
store storage.FullRepo
|
||||||
defaultFirstMsg = "Hello! What can I do for you?"
|
defaultFirstMsg = "Hello! What can I do for you?"
|
||||||
defaultStarter = []models.RoleMsg{}
|
defaultStarter = []models.RoleMsg{}
|
||||||
@@ -49,6 +49,7 @@ var (
|
|||||||
//nolint:unused // TTS_ENABLED conditionally uses this
|
//nolint:unused // TTS_ENABLED conditionally uses this
|
||||||
orator Orator
|
orator Orator
|
||||||
asr STT
|
asr STT
|
||||||
|
localModelsMu sync.RWMutex
|
||||||
defaultLCPProps = map[string]float32{
|
defaultLCPProps = map[string]float32{
|
||||||
"temperature": 0.8,
|
"temperature": 0.8,
|
||||||
"dry_multiplier": 0.0,
|
"dry_multiplier": 0.0,
|
||||||
@@ -63,17 +64,11 @@ var (
|
|||||||
"google/gemma-3-27b-it:free",
|
"google/gemma-3-27b-it:free",
|
||||||
"meta-llama/llama-3.3-70b-instruct:free",
|
"meta-llama/llama-3.3-70b-instruct:free",
|
||||||
}
|
}
|
||||||
LocalModels atomic.Value // stores []string
|
LocalModels = []string{}
|
||||||
localModelsData atomic.Value // stores *models.LCPModels
|
localModelsData *models.LCPModels
|
||||||
orModelsData atomic.Value // stores *models.ORModels
|
orModelsData *models.ORModels
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
|
||||||
LocalModels.Store([]string{})
|
|
||||||
localModelsData.Store((*models.LCPModels)(nil))
|
|
||||||
orModelsData.Store((*models.ORModels)(nil))
|
|
||||||
}
|
|
||||||
|
|
||||||
var thinkBlockRE = regexp.MustCompile(`(?s)<think>.*?</think>`)
|
var thinkBlockRE = regexp.MustCompile(`(?s)<think>.*?</think>`)
|
||||||
|
|
||||||
// parseKnownToTag extracts known_to list from content using configured tag.
|
// parseKnownToTag extracts known_to list from content using configured tag.
|
||||||
@@ -267,13 +262,13 @@ func warmUpModel() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Check if model is already loaded
|
// Check if model is already loaded
|
||||||
loaded, err := isModelLoaded(chatBody.GetModel())
|
loaded, err := isModelLoaded(chatBody.Model)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Debug("failed to check model status", "model", chatBody.GetModel(), "error", err)
|
logger.Debug("failed to check model status", "model", chatBody.Model, "error", err)
|
||||||
// Continue with warmup attempt anyway
|
// Continue with warmup attempt anyway
|
||||||
}
|
}
|
||||||
if loaded {
|
if loaded {
|
||||||
showToast("model already loaded", "Model "+chatBody.GetModel()+" is already loaded.")
|
showToast("model already loaded", "Model "+chatBody.Model+" is already loaded.")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
go func() {
|
go func() {
|
||||||
@@ -282,7 +277,7 @@ func warmUpModel() {
|
|||||||
switch {
|
switch {
|
||||||
case strings.HasSuffix(cfg.CurrentAPI, "/completion"):
|
case strings.HasSuffix(cfg.CurrentAPI, "/completion"):
|
||||||
// Old completion endpoint
|
// Old completion endpoint
|
||||||
req := models.NewLCPReq(".", chatBody.GetModel(), nil, map[string]float32{
|
req := models.NewLCPReq(".", chatBody.Model, nil, map[string]float32{
|
||||||
"temperature": 0.8,
|
"temperature": 0.8,
|
||||||
"dry_multiplier": 0.0,
|
"dry_multiplier": 0.0,
|
||||||
"min_p": 0.05,
|
"min_p": 0.05,
|
||||||
@@ -294,7 +289,7 @@ func warmUpModel() {
|
|||||||
// OpenAI-compatible chat endpoint
|
// OpenAI-compatible chat endpoint
|
||||||
req := models.OpenAIReq{
|
req := models.OpenAIReq{
|
||||||
ChatBody: &models.ChatBody{
|
ChatBody: &models.ChatBody{
|
||||||
Model: chatBody.GetModel(),
|
Model: chatBody.Model,
|
||||||
Messages: []models.RoleMsg{
|
Messages: []models.RoleMsg{
|
||||||
{Role: "system", Content: "."},
|
{Role: "system", Content: "."},
|
||||||
},
|
},
|
||||||
@@ -318,7 +313,7 @@ func warmUpModel() {
|
|||||||
}
|
}
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
// Start monitoring for model load completion
|
// Start monitoring for model load completion
|
||||||
monitorModelLoad(chatBody.GetModel())
|
monitorModelLoad(chatBody.Model)
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -361,7 +356,7 @@ func fetchORModels(free bool) ([]string, error) {
|
|||||||
if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
|
if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
orModelsData.Store(data)
|
orModelsData = data
|
||||||
freeModels := data.ListModels(free)
|
freeModels := data.ListModels(free)
|
||||||
return freeModels, nil
|
return freeModels, nil
|
||||||
}
|
}
|
||||||
@@ -423,7 +418,7 @@ func fetchLCPModelsWithStatus() (*models.LCPModels, error) {
|
|||||||
if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
|
if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
localModelsData.Store(data)
|
localModelsData = data
|
||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -826,10 +821,10 @@ func chatRound(r *models.ChatRoundReq) error {
|
|||||||
}
|
}
|
||||||
go sendMsgToLLM(reader)
|
go sendMsgToLLM(reader)
|
||||||
logger.Debug("looking at vars in chatRound", "msg", r.UserMsg, "regen", r.Regen, "resume", r.Resume)
|
logger.Debug("looking at vars in chatRound", "msg", r.UserMsg, "regen", r.Regen, "resume", r.Resume)
|
||||||
msgIdx := chatBody.GetMessageCount()
|
msgIdx := len(chatBody.Messages)
|
||||||
if !r.Resume {
|
if !r.Resume {
|
||||||
// Add empty message to chatBody immediately so it persists during Alt+T toggle
|
// Add empty message to chatBody immediately so it persists during Alt+T toggle
|
||||||
chatBody.AppendMessage(models.RoleMsg{
|
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{
|
||||||
Role: botPersona, Content: "",
|
Role: botPersona, Content: "",
|
||||||
})
|
})
|
||||||
nl := "\n\n"
|
nl := "\n\n"
|
||||||
@@ -841,7 +836,7 @@ func chatRound(r *models.ChatRoundReq) error {
|
|||||||
}
|
}
|
||||||
fmt.Fprintf(textView, "%s[-:-:b](%d) %s[-:-:-]\n", nl, msgIdx, roleToIcon(botPersona))
|
fmt.Fprintf(textView, "%s[-:-:b](%d) %s[-:-:-]\n", nl, msgIdx, roleToIcon(botPersona))
|
||||||
} else {
|
} else {
|
||||||
msgIdx = chatBody.GetMessageCount() - 1
|
msgIdx = len(chatBody.Messages) - 1
|
||||||
}
|
}
|
||||||
respText := strings.Builder{}
|
respText := strings.Builder{}
|
||||||
toolResp := strings.Builder{}
|
toolResp := strings.Builder{}
|
||||||
@@ -898,10 +893,7 @@ out:
|
|||||||
fmt.Fprint(textView, chunk)
|
fmt.Fprint(textView, chunk)
|
||||||
respText.WriteString(chunk)
|
respText.WriteString(chunk)
|
||||||
// Update the message in chatBody.Messages so it persists during Alt+T
|
// Update the message in chatBody.Messages so it persists during Alt+T
|
||||||
chatBody.UpdateMessageFunc(msgIdx, func(msg models.RoleMsg) models.RoleMsg {
|
chatBody.Messages[msgIdx].Content = respText.String()
|
||||||
msg.Content = respText.String()
|
|
||||||
return msg
|
|
||||||
})
|
|
||||||
if scrollToEndEnabled {
|
if scrollToEndEnabled {
|
||||||
textView.ScrollToEnd()
|
textView.ScrollToEnd()
|
||||||
}
|
}
|
||||||
@@ -944,32 +936,29 @@ out:
|
|||||||
}
|
}
|
||||||
botRespMode = false
|
botRespMode = false
|
||||||
if r.Resume {
|
if r.Resume {
|
||||||
chatBody.UpdateMessageFunc(chatBody.GetMessageCount()-1, func(msg models.RoleMsg) models.RoleMsg {
|
chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String()
|
||||||
msg.Content += respText.String()
|
updatedMsg := chatBody.Messages[len(chatBody.Messages)-1]
|
||||||
processedMsg := processMessageTag(&msg)
|
processedMsg := processMessageTag(&updatedMsg)
|
||||||
if msgStats != nil && processedMsg.Role != cfg.ToolRole {
|
chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg
|
||||||
processedMsg.Stats = msgStats
|
if msgStats != nil && chatBody.Messages[len(chatBody.Messages)-1].Role != cfg.ToolRole {
|
||||||
}
|
chatBody.Messages[len(chatBody.Messages)-1].Stats = msgStats
|
||||||
return *processedMsg
|
}
|
||||||
})
|
|
||||||
} else {
|
} else {
|
||||||
chatBody.UpdateMessageFunc(msgIdx, func(msg models.RoleMsg) models.RoleMsg {
|
chatBody.Messages[msgIdx].Content = respText.String()
|
||||||
msg.Content = respText.String()
|
processedMsg := processMessageTag(&chatBody.Messages[msgIdx])
|
||||||
processedMsg := processMessageTag(&msg)
|
chatBody.Messages[msgIdx] = *processedMsg
|
||||||
if msgStats != nil && processedMsg.Role != cfg.ToolRole {
|
if msgStats != nil && chatBody.Messages[msgIdx].Role != cfg.ToolRole {
|
||||||
processedMsg.Stats = msgStats
|
chatBody.Messages[msgIdx].Stats = msgStats
|
||||||
}
|
}
|
||||||
return *processedMsg
|
stopTTSIfNotForUser(&chatBody.Messages[msgIdx])
|
||||||
})
|
|
||||||
stopTTSIfNotForUser(&chatBody.GetMessages()[msgIdx])
|
|
||||||
}
|
}
|
||||||
cleanChatBody()
|
cleanChatBody()
|
||||||
refreshChatDisplay()
|
refreshChatDisplay()
|
||||||
updateStatusLine()
|
updateStatusLine()
|
||||||
// bot msg is done;
|
// bot msg is done;
|
||||||
// now check it for func call
|
// now check it for func call
|
||||||
// logChat(activeChatName, chatBody.GetMessages())
|
// logChat(activeChatName, chatBody.Messages)
|
||||||
if err := updateStorageChat(activeChatName, chatBody.GetMessages()); err != nil {
|
if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
|
||||||
logger.Warn("failed to update storage", "error", err, "name", activeChatName)
|
logger.Warn("failed to update storage", "error", err, "name", activeChatName)
|
||||||
}
|
}
|
||||||
// Strip think blocks before parsing for tool calls
|
// Strip think blocks before parsing for tool calls
|
||||||
@@ -984,8 +973,8 @@ out:
|
|||||||
// If so, trigger those characters to respond if that char is not controlled by user
|
// If so, trigger those characters to respond if that char is not controlled by user
|
||||||
// perhaps we should have narrator role to determine which char is next to act
|
// perhaps we should have narrator role to determine which char is next to act
|
||||||
if cfg.AutoTurn {
|
if cfg.AutoTurn {
|
||||||
lastMsg, ok := chatBody.GetLastMessage()
|
lastMsg := chatBody.Messages[len(chatBody.Messages)-1]
|
||||||
if ok && len(lastMsg.KnownTo) > 0 {
|
if len(lastMsg.KnownTo) > 0 {
|
||||||
triggerPrivateMessageResponses(&lastMsg)
|
triggerPrivateMessageResponses(&lastMsg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -994,15 +983,13 @@ out:
|
|||||||
|
|
||||||
// cleanChatBody removes messages with null or empty content to prevent API issues
|
// cleanChatBody removes messages with null or empty content to prevent API issues
|
||||||
func cleanChatBody() {
|
func cleanChatBody() {
|
||||||
if chatBody == nil || chatBody.GetMessageCount() == 0 {
|
if chatBody == nil || chatBody.Messages == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Tool request cleaning is now configurable via AutoCleanToolCallsFromCtx (default false)
|
// Tool request cleaning is now configurable via AutoCleanToolCallsFromCtx (default false)
|
||||||
// /completion msg where part meant for user and other part tool call
|
// /completion msg where part meant for user and other part tool call
|
||||||
// chatBody.Messages = cleanToolCalls(chatBody.Messages)
|
// chatBody.Messages = cleanToolCalls(chatBody.Messages)
|
||||||
chatBody.WithLock(func(cb *models.ChatBody) {
|
chatBody.Messages = consolidateAssistantMessages(chatBody.Messages)
|
||||||
cb.Messages = consolidateAssistantMessages(cb.Messages)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertJSONToMapStringString unmarshals JSON into map[string]interface{} and converts all values to strings.
|
// convertJSONToMapStringString unmarshals JSON into map[string]interface{} and converts all values to strings.
|
||||||
@@ -1102,7 +1089,7 @@ func findCall(msg, toolCall string) bool {
|
|||||||
Content: fmt.Sprintf("Error processing tool call: %v. Please check the JSON format and try again.", err),
|
Content: fmt.Sprintf("Error processing tool call: %v. Please check the JSON format and try again.", err),
|
||||||
ToolCallID: lastToolCall.ID, // Use the stored tool call ID
|
ToolCallID: lastToolCall.ID, // Use the stored tool call ID
|
||||||
}
|
}
|
||||||
chatBody.AppendMessage(toolResponseMsg)
|
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
|
||||||
// Clear the stored tool call ID after using it (no longer needed)
|
// Clear the stored tool call ID after using it (no longer needed)
|
||||||
// Trigger the assistant to continue processing with the error message
|
// Trigger the assistant to continue processing with the error message
|
||||||
crr := &models.ChatRoundReq{
|
crr := &models.ChatRoundReq{
|
||||||
@@ -1139,7 +1126,7 @@ func findCall(msg, toolCall string) bool {
|
|||||||
Role: cfg.ToolRole,
|
Role: cfg.ToolRole,
|
||||||
Content: "Error processing tool call: no valid JSON found. Please check the JSON format.",
|
Content: "Error processing tool call: no valid JSON found. Please check the JSON format.",
|
||||||
}
|
}
|
||||||
chatBody.AppendMessage(toolResponseMsg)
|
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
|
||||||
crr := &models.ChatRoundReq{
|
crr := &models.ChatRoundReq{
|
||||||
Role: cfg.AssistantRole,
|
Role: cfg.AssistantRole,
|
||||||
}
|
}
|
||||||
@@ -1156,8 +1143,8 @@ func findCall(msg, toolCall string) bool {
|
|||||||
Role: cfg.ToolRole,
|
Role: cfg.ToolRole,
|
||||||
Content: fmt.Sprintf("Error processing tool call: %v. Please check the JSON format and try again.", err),
|
Content: fmt.Sprintf("Error processing tool call: %v. Please check the JSON format and try again.", err),
|
||||||
}
|
}
|
||||||
chatBody.AppendMessage(toolResponseMsg)
|
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
|
||||||
logger.Debug("findCall: added tool error response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "message_count_after_add", chatBody.GetMessageCount())
|
logger.Debug("findCall: added tool error response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "message_count_after_add", len(chatBody.Messages))
|
||||||
// Trigger the assistant to continue processing with the error message
|
// Trigger the assistant to continue processing with the error message
|
||||||
// chatRound("", cfg.AssistantRole, tv, false, false)
|
// chatRound("", cfg.AssistantRole, tv, false, false)
|
||||||
crr := &models.ChatRoundReq{
|
crr := &models.ChatRoundReq{
|
||||||
@@ -1175,23 +1162,17 @@ func findCall(msg, toolCall string) bool {
|
|||||||
// we got here => last msg recognized as a tool call (correct or not)
|
// we got here => last msg recognized as a tool call (correct or not)
|
||||||
// Use the tool call ID from streaming response (lastToolCall.ID)
|
// Use the tool call ID from streaming response (lastToolCall.ID)
|
||||||
// Don't generate random ID - the ID should match between assistant message and tool response
|
// Don't generate random ID - the ID should match between assistant message and tool response
|
||||||
lastMsgIdx := chatBody.GetMessageCount() - 1
|
lastMsgIdx := len(chatBody.Messages) - 1
|
||||||
if lastToolCall.ID != "" {
|
if lastToolCall.ID != "" {
|
||||||
chatBody.UpdateMessageFunc(lastMsgIdx, func(msg models.RoleMsg) models.RoleMsg {
|
chatBody.Messages[lastMsgIdx].ToolCallID = lastToolCall.ID
|
||||||
msg.ToolCallID = lastToolCall.ID
|
|
||||||
return msg
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
// Store tool call info in the assistant message
|
// Store tool call info in the assistant message
|
||||||
// Convert Args map to JSON string for storage
|
// Convert Args map to JSON string for storage
|
||||||
chatBody.UpdateMessageFunc(lastMsgIdx, func(msg models.RoleMsg) models.RoleMsg {
|
chatBody.Messages[lastMsgIdx].ToolCall = &models.ToolCall{
|
||||||
msg.ToolCall = &models.ToolCall{
|
ID: lastToolCall.ID,
|
||||||
ID: lastToolCall.ID,
|
Name: lastToolCall.Name,
|
||||||
Name: lastToolCall.Name,
|
Args: mapToString(lastToolCall.Args),
|
||||||
Args: mapToString(lastToolCall.Args),
|
}
|
||||||
}
|
|
||||||
return msg
|
|
||||||
})
|
|
||||||
// call a func
|
// call a func
|
||||||
_, ok := fnMap[fc.Name]
|
_, ok := fnMap[fc.Name]
|
||||||
if !ok {
|
if !ok {
|
||||||
@@ -1202,8 +1183,8 @@ func findCall(msg, toolCall string) bool {
|
|||||||
Content: m,
|
Content: m,
|
||||||
ToolCallID: lastToolCall.ID, // Use the stored tool call ID
|
ToolCallID: lastToolCall.ID, // Use the stored tool call ID
|
||||||
}
|
}
|
||||||
chatBody.AppendMessage(toolResponseMsg)
|
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
|
||||||
logger.Debug("findCall: added tool not implemented response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", chatBody.GetMessageCount())
|
logger.Debug("findCall: added tool not implemented response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
|
||||||
// Clear the stored tool call ID after using it
|
// Clear the stored tool call ID after using it
|
||||||
lastToolCall.ID = ""
|
lastToolCall.ID = ""
|
||||||
// Trigger the assistant to continue processing with the new tool response
|
// Trigger the assistant to continue processing with the new tool response
|
||||||
@@ -1274,9 +1255,9 @@ func findCall(msg, toolCall string) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
||||||
"\n\n", chatBody.GetMessageCount(), cfg.ToolRole, toolResponseMsg.GetText())
|
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
|
||||||
chatBody.AppendMessage(toolResponseMsg)
|
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
|
||||||
logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", chatBody.GetMessageCount())
|
logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
|
||||||
// Clear the stored tool call ID after using it
|
// Clear the stored tool call ID after using it
|
||||||
lastToolCall.ID = ""
|
lastToolCall.ID = ""
|
||||||
// Trigger the assistant to continue processing with the new tool response
|
// Trigger the assistant to continue processing with the new tool response
|
||||||
@@ -1406,29 +1387,28 @@ func charToStart(agentName string, keepSysP bool) bool {
|
|||||||
func updateModelLists() {
|
func updateModelLists() {
|
||||||
var err error
|
var err error
|
||||||
if cfg.OpenRouterToken != "" {
|
if cfg.OpenRouterToken != "" {
|
||||||
_, err := fetchORModels(true)
|
ORFreeModels, err = fetchORModels(true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warn("failed to fetch or models", "error", err)
|
logger.Warn("failed to fetch or models", "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if llama.cpp started after gf-lt?
|
// if llama.cpp started after gf-lt?
|
||||||
ml, err := fetchLCPModelsWithLoadStatus()
|
localModelsMu.Lock()
|
||||||
|
LocalModels, err = fetchLCPModelsWithLoadStatus()
|
||||||
|
localModelsMu.Unlock()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warn("failed to fetch llama.cpp models", "error", err)
|
logger.Warn("failed to fetch llama.cpp models", "error", err)
|
||||||
}
|
}
|
||||||
LocalModels.Store(ml)
|
|
||||||
for statusLineWidget == nil {
|
|
||||||
time.Sleep(time.Millisecond * 100)
|
|
||||||
}
|
|
||||||
// set already loaded model in llama.cpp
|
// set already loaded model in llama.cpp
|
||||||
if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") {
|
if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") {
|
||||||
modelList := LocalModels.Load().([]string)
|
localModelsMu.Lock()
|
||||||
for i := range modelList {
|
defer localModelsMu.Unlock()
|
||||||
if strings.Contains(modelList[i], models.LoadedMark) {
|
for i := range LocalModels {
|
||||||
m := strings.TrimPrefix(modelList[i], models.LoadedMark)
|
if strings.Contains(LocalModels[i], models.LoadedMark) {
|
||||||
|
m := strings.TrimPrefix(LocalModels[i], models.LoadedMark)
|
||||||
cfg.CurrentModel = m
|
cfg.CurrentModel = m
|
||||||
chatBody.Model = m
|
chatBody.Model = m
|
||||||
cachedModelColor.Store("green")
|
cachedModelColor = "green"
|
||||||
updateStatusLine()
|
updateStatusLine()
|
||||||
updateToolCapabilities()
|
updateToolCapabilities()
|
||||||
app.Draw()
|
app.Draw()
|
||||||
@@ -1439,17 +1419,21 @@ func updateModelLists() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func refreshLocalModelsIfEmpty() {
|
func refreshLocalModelsIfEmpty() {
|
||||||
models := LocalModels.Load().([]string)
|
localModelsMu.RLock()
|
||||||
if len(models) > 0 {
|
if len(LocalModels) > 0 {
|
||||||
|
localModelsMu.RUnlock()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
localModelsMu.RUnlock()
|
||||||
// try to fetch
|
// try to fetch
|
||||||
models, err := fetchLCPModels()
|
models, err := fetchLCPModels()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warn("failed to fetch llama.cpp models", "error", err)
|
logger.Warn("failed to fetch llama.cpp models", "error", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
LocalModels.Store(models)
|
localModelsMu.Lock()
|
||||||
|
LocalModels = models
|
||||||
|
localModelsMu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func summarizeAndStartNewChat() {
|
func summarizeAndStartNewChat() {
|
||||||
@@ -1509,20 +1493,14 @@ func init() {
|
|||||||
// load cards
|
// load cards
|
||||||
basicCard.Role = cfg.AssistantRole
|
basicCard.Role = cfg.AssistantRole
|
||||||
logLevel.Set(slog.LevelInfo)
|
logLevel.Set(slog.LevelInfo)
|
||||||
logger = slog.New(slog.NewTextHandler(logfile, &slog.HandlerOptions{Level: logLevel, AddSource: true}))
|
logger = slog.New(slog.NewTextHandler(logfile, &slog.HandlerOptions{Level: logLevel}))
|
||||||
store = storage.NewProviderSQL(cfg.DBPATH, logger)
|
store = storage.NewProviderSQL(cfg.DBPATH, logger)
|
||||||
if store == nil {
|
if store == nil {
|
||||||
cancel()
|
cancel()
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ragger, err = rag.New(logger, store, cfg)
|
ragger = rag.New(logger, store, cfg)
|
||||||
if err != nil {
|
|
||||||
logger.Error("failed to create RAG", "error", err)
|
|
||||||
}
|
|
||||||
if ragger != nil && ragger.FallbackMessage() != "" && app != nil {
|
|
||||||
showToast("RAG", "ONNX unavailable, using API: "+ragger.FallbackMessage())
|
|
||||||
}
|
|
||||||
// https://github.com/coreydaley/ggerganov-llama.cpp/blob/master/examples/server/README.md
|
// https://github.com/coreydaley/ggerganov-llama.cpp/blob/master/examples/server/README.md
|
||||||
// load all chats in memory
|
// load all chats in memory
|
||||||
if _, err := loadHistoryChats(); err != nil {
|
if _, err := loadHistoryChats(); err != nil {
|
||||||
@@ -1533,11 +1511,11 @@ func init() {
|
|||||||
}
|
}
|
||||||
lastToolCall = &models.FuncCall{}
|
lastToolCall = &models.FuncCall{}
|
||||||
lastChat := loadOldChatOrGetNew()
|
lastChat := loadOldChatOrGetNew()
|
||||||
chatBody = models.NewSafeChatBody(&models.ChatBody{
|
chatBody = &models.ChatBody{
|
||||||
Model: "modelname",
|
Model: "modelname",
|
||||||
Stream: true,
|
Stream: true,
|
||||||
Messages: lastChat,
|
Messages: lastChat,
|
||||||
})
|
}
|
||||||
choseChunkParser()
|
choseChunkParser()
|
||||||
httpClient = createClient(time.Second * 90)
|
httpClient = createClient(time.Second * 90)
|
||||||
if cfg.TTS_ENABLED {
|
if cfg.TTS_ENABLED {
|
||||||
|
|||||||
@@ -13,9 +13,6 @@ OpenRouterChatAPI = "https://openrouter.ai/api/v1/chat/completions"
|
|||||||
# embeddings
|
# embeddings
|
||||||
EmbedURL = "http://localhost:8082/v1/embeddings"
|
EmbedURL = "http://localhost:8082/v1/embeddings"
|
||||||
HFToken = ""
|
HFToken = ""
|
||||||
EmbedModelPath = "onnx/embedgemma/model_q4.onnx"
|
|
||||||
EmbedTokenizerPath = "onnx/embedgemma/tokenizer.json"
|
|
||||||
EmbedDims = 768
|
|
||||||
#
|
#
|
||||||
ShowSys = true
|
ShowSys = true
|
||||||
LogFile = "log.txt"
|
LogFile = "log.txt"
|
||||||
@@ -27,9 +24,9 @@ ChunkLimit = 100000
|
|||||||
AutoScrollEnabled = true
|
AutoScrollEnabled = true
|
||||||
AutoCleanToolCallsFromCtx = false
|
AutoCleanToolCallsFromCtx = false
|
||||||
# rag settings
|
# rag settings
|
||||||
|
RAGEnabled = false
|
||||||
RAGBatchSize = 1
|
RAGBatchSize = 1
|
||||||
RAGWordLimit = 80
|
RAGWordLimit = 80
|
||||||
RAGOverlapWords = 16
|
|
||||||
RAGDir = "ragimport"
|
RAGDir = "ragimport"
|
||||||
# extra tts
|
# extra tts
|
||||||
TTS_ENABLED = false
|
TTS_ENABLED = false
|
||||||
|
|||||||
@@ -34,16 +34,13 @@ type Config struct {
|
|||||||
ImagePreview bool `toml:"ImagePreview"`
|
ImagePreview bool `toml:"ImagePreview"`
|
||||||
EnableMouse bool `toml:"EnableMouse"`
|
EnableMouse bool `toml:"EnableMouse"`
|
||||||
// embeddings
|
// embeddings
|
||||||
EmbedURL string `toml:"EmbedURL"`
|
EmbedURL string `toml:"EmbedURL"`
|
||||||
HFToken string `toml:"HFToken"`
|
HFToken string `toml:"HFToken"`
|
||||||
EmbedModelPath string `toml:"EmbedModelPath"`
|
|
||||||
EmbedTokenizerPath string `toml:"EmbedTokenizerPath"`
|
|
||||||
EmbedDims int `toml:"EmbedDims"`
|
|
||||||
// rag settings
|
// rag settings
|
||||||
RAGDir string `toml:"RAGDir"`
|
RAGEnabled bool `toml:"RAGEnabled"`
|
||||||
RAGBatchSize int `toml:"RAGBatchSize"`
|
RAGDir string `toml:"RAGDir"`
|
||||||
RAGWordLimit uint32 `toml:"RAGWordLimit"`
|
RAGBatchSize int `toml:"RAGBatchSize"`
|
||||||
RAGOverlapWords uint32 `toml:"RAGOverlapWords"`
|
RAGWordLimit uint32 `toml:"RAGWordLimit"`
|
||||||
// deepseek
|
// deepseek
|
||||||
DeepSeekChatAPI string `toml:"DeepSeekChatAPI"`
|
DeepSeekChatAPI string `toml:"DeepSeekChatAPI"`
|
||||||
DeepSeekCompletionAPI string `toml:"DeepSeekCompletionAPI"`
|
DeepSeekCompletionAPI string `toml:"DeepSeekCompletionAPI"`
|
||||||
|
|||||||
@@ -71,6 +71,9 @@ This document explains how to set up and configure the application using the `co
|
|||||||
#### EmbedURL (`"http://localhost:8082/v1/embeddings"`)
|
#### EmbedURL (`"http://localhost:8082/v1/embeddings"`)
|
||||||
- The endpoint for embedding API, used for RAG (Retrieval Augmented Generation) functionality.
|
- The endpoint for embedding API, used for RAG (Retrieval Augmented Generation) functionality.
|
||||||
|
|
||||||
|
#### RAGEnabled (`false`)
|
||||||
|
- Enable or disable RAG functionality for enhanced context retrieval.
|
||||||
|
|
||||||
#### RAGBatchSize (`1`)
|
#### RAGBatchSize (`1`)
|
||||||
- Number of documents to process in each RAG batch.
|
- Number of documents to process in each RAG batch.
|
||||||
|
|
||||||
|
|||||||
9
go.mod
9
go.mod
@@ -7,6 +7,7 @@ require (
|
|||||||
github.com/GrailFinder/google-translate-tts v0.1.3
|
github.com/GrailFinder/google-translate-tts v0.1.3
|
||||||
github.com/GrailFinder/searchagent v0.2.0
|
github.com/GrailFinder/searchagent v0.2.0
|
||||||
github.com/PuerkitoBio/goquery v1.11.0
|
github.com/PuerkitoBio/goquery v1.11.0
|
||||||
|
github.com/deckarep/golang-set/v2 v2.8.0
|
||||||
github.com/gdamore/tcell/v2 v2.13.2
|
github.com/gdamore/tcell/v2 v2.13.2
|
||||||
github.com/glebarez/go-sqlite v1.22.0
|
github.com/glebarez/go-sqlite v1.22.0
|
||||||
github.com/gopxl/beep/v2 v2.1.1
|
github.com/gopxl/beep/v2 v2.1.1
|
||||||
@@ -16,18 +17,14 @@ require (
|
|||||||
github.com/neurosnap/sentences v1.1.2
|
github.com/neurosnap/sentences v1.1.2
|
||||||
github.com/playwright-community/playwright-go v0.5700.1
|
github.com/playwright-community/playwright-go v0.5700.1
|
||||||
github.com/rivo/tview v0.42.0
|
github.com/rivo/tview v0.42.0
|
||||||
github.com/sugarme/tokenizer v0.3.0
|
|
||||||
github.com/yalue/onnxruntime_go v1.27.0
|
|
||||||
github.com/yuin/goldmark v1.4.13
|
github.com/yuin/goldmark v1.4.13
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/andybalholm/cascadia v1.3.3 // indirect
|
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||||
github.com/deckarep/golang-set/v2 v2.8.0 // indirect
|
|
||||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||||
github.com/ebitengine/oto/v3 v3.4.0 // indirect
|
github.com/ebitengine/oto/v3 v3.4.0 // indirect
|
||||||
github.com/ebitengine/purego v0.9.1 // indirect
|
github.com/ebitengine/purego v0.9.1 // indirect
|
||||||
github.com/emirpasic/gods v1.18.1 // indirect
|
|
||||||
github.com/gdamore/encoding v1.0.1 // indirect
|
github.com/gdamore/encoding v1.0.1 // indirect
|
||||||
github.com/go-jose/go-jose/v3 v3.0.4 // indirect
|
github.com/go-jose/go-jose/v3 v3.0.4 // indirect
|
||||||
github.com/go-stack/stack v1.8.1 // indirect
|
github.com/go-stack/stack v1.8.1 // indirect
|
||||||
@@ -36,14 +33,10 @@ require (
|
|||||||
github.com/hajimehoshi/oto/v2 v2.3.1 // indirect
|
github.com/hajimehoshi/oto/v2 v2.3.1 // indirect
|
||||||
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
|
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
|
||||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||||
github.com/patrickmn/go-cache v2.1.0+incompatible // indirect
|
|
||||||
github.com/pkg/errors v0.9.1 // indirect
|
github.com/pkg/errors v0.9.1 // indirect
|
||||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||||
github.com/rivo/uniseg v0.4.7 // indirect
|
github.com/rivo/uniseg v0.4.7 // indirect
|
||||||
github.com/schollz/progressbar/v2 v2.15.0 // indirect
|
|
||||||
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c // indirect
|
|
||||||
golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect
|
golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect
|
||||||
golang.org/x/net v0.48.0 // indirect
|
golang.org/x/net v0.48.0 // indirect
|
||||||
golang.org/x/sys v0.39.0 // indirect
|
golang.org/x/sys v0.39.0 // indirect
|
||||||
|
|||||||
15
go.sum
15
go.sum
@@ -21,8 +21,6 @@ github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/
|
|||||||
github.com/ebitengine/oto/v3 v3.4.0/go.mod h1:IOleLVD0m+CMak3mRVwsYY8vTctQgOM0iiL6S7Ar7eI=
|
github.com/ebitengine/oto/v3 v3.4.0/go.mod h1:IOleLVD0m+CMak3mRVwsYY8vTctQgOM0iiL6S7Ar7eI=
|
||||||
github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
|
github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
|
||||||
github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
||||||
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
|
|
||||||
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
|
|
||||||
github.com/gdamore/encoding v1.0.1 h1:YzKZckdBL6jVt2Gc+5p82qhrGiqMdG/eNs6Wy0u3Uhw=
|
github.com/gdamore/encoding v1.0.1 h1:YzKZckdBL6jVt2Gc+5p82qhrGiqMdG/eNs6Wy0u3Uhw=
|
||||||
github.com/gdamore/encoding v1.0.1/go.mod h1:0Z0cMFinngz9kS1QfMjCP8TY7em3bZYeeklsSDPivEo=
|
github.com/gdamore/encoding v1.0.1/go.mod h1:0Z0cMFinngz9kS1QfMjCP8TY7em3bZYeeklsSDPivEo=
|
||||||
github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx2g=
|
github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx2g=
|
||||||
@@ -63,14 +61,10 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
|
|||||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
|
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
|
||||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
|
||||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
|
||||||
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||||
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||||
github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7ZoUw=
|
github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7ZoUw=
|
||||||
github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ=
|
github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ=
|
||||||
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
|
|
||||||
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
|
|
||||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U=
|
github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U=
|
||||||
@@ -83,19 +77,10 @@ github.com/rivo/tview v0.42.0 h1:b/ftp+RxtDsHSaynXTbJb+/n/BxDEi+W3UfF5jILK6c=
|
|||||||
github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY=
|
github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY=
|
||||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||||
github.com/schollz/progressbar/v2 v2.15.0 h1:dVzHQ8fHRmtPjD3K10jT3Qgn/+H+92jhPrhmxIJfDz8=
|
|
||||||
github.com/schollz/progressbar/v2 v2.15.0/go.mod h1:UdPq3prGkfQ7MOzZKlDRpYKcFqEMczbD7YmbPgpzKMI=
|
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
|
||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c h1:pwb4kNSHb4K89ymCaN+5lPH/MwnfSVg4rzGDh4d+iy4=
|
|
||||||
github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c/go.mod h1:2gwkXLWbDGUQWeL3RtpCmcY4mzCtU13kb9UsAg9xMaw=
|
|
||||||
github.com/sugarme/tokenizer v0.3.0 h1:FE8DYbNSz/kSbgEo9l/RjgYHkIJYEdskumitFQBE9FE=
|
|
||||||
github.com/sugarme/tokenizer v0.3.0/go.mod h1:VJ+DLK5ZEZwzvODOWwY0cw+B1dabTd3nCB5HuFCItCc=
|
|
||||||
github.com/yalue/onnxruntime_go v1.27.0 h1:c1YSgDNtpf0WGtxj3YeRIb8VC5LmM1J+Ve3uHdteC1U=
|
|
||||||
github.com/yalue/onnxruntime_go v1.27.0/go.mod h1:b4X26A8pekNb1ACJ58wAXgNKeUCGEAQ9dmACut9Sm/4=
|
|
||||||
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
|
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
|
||||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
|||||||
114
helpfuncs.go
114
helpfuncs.go
@@ -16,17 +16,11 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
|
||||||
"sync/atomic"
|
|
||||||
|
|
||||||
"github.com/rivo/tview"
|
"github.com/rivo/tview"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Cached model color - updated by background goroutine
|
// Cached model color - updated by background goroutine
|
||||||
var cachedModelColor atomic.Value // stores string
|
var cachedModelColor string = "orange"
|
||||||
|
|
||||||
func init() {
|
|
||||||
cachedModelColor.Store("orange")
|
|
||||||
}
|
|
||||||
|
|
||||||
// startModelColorUpdater starts a background goroutine that periodically updates
|
// startModelColorUpdater starts a background goroutine that periodically updates
|
||||||
// the cached model color. Only runs HTTP requests for local llama.cpp APIs.
|
// the cached model color. Only runs HTTP requests for local llama.cpp APIs.
|
||||||
@@ -45,20 +39,20 @@ func startModelColorUpdater() {
|
|||||||
// updateCachedModelColor updates the global cachedModelColor variable
|
// updateCachedModelColor updates the global cachedModelColor variable
|
||||||
func updateCachedModelColor() {
|
func updateCachedModelColor() {
|
||||||
if !isLocalLlamacpp() {
|
if !isLocalLlamacpp() {
|
||||||
cachedModelColor.Store("orange")
|
cachedModelColor = "orange"
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Check if model is loaded
|
// Check if model is loaded
|
||||||
loaded, err := isModelLoaded(chatBody.GetModel())
|
loaded, err := isModelLoaded(chatBody.Model)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// On error, assume not loaded (red)
|
// On error, assume not loaded (red)
|
||||||
cachedModelColor.Store("red")
|
cachedModelColor = "red"
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if loaded {
|
if loaded {
|
||||||
cachedModelColor.Store("green")
|
cachedModelColor = "green"
|
||||||
} else {
|
} else {
|
||||||
cachedModelColor.Store("red")
|
cachedModelColor = "red"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,7 +103,7 @@ func refreshChatDisplay() {
|
|||||||
viewingAs = cfg.WriteNextMsgAs
|
viewingAs = cfg.WriteNextMsgAs
|
||||||
}
|
}
|
||||||
// Filter messages for this character
|
// Filter messages for this character
|
||||||
filteredMessages := filterMessagesForCharacter(chatBody.GetMessages(), viewingAs)
|
filteredMessages := filterMessagesForCharacter(chatBody.Messages, viewingAs)
|
||||||
displayText := chatToText(filteredMessages, cfg.ShowSys)
|
displayText := chatToText(filteredMessages, cfg.ShowSys)
|
||||||
textView.SetText(displayText)
|
textView.SetText(displayText)
|
||||||
colorText()
|
colorText()
|
||||||
@@ -223,8 +217,8 @@ func startNewChat(keepSysP bool) {
|
|||||||
logger.Warn("no such sys msg", "name", cfg.AssistantRole)
|
logger.Warn("no such sys msg", "name", cfg.AssistantRole)
|
||||||
}
|
}
|
||||||
// set chat body
|
// set chat body
|
||||||
chatBody.TruncateMessages(2)
|
chatBody.Messages = chatBody.Messages[:2]
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
newChat := &models.Chat{
|
newChat := &models.Chat{
|
||||||
ID: id + 1,
|
ID: id + 1,
|
||||||
Name: fmt.Sprintf("%d_%s", id+1, cfg.AssistantRole),
|
Name: fmt.Sprintf("%d_%s", id+1, cfg.AssistantRole),
|
||||||
@@ -341,7 +335,7 @@ func isLocalLlamacpp() bool {
|
|||||||
// The cached value is updated by a background goroutine every 5 seconds.
|
// The cached value is updated by a background goroutine every 5 seconds.
|
||||||
// For non-local models, returns orange. For local llama.cpp models, returns green if loaded, red if not.
|
// For non-local models, returns orange. For local llama.cpp models, returns green if loaded, red if not.
|
||||||
func getModelColor() string {
|
func getModelColor() string {
|
||||||
return cachedModelColor.Load().(string)
|
return cachedModelColor
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeStatusLine() string {
|
func makeStatusLine() string {
|
||||||
@@ -376,7 +370,7 @@ func makeStatusLine() string {
|
|||||||
// Get model color based on load status for local llama.cpp models
|
// Get model color based on load status for local llama.cpp models
|
||||||
modelColor := getModelColor()
|
modelColor := getModelColor()
|
||||||
statusLine := fmt.Sprintf(statusLineTempl, activeChatName,
|
statusLine := fmt.Sprintf(statusLineTempl, activeChatName,
|
||||||
boolColors[cfg.ToolUse], modelColor, chatBody.GetModel(), boolColors[cfg.SkipLLMResp],
|
boolColors[cfg.ToolUse], modelColor, chatBody.Model, boolColors[cfg.SkipLLMResp],
|
||||||
cfg.CurrentAPI, persona, botPersona)
|
cfg.CurrentAPI, persona, botPersona)
|
||||||
if cfg.STT_ENABLED {
|
if cfg.STT_ENABLED {
|
||||||
recordingS := fmt.Sprintf(" | [%s:-:b]voice recording[-:-:-] (ctrl+r)",
|
recordingS := fmt.Sprintf(" | [%s:-:b]voice recording[-:-:-] (ctrl+r)",
|
||||||
@@ -402,11 +396,11 @@ func makeStatusLine() string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getContextTokens() int {
|
func getContextTokens() int {
|
||||||
if chatBody == nil {
|
if chatBody == nil || chatBody.Messages == nil {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
total := 0
|
total := 0
|
||||||
messages := chatBody.GetMessages()
|
messages := chatBody.Messages
|
||||||
for i := range messages {
|
for i := range messages {
|
||||||
msg := &messages[i]
|
msg := &messages[i]
|
||||||
if msg.Stats != nil && msg.Stats.Tokens > 0 {
|
if msg.Stats != nil && msg.Stats.Tokens > 0 {
|
||||||
@@ -421,54 +415,46 @@ func getContextTokens() int {
|
|||||||
const deepseekContext = 128000
|
const deepseekContext = 128000
|
||||||
|
|
||||||
func getMaxContextTokens() int {
|
func getMaxContextTokens() int {
|
||||||
if chatBody == nil || chatBody.GetModel() == "" {
|
if chatBody == nil || chatBody.Model == "" {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
modelName := chatBody.GetModel()
|
modelName := chatBody.Model
|
||||||
switch {
|
switch {
|
||||||
case strings.Contains(cfg.CurrentAPI, "openrouter"):
|
case strings.Contains(cfg.CurrentAPI, "openrouter"):
|
||||||
ord := orModelsData.Load()
|
if orModelsData != nil {
|
||||||
if ord != nil {
|
for i := range orModelsData.Data {
|
||||||
data := ord.(*models.ORModels)
|
m := &orModelsData.Data[i]
|
||||||
if data != nil {
|
if m.ID == modelName {
|
||||||
for i := range data.Data {
|
return m.ContextLength
|
||||||
m := &data.Data[i]
|
|
||||||
if m.ID == modelName {
|
|
||||||
return m.ContextLength
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case strings.Contains(cfg.CurrentAPI, "deepseek"):
|
case strings.Contains(cfg.CurrentAPI, "deepseek"):
|
||||||
return deepseekContext
|
return deepseekContext
|
||||||
default:
|
default:
|
||||||
lmd := localModelsData.Load()
|
if localModelsData != nil {
|
||||||
if lmd != nil {
|
for i := range localModelsData.Data {
|
||||||
data := lmd.(*models.LCPModels)
|
m := &localModelsData.Data[i]
|
||||||
if data != nil {
|
if m.ID == modelName {
|
||||||
for i := range data.Data {
|
for _, arg := range m.Status.Args {
|
||||||
m := &data.Data[i]
|
if strings.HasPrefix(arg, "--ctx-size") {
|
||||||
if m.ID == modelName {
|
if strings.Contains(arg, "=") {
|
||||||
for _, arg := range m.Status.Args {
|
val := strings.Split(arg, "=")[1]
|
||||||
if strings.HasPrefix(arg, "--ctx-size") {
|
if n, err := strconv.Atoi(val); err == nil {
|
||||||
if strings.Contains(arg, "=") {
|
return n
|
||||||
val := strings.Split(arg, "=")[1]
|
}
|
||||||
if n, err := strconv.Atoi(val); err == nil {
|
} else {
|
||||||
|
idx := -1
|
||||||
|
for j, a := range m.Status.Args {
|
||||||
|
if a == "--ctx-size" && j+1 < len(m.Status.Args) {
|
||||||
|
idx = j + 1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if idx != -1 {
|
||||||
|
if n, err := strconv.Atoi(m.Status.Args[idx]); err == nil {
|
||||||
return n
|
return n
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
idx := -1
|
|
||||||
for j, a := range m.Status.Args {
|
|
||||||
if a == "--ctx-size" && j+1 < len(m.Status.Args) {
|
|
||||||
idx = j + 1
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if idx != -1 {
|
|
||||||
if n, err := strconv.Atoi(m.Status.Args[idx]); err == nil {
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -504,7 +490,7 @@ func listChatRoles() []string {
|
|||||||
|
|
||||||
func deepseekModelValidator() error {
|
func deepseekModelValidator() error {
|
||||||
if cfg.CurrentAPI == cfg.DeepSeekChatAPI || cfg.CurrentAPI == cfg.DeepSeekCompletionAPI {
|
if cfg.CurrentAPI == cfg.DeepSeekChatAPI || cfg.CurrentAPI == cfg.DeepSeekCompletionAPI {
|
||||||
if chatBody.GetModel() != "deepseek-chat" && chatBody.GetModel() != "deepseek-reasoner" {
|
if chatBody.Model != "deepseek-chat" && chatBody.Model != "deepseek-reasoner" {
|
||||||
showToast("bad request", "wrong deepseek model name")
|
showToast("bad request", "wrong deepseek model name")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -535,7 +521,7 @@ func updateFlexLayout() {
|
|||||||
if shellMode {
|
if shellMode {
|
||||||
flex.AddItem(shellInput, 0, 10, false)
|
flex.AddItem(shellInput, 0, 10, false)
|
||||||
} else {
|
} else {
|
||||||
flex.AddItem(bottomFlex, 0, 10, true)
|
flex.AddItem(textArea, 0, 10, false)
|
||||||
}
|
}
|
||||||
if positionVisible {
|
if positionVisible {
|
||||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
flex.AddItem(statusLineWidget, 0, 2, false)
|
||||||
@@ -581,13 +567,13 @@ func executeCommandAndDisplay(cmdText string) {
|
|||||||
outputContent := workingDir
|
outputContent := workingDir
|
||||||
// Add the command being executed to the chat
|
// Add the command being executed to the chat
|
||||||
fmt.Fprintf(textView, "\n[-:-:b](%d) <%s>: [-:-:-]\n$ %s\n",
|
fmt.Fprintf(textView, "\n[-:-:b](%d) <%s>: [-:-:-]\n$ %s\n",
|
||||||
chatBody.GetMessageCount(), cfg.ToolRole, cmdText)
|
len(chatBody.Messages), cfg.ToolRole, cmdText)
|
||||||
fmt.Fprintf(textView, "%s\n", outputContent)
|
fmt.Fprintf(textView, "%s\n", outputContent)
|
||||||
combinedMsg := models.RoleMsg{
|
combinedMsg := models.RoleMsg{
|
||||||
Role: cfg.ToolRole,
|
Role: cfg.ToolRole,
|
||||||
Content: "$ " + cmdText + "\n\n" + outputContent,
|
Content: "$ " + cmdText + "\n\n" + outputContent,
|
||||||
}
|
}
|
||||||
chatBody.AppendMessage(combinedMsg)
|
chatBody.Messages = append(chatBody.Messages, combinedMsg)
|
||||||
if scrollToEndEnabled {
|
if scrollToEndEnabled {
|
||||||
textView.ScrollToEnd()
|
textView.ScrollToEnd()
|
||||||
}
|
}
|
||||||
@@ -596,13 +582,13 @@ func executeCommandAndDisplay(cmdText string) {
|
|||||||
} else {
|
} else {
|
||||||
outputContent := "cd: " + newDir + ": No such file or directory"
|
outputContent := "cd: " + newDir + ": No such file or directory"
|
||||||
fmt.Fprintf(textView, "\n[-:-:b](%d) <%s>: [-:-:-]\n$ %s\n",
|
fmt.Fprintf(textView, "\n[-:-:b](%d) <%s>: [-:-:-]\n$ %s\n",
|
||||||
chatBody.GetMessageCount(), cfg.ToolRole, cmdText)
|
len(chatBody.Messages), cfg.ToolRole, cmdText)
|
||||||
fmt.Fprintf(textView, "[red]%s[-:-:-]\n", outputContent)
|
fmt.Fprintf(textView, "[red]%s[-:-:-]\n", outputContent)
|
||||||
combinedMsg := models.RoleMsg{
|
combinedMsg := models.RoleMsg{
|
||||||
Role: cfg.ToolRole,
|
Role: cfg.ToolRole,
|
||||||
Content: "$ " + cmdText + "\n\n" + outputContent,
|
Content: "$ " + cmdText + "\n\n" + outputContent,
|
||||||
}
|
}
|
||||||
chatBody.AppendMessage(combinedMsg)
|
chatBody.Messages = append(chatBody.Messages, combinedMsg)
|
||||||
if scrollToEndEnabled {
|
if scrollToEndEnabled {
|
||||||
textView.ScrollToEnd()
|
textView.ScrollToEnd()
|
||||||
}
|
}
|
||||||
@@ -618,7 +604,7 @@ func executeCommandAndDisplay(cmdText string) {
|
|||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
// Add the command being executed to the chat
|
// Add the command being executed to the chat
|
||||||
fmt.Fprintf(textView, "\n[-:-:b](%d) <%s>: [-:-:-]\n$ %s\n",
|
fmt.Fprintf(textView, "\n[-:-:b](%d) <%s>: [-:-:-]\n$ %s\n",
|
||||||
chatBody.GetMessageCount(), cfg.ToolRole, cmdText)
|
len(chatBody.Messages), cfg.ToolRole, cmdText)
|
||||||
var outputContent string
|
var outputContent string
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Include both output and error
|
// Include both output and error
|
||||||
@@ -649,7 +635,7 @@ func executeCommandAndDisplay(cmdText string) {
|
|||||||
Role: cfg.ToolRole,
|
Role: cfg.ToolRole,
|
||||||
Content: combinedContent,
|
Content: combinedContent,
|
||||||
}
|
}
|
||||||
chatBody.AppendMessage(combinedMsg)
|
chatBody.Messages = append(chatBody.Messages, combinedMsg)
|
||||||
// Scroll to end and update colors
|
// Scroll to end and update colors
|
||||||
if scrollToEndEnabled {
|
if scrollToEndEnabled {
|
||||||
textView.ScrollToEnd()
|
textView.ScrollToEnd()
|
||||||
@@ -679,7 +665,7 @@ func performSearch(term string) {
|
|||||||
searchResultLengths = nil
|
searchResultLengths = nil
|
||||||
originalTextForSearch = ""
|
originalTextForSearch = ""
|
||||||
// Re-render text without highlights
|
// Re-render text without highlights
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
colorText()
|
colorText()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
55
llm.go
55
llm.go
@@ -13,9 +13,8 @@ var lastImg string // for ctrl+j
|
|||||||
|
|
||||||
// containsToolSysMsg checks if the toolSysMsg already exists in the chat body
|
// containsToolSysMsg checks if the toolSysMsg already exists in the chat body
|
||||||
func containsToolSysMsg() bool {
|
func containsToolSysMsg() bool {
|
||||||
messages := chatBody.GetMessages()
|
for i := range chatBody.Messages {
|
||||||
for i := range messages {
|
if chatBody.Messages[i].Role == cfg.ToolRole && chatBody.Messages[i].Content == toolSysMsg {
|
||||||
if messages[i].Role == cfg.ToolRole && messages[i].Content == toolSysMsg {
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -136,13 +135,13 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
|
|||||||
newMsg = models.RoleMsg{Role: role, Content: msg}
|
newMsg = models.RoleMsg{Role: role, Content: msg}
|
||||||
}
|
}
|
||||||
newMsg = *processMessageTag(&newMsg)
|
newMsg = *processMessageTag(&newMsg)
|
||||||
chatBody.AppendMessage(newMsg)
|
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||||
}
|
}
|
||||||
// sending description of the tools and how to use them
|
// sending description of the tools and how to use them
|
||||||
if cfg.ToolUse && !resume && role == cfg.UserRole && !containsToolSysMsg() {
|
if cfg.ToolUse && !resume && role == cfg.UserRole && !containsToolSysMsg() {
|
||||||
chatBody.AppendMessage(models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
|
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
|
||||||
}
|
}
|
||||||
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.GetMessages())
|
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages)
|
||||||
// Build prompt and extract images inline as we process each message
|
// Build prompt and extract images inline as we process each message
|
||||||
messages := make([]string, len(filteredMessages))
|
messages := make([]string, len(filteredMessages))
|
||||||
for i := range filteredMessages {
|
for i := range filteredMessages {
|
||||||
@@ -184,7 +183,7 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
|
|||||||
}
|
}
|
||||||
logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse,
|
logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse,
|
||||||
"msg", msg, "resume", resume, "prompt", prompt, "multimodal_data_count", len(multimodalData))
|
"msg", msg, "resume", resume, "prompt", prompt, "multimodal_data_count", len(multimodalData))
|
||||||
payload := models.NewLCPReq(prompt, chatBody.GetModel(), multimodalData,
|
payload := models.NewLCPReq(prompt, chatBody.Model, multimodalData,
|
||||||
defaultLCPProps, chatBody.MakeStopSliceExcluding("", listChatRoles()))
|
defaultLCPProps, chatBody.MakeStopSliceExcluding("", listChatRoles()))
|
||||||
data, err := json.Marshal(payload)
|
data, err := json.Marshal(payload)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -290,17 +289,17 @@ func (op LCPChat) FormMsg(msg, role string, resume bool) (io.Reader, error) {
|
|||||||
newMsg = models.NewRoleMsg(role, msg)
|
newMsg = models.NewRoleMsg(role, msg)
|
||||||
}
|
}
|
||||||
newMsg = *processMessageTag(&newMsg)
|
newMsg = *processMessageTag(&newMsg)
|
||||||
chatBody.AppendMessage(newMsg)
|
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||||
logger.Debug("LCPChat FormMsg: added message to chatBody", "role", newMsg.Role,
|
logger.Debug("LCPChat FormMsg: added message to chatBody", "role", newMsg.Role,
|
||||||
"content_len", len(newMsg.Content), "message_count_after_add", chatBody.GetMessageCount())
|
"content_len", len(newMsg.Content), "message_count_after_add", len(chatBody.Messages))
|
||||||
}
|
}
|
||||||
filteredMessages, _ := filterMessagesForCurrentCharacter(chatBody.GetMessages())
|
filteredMessages, _ := filterMessagesForCurrentCharacter(chatBody.Messages)
|
||||||
// openai /v1/chat does not support custom roles; needs to be user, assistant, system
|
// openai /v1/chat does not support custom roles; needs to be user, assistant, system
|
||||||
// Add persona suffix to the last user message to indicate who the assistant should reply as
|
// Add persona suffix to the last user message to indicate who the assistant should reply as
|
||||||
bodyCopy := &models.ChatBody{
|
bodyCopy := &models.ChatBody{
|
||||||
Messages: make([]models.RoleMsg, len(filteredMessages)),
|
Messages: make([]models.RoleMsg, len(filteredMessages)),
|
||||||
Model: chatBody.GetModel(),
|
Model: chatBody.Model,
|
||||||
Stream: chatBody.GetStream(),
|
Stream: chatBody.Stream,
|
||||||
}
|
}
|
||||||
for i := range filteredMessages {
|
for i := range filteredMessages {
|
||||||
strippedMsg := *stripThinkingFromMsg(&filteredMessages[i])
|
strippedMsg := *stripThinkingFromMsg(&filteredMessages[i])
|
||||||
@@ -376,13 +375,13 @@ func (ds DeepSeekerCompletion) FormMsg(msg, role string, resume bool) (io.Reader
|
|||||||
if msg != "" { // otherwise let the bot to continue
|
if msg != "" { // otherwise let the bot to continue
|
||||||
newMsg := models.RoleMsg{Role: role, Content: msg}
|
newMsg := models.RoleMsg{Role: role, Content: msg}
|
||||||
newMsg = *processMessageTag(&newMsg)
|
newMsg = *processMessageTag(&newMsg)
|
||||||
chatBody.AppendMessage(newMsg)
|
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||||
}
|
}
|
||||||
// sending description of the tools and how to use them
|
// sending description of the tools and how to use them
|
||||||
if cfg.ToolUse && !resume && role == cfg.UserRole && !containsToolSysMsg() {
|
if cfg.ToolUse && !resume && role == cfg.UserRole && !containsToolSysMsg() {
|
||||||
chatBody.AppendMessage(models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
|
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
|
||||||
}
|
}
|
||||||
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.GetMessages())
|
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages)
|
||||||
messages := make([]string, len(filteredMessages))
|
messages := make([]string, len(filteredMessages))
|
||||||
for i := range filteredMessages {
|
for i := range filteredMessages {
|
||||||
messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt()
|
messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt()
|
||||||
@@ -395,7 +394,7 @@ func (ds DeepSeekerCompletion) FormMsg(msg, role string, resume bool) (io.Reader
|
|||||||
}
|
}
|
||||||
logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse,
|
logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse,
|
||||||
"msg", msg, "resume", resume, "prompt", prompt)
|
"msg", msg, "resume", resume, "prompt", prompt)
|
||||||
payload := models.NewDSCompletionReq(prompt, chatBody.GetModel(),
|
payload := models.NewDSCompletionReq(prompt, chatBody.Model,
|
||||||
defaultLCPProps["temp"],
|
defaultLCPProps["temp"],
|
||||||
chatBody.MakeStopSliceExcluding("", listChatRoles()))
|
chatBody.MakeStopSliceExcluding("", listChatRoles()))
|
||||||
data, err := json.Marshal(payload)
|
data, err := json.Marshal(payload)
|
||||||
@@ -449,15 +448,15 @@ func (ds DeepSeekerChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
|
|||||||
if msg != "" { // otherwise let the bot continue
|
if msg != "" { // otherwise let the bot continue
|
||||||
newMsg := models.RoleMsg{Role: role, Content: msg}
|
newMsg := models.RoleMsg{Role: role, Content: msg}
|
||||||
newMsg = *processMessageTag(&newMsg)
|
newMsg = *processMessageTag(&newMsg)
|
||||||
chatBody.AppendMessage(newMsg)
|
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||||
}
|
}
|
||||||
// Create copy of chat body with standardized user role
|
// Create copy of chat body with standardized user role
|
||||||
filteredMessages, _ := filterMessagesForCurrentCharacter(chatBody.GetMessages())
|
filteredMessages, _ := filterMessagesForCurrentCharacter(chatBody.Messages)
|
||||||
// Add persona suffix to the last user message to indicate who the assistant should reply as
|
// Add persona suffix to the last user message to indicate who the assistant should reply as
|
||||||
bodyCopy := &models.ChatBody{
|
bodyCopy := &models.ChatBody{
|
||||||
Messages: make([]models.RoleMsg, len(filteredMessages)),
|
Messages: make([]models.RoleMsg, len(filteredMessages)),
|
||||||
Model: chatBody.GetModel(),
|
Model: chatBody.Model,
|
||||||
Stream: chatBody.GetStream(),
|
Stream: chatBody.Stream,
|
||||||
}
|
}
|
||||||
for i := range filteredMessages {
|
for i := range filteredMessages {
|
||||||
strippedMsg := *stripThinkingFromMsg(&filteredMessages[i])
|
strippedMsg := *stripThinkingFromMsg(&filteredMessages[i])
|
||||||
@@ -528,13 +527,13 @@ func (or OpenRouterCompletion) FormMsg(msg, role string, resume bool) (io.Reader
|
|||||||
if msg != "" { // otherwise let the bot to continue
|
if msg != "" { // otherwise let the bot to continue
|
||||||
newMsg := models.RoleMsg{Role: role, Content: msg}
|
newMsg := models.RoleMsg{Role: role, Content: msg}
|
||||||
newMsg = *processMessageTag(&newMsg)
|
newMsg = *processMessageTag(&newMsg)
|
||||||
chatBody.AppendMessage(newMsg)
|
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||||
}
|
}
|
||||||
// sending description of the tools and how to use them
|
// sending description of the tools and how to use them
|
||||||
if cfg.ToolUse && !resume && role == cfg.UserRole && !containsToolSysMsg() {
|
if cfg.ToolUse && !resume && role == cfg.UserRole && !containsToolSysMsg() {
|
||||||
chatBody.AppendMessage(models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
|
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
|
||||||
}
|
}
|
||||||
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.GetMessages())
|
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages)
|
||||||
messages := make([]string, len(filteredMessages))
|
messages := make([]string, len(filteredMessages))
|
||||||
for i := range filteredMessages {
|
for i := range filteredMessages {
|
||||||
messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt()
|
messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt()
|
||||||
@@ -548,7 +547,7 @@ func (or OpenRouterCompletion) FormMsg(msg, role string, resume bool) (io.Reader
|
|||||||
stopSlice := chatBody.MakeStopSliceExcluding("", listChatRoles())
|
stopSlice := chatBody.MakeStopSliceExcluding("", listChatRoles())
|
||||||
logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse,
|
logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse,
|
||||||
"msg", msg, "resume", resume, "prompt", prompt, "stop_strings", stopSlice)
|
"msg", msg, "resume", resume, "prompt", prompt, "stop_strings", stopSlice)
|
||||||
payload := models.NewOpenRouterCompletionReq(chatBody.GetModel(), prompt,
|
payload := models.NewOpenRouterCompletionReq(chatBody.Model, prompt,
|
||||||
defaultLCPProps, stopSlice)
|
defaultLCPProps, stopSlice)
|
||||||
data, err := json.Marshal(payload)
|
data, err := json.Marshal(payload)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -634,15 +633,15 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
|
|||||||
newMsg = models.NewRoleMsg(role, msg)
|
newMsg = models.NewRoleMsg(role, msg)
|
||||||
}
|
}
|
||||||
newMsg = *processMessageTag(&newMsg)
|
newMsg = *processMessageTag(&newMsg)
|
||||||
chatBody.AppendMessage(newMsg)
|
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||||
}
|
}
|
||||||
// Create copy of chat body with standardized user role
|
// Create copy of chat body with standardized user role
|
||||||
filteredMessages, _ := filterMessagesForCurrentCharacter(chatBody.GetMessages())
|
filteredMessages, _ := filterMessagesForCurrentCharacter(chatBody.Messages)
|
||||||
// Add persona suffix to the last user message to indicate who the assistant should reply as
|
// Add persona suffix to the last user message to indicate who the assistant should reply as
|
||||||
bodyCopy := &models.ChatBody{
|
bodyCopy := &models.ChatBody{
|
||||||
Messages: make([]models.RoleMsg, len(filteredMessages)),
|
Messages: make([]models.RoleMsg, len(filteredMessages)),
|
||||||
Model: chatBody.GetModel(),
|
Model: chatBody.Model,
|
||||||
Stream: chatBody.GetStream(),
|
Stream: chatBody.Stream,
|
||||||
}
|
}
|
||||||
for i := range filteredMessages {
|
for i := range filteredMessages {
|
||||||
strippedMsg := *stripThinkingFromMsg(&filteredMessages[i])
|
strippedMsg := *stripThinkingFromMsg(&filteredMessages[i])
|
||||||
|
|||||||
251
models/models.go
251
models/models.go
@@ -6,7 +6,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type FuncCall struct {
|
type FuncCall struct {
|
||||||
@@ -640,253 +639,3 @@ type MultimodalToolResp struct {
|
|||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
Parts []map[string]string `json:"parts"`
|
Parts []map[string]string `json:"parts"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SafeChatBody is a thread-safe wrapper around ChatBody using RWMutex.
|
|
||||||
// This allows safe concurrent access to chat state from multiple goroutines.
|
|
||||||
type SafeChatBody struct {
|
|
||||||
mu sync.RWMutex
|
|
||||||
ChatBody
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewSafeChatBody creates a new SafeChatBody from an existing ChatBody.
|
|
||||||
// If cb is nil, creates an empty ChatBody.
|
|
||||||
func NewSafeChatBody(cb *ChatBody) *SafeChatBody {
|
|
||||||
if cb == nil {
|
|
||||||
return &SafeChatBody{
|
|
||||||
ChatBody: ChatBody{
|
|
||||||
Messages: []RoleMsg{},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return &SafeChatBody{
|
|
||||||
ChatBody: *cb,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetModel returns the model name (thread-safe read).
|
|
||||||
func (s *SafeChatBody) GetModel() string {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
return s.Model
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetModel sets the model name (thread-safe write).
|
|
||||||
func (s *SafeChatBody) SetModel(model string) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
s.Model = model
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetStream returns the stream flag (thread-safe read).
|
|
||||||
func (s *SafeChatBody) GetStream() bool {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
return s.Stream
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetStream sets the stream flag (thread-safe write).
|
|
||||||
func (s *SafeChatBody) SetStream(stream bool) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
s.Stream = stream
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetMessages returns a copy of all messages (thread-safe read).
|
|
||||||
// Returns a copy to prevent race conditions after the lock is released.
|
|
||||||
func (s *SafeChatBody) GetMessages() []RoleMsg {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
// Return a copy to prevent external modification
|
|
||||||
messagesCopy := make([]RoleMsg, len(s.Messages))
|
|
||||||
copy(messagesCopy, s.Messages)
|
|
||||||
return messagesCopy
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetMessages replaces all messages (thread-safe write).
|
|
||||||
func (s *SafeChatBody) SetMessages(messages []RoleMsg) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
s.Messages = messages
|
|
||||||
}
|
|
||||||
|
|
||||||
// AppendMessage adds a message to the end (thread-safe write).
|
|
||||||
func (s *SafeChatBody) AppendMessage(msg RoleMsg) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
s.Messages = append(s.Messages, msg)
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetMessageAt returns a message at a specific index (thread-safe read).
|
|
||||||
// Returns the message and a boolean indicating if the index was valid.
|
|
||||||
func (s *SafeChatBody) GetMessageAt(index int) (RoleMsg, bool) {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
if index < 0 || index >= len(s.Messages) {
|
|
||||||
return RoleMsg{}, false
|
|
||||||
}
|
|
||||||
return s.Messages[index], true
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetMessageAt updates a message at a specific index (thread-safe write).
|
|
||||||
// Returns false if index is out of bounds.
|
|
||||||
func (s *SafeChatBody) SetMessageAt(index int, msg RoleMsg) bool {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
if index < 0 || index >= len(s.Messages) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
s.Messages[index] = msg
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetLastMessage returns the last message (thread-safe read).
|
|
||||||
// Returns the message and a boolean indicating if the chat has messages.
|
|
||||||
func (s *SafeChatBody) GetLastMessage() (RoleMsg, bool) {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
if len(s.Messages) == 0 {
|
|
||||||
return RoleMsg{}, false
|
|
||||||
}
|
|
||||||
return s.Messages[len(s.Messages)-1], true
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetMessageCount returns the number of messages (thread-safe read).
|
|
||||||
func (s *SafeChatBody) GetMessageCount() int {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
return len(s.Messages)
|
|
||||||
}
|
|
||||||
|
|
||||||
// RemoveLastMessage removes the last message (thread-safe write).
|
|
||||||
// Returns false if there are no messages.
|
|
||||||
func (s *SafeChatBody) RemoveLastMessage() bool {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
if len(s.Messages) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
s.Messages = s.Messages[:len(s.Messages)-1]
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// TruncateMessages keeps only the first n messages (thread-safe write).
|
|
||||||
func (s *SafeChatBody) TruncateMessages(n int) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
if n < len(s.Messages) {
|
|
||||||
s.Messages = s.Messages[:n]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ClearMessages removes all messages (thread-safe write).
|
|
||||||
func (s *SafeChatBody) ClearMessages() {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
s.Messages = []RoleMsg{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rename renames all occurrences of oldname to newname in messages (thread-safe read-modify-write).
|
|
||||||
func (s *SafeChatBody) Rename(oldname, newname string) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
for i := range s.Messages {
|
|
||||||
s.Messages[i].Content = strings.ReplaceAll(s.Messages[i].Content, oldname, newname)
|
|
||||||
s.Messages[i].Role = strings.ReplaceAll(s.Messages[i].Role, oldname, newname)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ListRoles returns all unique roles in messages (thread-safe read).
|
|
||||||
func (s *SafeChatBody) ListRoles() []string {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
namesMap := make(map[string]struct{})
|
|
||||||
for i := range s.Messages {
|
|
||||||
namesMap[s.Messages[i].Role] = struct{}{}
|
|
||||||
}
|
|
||||||
resp := make([]string, len(namesMap))
|
|
||||||
i := 0
|
|
||||||
for k := range namesMap {
|
|
||||||
resp[i] = k
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
return resp
|
|
||||||
}
|
|
||||||
|
|
||||||
// MakeStopSlice returns stop strings for all roles (thread-safe read).
|
|
||||||
func (s *SafeChatBody) MakeStopSlice() []string {
|
|
||||||
return s.MakeStopSliceExcluding("", s.ListRoles())
|
|
||||||
}
|
|
||||||
|
|
||||||
// MakeStopSliceExcluding returns stop strings excluding a specific role (thread-safe read).
|
|
||||||
func (s *SafeChatBody) MakeStopSliceExcluding(excludeRole string, roleList []string) []string {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
ss := []string{}
|
|
||||||
for _, role := range roleList {
|
|
||||||
if role == excludeRole {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
ss = append(ss,
|
|
||||||
role+":\n",
|
|
||||||
role+":",
|
|
||||||
role+": ",
|
|
||||||
role+": ",
|
|
||||||
role+": \n",
|
|
||||||
role+": ",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return ss
|
|
||||||
}
|
|
||||||
|
|
||||||
// UpdateMessageFunc updates a message at index using a provided function.
|
|
||||||
// The function receives the current message and returns the updated message.
|
|
||||||
// This is atomic and thread-safe (read-modify-write under single lock).
|
|
||||||
// Returns false if index is out of bounds.
|
|
||||||
func (s *SafeChatBody) UpdateMessageFunc(index int, updater func(RoleMsg) RoleMsg) bool {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
if index < 0 || index >= len(s.Messages) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
s.Messages[index] = updater(s.Messages[index])
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// AppendMessageFunc appends a new message created by a provided function.
|
|
||||||
// The function receives the current message count and returns the new message.
|
|
||||||
// This is atomic and thread-safe.
|
|
||||||
func (s *SafeChatBody) AppendMessageFunc(creator func(count int) RoleMsg) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
msg := creator(len(s.Messages))
|
|
||||||
s.Messages = append(s.Messages, msg)
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetMessagesForLLM returns a filtered copy of messages for sending to LLM.
|
|
||||||
// This is thread-safe and returns a copy safe for external modification.
|
|
||||||
func (s *SafeChatBody) GetMessagesForLLM(filterFunc func([]RoleMsg) []RoleMsg) []RoleMsg {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
if filterFunc == nil {
|
|
||||||
messagesCopy := make([]RoleMsg, len(s.Messages))
|
|
||||||
copy(messagesCopy, s.Messages)
|
|
||||||
return messagesCopy
|
|
||||||
}
|
|
||||||
return filterFunc(s.Messages)
|
|
||||||
}
|
|
||||||
|
|
||||||
// WithLock executes a function while holding the write lock.
|
|
||||||
// Use this for complex operations that need to be atomic.
|
|
||||||
func (s *SafeChatBody) WithLock(fn func(*ChatBody)) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
fn(&s.ChatBody)
|
|
||||||
}
|
|
||||||
|
|
||||||
// WithRLock executes a function while holding the read lock.
|
|
||||||
// Use this for complex read-only operations.
|
|
||||||
func (s *SafeChatBody) WithRLock(fn func(*ChatBody)) {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
fn(&s.ChatBody)
|
|
||||||
}
|
|
||||||
|
|||||||
23
popups.go
23
popups.go
@@ -22,7 +22,7 @@ func showModelSelectionPopup() {
|
|||||||
models, err := fetchLCPModelsWithLoadStatus()
|
models, err := fetchLCPModelsWithLoadStatus()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("failed to fetch models with load status", "error", err)
|
logger.Error("failed to fetch models with load status", "error", err)
|
||||||
return LocalModels.Load().([]string)
|
return LocalModels
|
||||||
}
|
}
|
||||||
return models
|
return models
|
||||||
}
|
}
|
||||||
@@ -30,8 +30,7 @@ func showModelSelectionPopup() {
|
|||||||
modelList := getModelListForAPI(cfg.CurrentAPI)
|
modelList := getModelListForAPI(cfg.CurrentAPI)
|
||||||
// Check for empty options list
|
// Check for empty options list
|
||||||
if len(modelList) == 0 {
|
if len(modelList) == 0 {
|
||||||
localModels := LocalModels.Load().([]string)
|
logger.Warn("empty model list for", "api", cfg.CurrentAPI, "localModelsLen", len(LocalModels), "orModelsLen", len(ORFreeModels))
|
||||||
logger.Warn("empty model list for", "api", cfg.CurrentAPI, "localModelsLen", len(localModels), "orModelsLen", len(ORFreeModels))
|
|
||||||
var message string
|
var message string
|
||||||
switch {
|
switch {
|
||||||
case strings.Contains(cfg.CurrentAPI, "openrouter.ai"):
|
case strings.Contains(cfg.CurrentAPI, "openrouter.ai"):
|
||||||
@@ -51,7 +50,7 @@ func showModelSelectionPopup() {
|
|||||||
// Find the current model index to set as selected
|
// Find the current model index to set as selected
|
||||||
currentModelIndex := -1
|
currentModelIndex := -1
|
||||||
for i, model := range modelList {
|
for i, model := range modelList {
|
||||||
if strings.TrimPrefix(model, models.LoadedMark) == chatBody.GetModel() {
|
if strings.TrimPrefix(model, models.LoadedMark) == chatBody.Model {
|
||||||
currentModelIndex = i
|
currentModelIndex = i
|
||||||
}
|
}
|
||||||
modelListWidget.AddItem(model, "", 0, nil)
|
modelListWidget.AddItem(model, "", 0, nil)
|
||||||
@@ -62,8 +61,8 @@ func showModelSelectionPopup() {
|
|||||||
}
|
}
|
||||||
modelListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
|
modelListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
|
||||||
modelName := strings.TrimPrefix(mainText, models.LoadedMark)
|
modelName := strings.TrimPrefix(mainText, models.LoadedMark)
|
||||||
chatBody.SetModel(modelName)
|
chatBody.Model = modelName
|
||||||
cfg.CurrentModel = chatBody.GetModel()
|
cfg.CurrentModel = chatBody.Model
|
||||||
pages.RemovePage("modelSelectionPopup")
|
pages.RemovePage("modelSelectionPopup")
|
||||||
app.SetFocus(textArea)
|
app.SetFocus(textArea)
|
||||||
updateCachedModelColor()
|
updateCachedModelColor()
|
||||||
@@ -151,13 +150,15 @@ func showAPILinkSelectionPopup() {
|
|||||||
}
|
}
|
||||||
// Assume local llama.cpp
|
// Assume local llama.cpp
|
||||||
refreshLocalModelsIfEmpty()
|
refreshLocalModelsIfEmpty()
|
||||||
return LocalModels.Load().([]string)
|
localModelsMu.RLock()
|
||||||
|
defer localModelsMu.RUnlock()
|
||||||
|
return LocalModels
|
||||||
}
|
}
|
||||||
newModelList := getModelListForAPI(cfg.CurrentAPI)
|
newModelList := getModelListForAPI(cfg.CurrentAPI)
|
||||||
// Ensure chatBody.Model is in the new list; if not, set to first available model
|
// Ensure chatBody.Model is in the new list; if not, set to first available model
|
||||||
if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.GetModel()) {
|
if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) {
|
||||||
chatBody.SetModel(strings.TrimPrefix(newModelList[0], models.LoadedMark))
|
chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark)
|
||||||
cfg.CurrentModel = chatBody.GetModel()
|
cfg.CurrentModel = chatBody.Model
|
||||||
updateToolCapabilities()
|
updateToolCapabilities()
|
||||||
}
|
}
|
||||||
pages.RemovePage("apiLinkSelectionPopup")
|
pages.RemovePage("apiLinkSelectionPopup")
|
||||||
@@ -228,7 +229,7 @@ func showUserRoleSelectionPopup() {
|
|||||||
// Update the user role in config
|
// Update the user role in config
|
||||||
cfg.WriteNextMsgAs = mainText
|
cfg.WriteNextMsgAs = mainText
|
||||||
// role got switch, update textview with character specific context for user
|
// role got switch, update textview with character specific context for user
|
||||||
filtered := filterMessagesForCharacter(chatBody.GetMessages(), mainText)
|
filtered := filterMessagesForCharacter(chatBody.Messages, mainText)
|
||||||
textView.SetText(chatToText(filtered, cfg.ShowSys))
|
textView.SetText(chatToText(filtered, cfg.ShowSys))
|
||||||
// Remove the popup page
|
// Remove the popup page
|
||||||
pages.RemovePage("userRoleSelectionPopup")
|
pages.RemovePage("userRoleSelectionPopup")
|
||||||
|
|||||||
@@ -4,11 +4,14 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/gdamore/tcell/v2"
|
"github.com/gdamore/tcell/v2"
|
||||||
"github.com/rivo/tview"
|
"github.com/rivo/tview"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var _ = sync.RWMutex{}
|
||||||
|
|
||||||
// Define constants for cell types
|
// Define constants for cell types
|
||||||
const (
|
const (
|
||||||
CellTypeCheckbox = "checkbox"
|
CellTypeCheckbox = "checkbox"
|
||||||
@@ -112,6 +115,9 @@ func makePropsTable(props map[string]float32) *tview.Table {
|
|||||||
row++
|
row++
|
||||||
}
|
}
|
||||||
// Add checkboxes
|
// Add checkboxes
|
||||||
|
addCheckboxRow("RAG use", cfg.RAGEnabled, func(checked bool) {
|
||||||
|
cfg.RAGEnabled = checked
|
||||||
|
})
|
||||||
addCheckboxRow("Inject role", injectRole, func(checked bool) {
|
addCheckboxRow("Inject role", injectRole, func(checked bool) {
|
||||||
injectRole = checked
|
injectRole = checked
|
||||||
})
|
})
|
||||||
@@ -154,7 +160,9 @@ func makePropsTable(props map[string]float32) *tview.Table {
|
|||||||
}
|
}
|
||||||
// Assume local llama.cpp
|
// Assume local llama.cpp
|
||||||
refreshLocalModelsIfEmpty()
|
refreshLocalModelsIfEmpty()
|
||||||
return LocalModels.Load().([]string)
|
localModelsMu.RLock()
|
||||||
|
defer localModelsMu.RUnlock()
|
||||||
|
return LocalModels
|
||||||
}
|
}
|
||||||
// Add input fields
|
// Add input fields
|
||||||
addInputRow("New char to write msg as", "", func(text string) {
|
addInputRow("New char to write msg as", "", func(text string) {
|
||||||
@@ -257,8 +265,7 @@ func makePropsTable(props map[string]float32) *tview.Table {
|
|||||||
|
|
||||||
// Check for empty options list
|
// Check for empty options list
|
||||||
if len(data.Options) == 0 {
|
if len(data.Options) == 0 {
|
||||||
localModels := LocalModels.Load().([]string)
|
logger.Warn("empty options list for", "label", label, "api", cfg.CurrentAPI, "localModelsLen", len(LocalModels), "orModelsLen", len(ORFreeModels))
|
||||||
logger.Warn("empty options list for", "label", label, "api", cfg.CurrentAPI, "localModelsLen", len(localModels), "orModelsLen", len(ORFreeModels))
|
|
||||||
message := "No options available for " + label
|
message := "No options available for " + label
|
||||||
if label == "Select a model" {
|
if label == "Select a model" {
|
||||||
switch {
|
switch {
|
||||||
|
|||||||
314
rag/embedder.go
314
rag/embedder.go
@@ -9,13 +9,6 @@ import (
|
|||||||
"gf-lt/models"
|
"gf-lt/models"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/sugarme/tokenizer"
|
|
||||||
"github.com/sugarme/tokenizer/pretrained"
|
|
||||||
"github.com/yalue/onnxruntime_go"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Embedder defines the interface for embedding text
|
// Embedder defines the interface for embedding text
|
||||||
@@ -34,10 +27,8 @@ type APIEmbedder struct {
|
|||||||
func NewAPIEmbedder(l *slog.Logger, cfg *config.Config) *APIEmbedder {
|
func NewAPIEmbedder(l *slog.Logger, cfg *config.Config) *APIEmbedder {
|
||||||
return &APIEmbedder{
|
return &APIEmbedder{
|
||||||
logger: l,
|
logger: l,
|
||||||
client: &http.Client{
|
client: &http.Client{},
|
||||||
Timeout: 30 * time.Second,
|
cfg: cfg,
|
||||||
},
|
|
||||||
cfg: cfg,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -143,302 +134,11 @@ func (a *APIEmbedder) EmbedSlice(lines []string) ([][]float32, error) {
|
|||||||
return embeddings, nil
|
return embeddings, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: ONNXEmbedder implementation would go here
|
||||||
|
// This would require:
|
||||||
// 1. Loading ONNX models locally
|
// 1. Loading ONNX models locally
|
||||||
// 2. Using a Go ONNX runtime (like gorgonia/onnx or similar)
|
// 2. Using a Go ONNX runtime (like gorgonia/onnx or similar)
|
||||||
// 3. Converting text to embeddings without external API calls
|
// 3. Converting text to embeddings without external API calls
|
||||||
type ONNXEmbedder struct {
|
//
|
||||||
session *onnxruntime_go.DynamicAdvancedSession
|
// For now, we'll focus on the API implementation which is already working in the current system,
|
||||||
tokenizer *tokenizer.Tokenizer
|
// and can be extended later when we have ONNX runtime integration
|
||||||
tokenizerPath string
|
|
||||||
dims int
|
|
||||||
logger *slog.Logger
|
|
||||||
mu sync.Mutex
|
|
||||||
modelPath string
|
|
||||||
}
|
|
||||||
|
|
||||||
var onnxInitOnce sync.Once
|
|
||||||
var onnxReady bool
|
|
||||||
var onnxLibPath string
|
|
||||||
var cudaLibPath string
|
|
||||||
|
|
||||||
var onnxLibPaths = []string{
|
|
||||||
"/usr/lib/libonnxruntime.so",
|
|
||||||
"/usr/lib/libonnxruntime.so.1.24.2",
|
|
||||||
"/usr/local/lib/libonnxruntime.so",
|
|
||||||
"/usr/lib/x86_64-linux-gnu/libonnxruntime.so",
|
|
||||||
"/opt/onnxruntime/lib/libonnxruntime.so",
|
|
||||||
}
|
|
||||||
|
|
||||||
var cudaLibPaths = []string{
|
|
||||||
"/usr/lib/libonnxruntime_providers_cuda.so",
|
|
||||||
"/usr/local/lib/libonnxruntime_providers_cuda.so",
|
|
||||||
"/opt/onnxruntime/lib/libonnxruntime_providers_cuda.so",
|
|
||||||
}
|
|
||||||
|
|
||||||
func findONNXLibrary() string {
|
|
||||||
for _, path := range onnxLibPaths {
|
|
||||||
if _, err := os.Stat(path); err == nil {
|
|
||||||
return path
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func findCUDALibrary() string {
|
|
||||||
for _, path := range cudaLibPaths {
|
|
||||||
if _, err := os.Stat(path); err == nil {
|
|
||||||
return path
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Logger) (*ONNXEmbedder, error) {
|
|
||||||
// Check if model and tokenizer files exist
|
|
||||||
if _, err := os.Stat(modelPath); err != nil {
|
|
||||||
return nil, fmt.Errorf("ONNX model not found: %w", err)
|
|
||||||
}
|
|
||||||
if _, err := os.Stat(tokenizerPath); err != nil {
|
|
||||||
return nil, fmt.Errorf("tokenizer not found: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find ONNX library
|
|
||||||
onnxLibPath = findONNXLibrary()
|
|
||||||
if onnxLibPath == "" {
|
|
||||||
return nil, errors.New("ONNX runtime library not found in standard locations")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find CUDA provider library (optional)
|
|
||||||
cudaLibPath = findCUDALibrary()
|
|
||||||
if cudaLibPath == "" {
|
|
||||||
fmt.Println("WARNING: CUDA provider library not found, will use CPU")
|
|
||||||
}
|
|
||||||
emb := &ONNXEmbedder{
|
|
||||||
tokenizerPath: tokenizerPath,
|
|
||||||
dims: dims,
|
|
||||||
logger: logger,
|
|
||||||
modelPath: modelPath,
|
|
||||||
}
|
|
||||||
return emb, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *ONNXEmbedder) ensureInitialized() error {
|
|
||||||
if e.session != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
e.mu.Lock()
|
|
||||||
defer e.mu.Unlock()
|
|
||||||
if e.session != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// Load tokenizer lazily
|
|
||||||
if e.tokenizer == nil {
|
|
||||||
tok, err := pretrained.FromFile(e.tokenizerPath)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to load tokenizer: %w", err)
|
|
||||||
}
|
|
||||||
e.tokenizer = tok
|
|
||||||
}
|
|
||||||
onnxInitOnce.Do(func() {
|
|
||||||
onnxruntime_go.SetSharedLibraryPath(onnxLibPath)
|
|
||||||
if err := onnxruntime_go.InitializeEnvironment(); err != nil {
|
|
||||||
e.logger.Error("failed to initialize ONNX runtime", "error", err)
|
|
||||||
onnxReady = false
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Register CUDA provider if available
|
|
||||||
if cudaLibPath != "" {
|
|
||||||
if err := onnxruntime_go.RegisterExecutionProviderLibrary("CUDA", cudaLibPath); err != nil {
|
|
||||||
e.logger.Warn("failed to register CUDA provider", "error", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
onnxReady = true
|
|
||||||
})
|
|
||||||
if !onnxReady {
|
|
||||||
return errors.New("ONNX runtime not ready")
|
|
||||||
}
|
|
||||||
// Create session options
|
|
||||||
opts, err := onnxruntime_go.NewSessionOptions()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create session options: %w", err)
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
_ = opts.Destroy()
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Try to add CUDA provider
|
|
||||||
useCUDA := cudaLibPath != ""
|
|
||||||
if useCUDA {
|
|
||||||
cudaOpts, err := onnxruntime_go.NewCUDAProviderOptions()
|
|
||||||
if err != nil {
|
|
||||||
e.logger.Warn("failed to create CUDA provider options, falling back to CPU", "error", err)
|
|
||||||
useCUDA = false
|
|
||||||
} else {
|
|
||||||
defer func() {
|
|
||||||
_ = cudaOpts.Destroy()
|
|
||||||
}()
|
|
||||||
if err := cudaOpts.Update(map[string]string{"device_id": "0"}); err != nil {
|
|
||||||
e.logger.Warn("failed to update CUDA options, falling back to CPU", "error", err)
|
|
||||||
useCUDA = false
|
|
||||||
} else if err := opts.AppendExecutionProviderCUDA(cudaOpts); err != nil {
|
|
||||||
e.logger.Warn("failed to append CUDA provider, falling back to CPU", "error", err)
|
|
||||||
useCUDA = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if useCUDA {
|
|
||||||
e.logger.Info("Using CUDA for ONNX inference")
|
|
||||||
} else {
|
|
||||||
e.logger.Info("Using CPU for ONNX inference")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create session with options
|
|
||||||
session, err := onnxruntime_go.NewDynamicAdvancedSession(
|
|
||||||
e.getModelPath(),
|
|
||||||
[]string{"input_ids", "attention_mask"},
|
|
||||||
[]string{"sentence_embedding"},
|
|
||||||
opts,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create ONNX session: %w", err)
|
|
||||||
}
|
|
||||||
e.session = session
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *ONNXEmbedder) getModelPath() string {
|
|
||||||
return e.modelPath
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *ONNXEmbedder) Destroy() error {
|
|
||||||
e.mu.Lock()
|
|
||||||
defer e.mu.Unlock()
|
|
||||||
if e.session != nil {
|
|
||||||
if err := e.session.Destroy(); err != nil {
|
|
||||||
return fmt.Errorf("failed to destroy ONNX session: %w", err)
|
|
||||||
}
|
|
||||||
e.session = nil
|
|
||||||
e.logger.Info("ONNX session destroyed, VRAM freed")
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *ONNXEmbedder) Embed(text string) ([]float32, error) {
|
|
||||||
if err := e.ensureInitialized(); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
// 1. Tokenize
|
|
||||||
encoding, err := e.tokenizer.EncodeSingle(text)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("tokenization failed: %w", err)
|
|
||||||
}
|
|
||||||
// 2. Convert to int64 and create attention mask
|
|
||||||
ids := encoding.Ids
|
|
||||||
inputIDs := make([]int64, len(ids))
|
|
||||||
attentionMask := make([]int64, len(ids))
|
|
||||||
for i, id := range ids {
|
|
||||||
inputIDs[i] = int64(id)
|
|
||||||
attentionMask[i] = 1
|
|
||||||
}
|
|
||||||
// 3. Create input tensors (shape: [1, seq_len])
|
|
||||||
seqLen := int64(len(inputIDs))
|
|
||||||
inputIDsTensor, err := onnxruntime_go.NewTensor[int64](
|
|
||||||
onnxruntime_go.NewShape(1, seqLen),
|
|
||||||
inputIDs,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create input_ids tensor: %w", err)
|
|
||||||
}
|
|
||||||
defer func() { _ = inputIDsTensor.Destroy() }()
|
|
||||||
maskTensor, err := onnxruntime_go.NewTensor[int64](
|
|
||||||
onnxruntime_go.NewShape(1, seqLen),
|
|
||||||
attentionMask,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create attention_mask tensor: %w", err)
|
|
||||||
}
|
|
||||||
defer func() { _ = maskTensor.Destroy() }()
|
|
||||||
// 4. Create output tensor
|
|
||||||
outputTensor, err := onnxruntime_go.NewEmptyTensor[float32](
|
|
||||||
onnxruntime_go.NewShape(1, int64(e.dims)),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create output tensor: %w", err)
|
|
||||||
}
|
|
||||||
defer func() { _ = outputTensor.Destroy() }()
|
|
||||||
// 5. Run inference
|
|
||||||
err = e.session.Run(
|
|
||||||
[]onnxruntime_go.Value{inputIDsTensor, maskTensor},
|
|
||||||
[]onnxruntime_go.Value{outputTensor},
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("inference failed: %w", err)
|
|
||||||
}
|
|
||||||
// 6. Copy output data
|
|
||||||
outputData := outputTensor.GetData()
|
|
||||||
embedding := make([]float32, len(outputData))
|
|
||||||
copy(embedding, outputData)
|
|
||||||
return embedding, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) {
|
|
||||||
if err := e.ensureInitialized(); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
encodings := make([]*tokenizer.Encoding, len(texts))
|
|
||||||
maxLen := 0
|
|
||||||
for i, txt := range texts {
|
|
||||||
enc, err := e.tokenizer.EncodeSingle(txt)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
encodings[i] = enc
|
|
||||||
if l := len(enc.Ids); l > maxLen {
|
|
||||||
maxLen = l
|
|
||||||
}
|
|
||||||
}
|
|
||||||
batchSize := len(texts)
|
|
||||||
inputIDs := make([]int64, batchSize*maxLen)
|
|
||||||
attentionMask := make([]int64, batchSize*maxLen)
|
|
||||||
for i, enc := range encodings {
|
|
||||||
ids := enc.Ids
|
|
||||||
offset := i * maxLen
|
|
||||||
for j, id := range ids {
|
|
||||||
inputIDs[offset+j] = int64(id)
|
|
||||||
attentionMask[offset+j] = 1
|
|
||||||
}
|
|
||||||
// Remaining positions are already zero (padding)
|
|
||||||
}
|
|
||||||
// Create tensors with shape [batchSize, maxLen]
|
|
||||||
inputTensor, _ := onnxruntime_go.NewTensor[int64](
|
|
||||||
onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)),
|
|
||||||
inputIDs,
|
|
||||||
)
|
|
||||||
defer func() { _ = inputTensor.Destroy() }()
|
|
||||||
maskTensor, _ := onnxruntime_go.NewTensor[int64](
|
|
||||||
onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)),
|
|
||||||
attentionMask,
|
|
||||||
)
|
|
||||||
defer func() { _ = maskTensor.Destroy() }()
|
|
||||||
outputTensor, _ := onnxruntime_go.NewEmptyTensor[float32](
|
|
||||||
onnxruntime_go.NewShape(int64(batchSize), int64(e.dims)),
|
|
||||||
)
|
|
||||||
defer func() { _ = outputTensor.Destroy() }()
|
|
||||||
err := e.session.Run(
|
|
||||||
[]onnxruntime_go.Value{inputTensor, maskTensor},
|
|
||||||
[]onnxruntime_go.Value{outputTensor},
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
// Extract embeddings per batch item
|
|
||||||
data := outputTensor.GetData()
|
|
||||||
embeddings := make([][]float32, batchSize)
|
|
||||||
for i := 0; i < batchSize; i++ {
|
|
||||||
start := i * e.dims
|
|
||||||
emb := make([]float32, e.dims)
|
|
||||||
copy(emb, data[start:start+e.dims])
|
|
||||||
embeddings[i] = emb
|
|
||||||
}
|
|
||||||
return embeddings, nil
|
|
||||||
}
|
|
||||||
|
|||||||
732
rag/rag.go
732
rag/rag.go
@@ -1,7 +1,6 @@
|
|||||||
package rag
|
package rag
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"gf-lt/config"
|
"gf-lt/config"
|
||||||
@@ -10,168 +9,51 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"path"
|
"path"
|
||||||
"regexp"
|
"regexp"
|
||||||
"runtime"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/neurosnap/sentences/english"
|
"github.com/neurosnap/sentences/english"
|
||||||
)
|
)
|
||||||
|
|
||||||
const ()
|
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Status messages for TUI integration
|
// Status messages for TUI integration
|
||||||
LongJobStatusCh = make(chan string, 100) // Increased buffer size for parallel batch updates
|
LongJobStatusCh = make(chan string, 10) // Increased buffer size to prevent blocking
|
||||||
FinishedRAGStatus = "finished loading RAG file; press Enter"
|
FinishedRAGStatus = "finished loading RAG file; press Enter"
|
||||||
LoadedFileRAGStatus = "loaded file"
|
LoadedFileRAGStatus = "loaded file"
|
||||||
ErrRAGStatus = "some error occurred; failed to transfer data to vector db"
|
ErrRAGStatus = "some error occurred; failed to transfer data to vector db"
|
||||||
)
|
)
|
||||||
|
|
||||||
type RAG struct {
|
type RAG struct {
|
||||||
logger *slog.Logger
|
logger *slog.Logger
|
||||||
store storage.FullRepo
|
store storage.FullRepo
|
||||||
cfg *config.Config
|
cfg *config.Config
|
||||||
embedder Embedder
|
embedder Embedder
|
||||||
storage *VectorStorage
|
storage *VectorStorage
|
||||||
mu sync.RWMutex
|
mu sync.Mutex
|
||||||
idleMu sync.Mutex
|
|
||||||
fallbackMsg string
|
|
||||||
idleTimer *time.Timer
|
|
||||||
idleTimeout time.Duration
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// batchTask represents a single batch to be embedded
|
func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) *RAG {
|
||||||
type batchTask struct {
|
// Initialize with API embedder by default, could be configurable later
|
||||||
batchIndex int
|
embedder := NewAPIEmbedder(l, cfg)
|
||||||
paragraphs []string
|
|
||||||
filename string
|
|
||||||
totalBatches int
|
|
||||||
}
|
|
||||||
|
|
||||||
// batchResult represents the result of embedding a batch
|
|
||||||
type batchResult struct {
|
|
||||||
batchIndex int
|
|
||||||
embeddings [][]float32
|
|
||||||
paragraphs []string
|
|
||||||
filename string
|
|
||||||
}
|
|
||||||
|
|
||||||
// sendStatusNonBlocking sends a status message without blocking
|
|
||||||
func (r *RAG) sendStatusNonBlocking(status string) {
|
|
||||||
select {
|
|
||||||
case LongJobStatusCh <- status:
|
|
||||||
default:
|
|
||||||
r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", status)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) (*RAG, error) {
|
|
||||||
var embedder Embedder
|
|
||||||
var fallbackMsg string
|
|
||||||
if cfg.EmbedModelPath != "" && cfg.EmbedTokenizerPath != "" {
|
|
||||||
emb, err := NewONNXEmbedder(cfg.EmbedModelPath, cfg.EmbedTokenizerPath, cfg.EmbedDims, l)
|
|
||||||
if err != nil {
|
|
||||||
l.Error("failed to create ONNX embedder, falling back to API", "error", err)
|
|
||||||
fallbackMsg = err.Error()
|
|
||||||
embedder = NewAPIEmbedder(l, cfg)
|
|
||||||
} else {
|
|
||||||
embedder = emb
|
|
||||||
l.Info("using ONNX embedder", "model", cfg.EmbedModelPath, "dims", cfg.EmbedDims)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
embedder = NewAPIEmbedder(l, cfg)
|
|
||||||
l.Info("using API embedder", "url", cfg.EmbedURL)
|
|
||||||
}
|
|
||||||
rag := &RAG{
|
rag := &RAG{
|
||||||
logger: l,
|
logger: l,
|
||||||
store: s,
|
store: s,
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
embedder: embedder,
|
embedder: embedder,
|
||||||
storage: NewVectorStorage(l, s),
|
storage: NewVectorStorage(l, s),
|
||||||
fallbackMsg: fallbackMsg,
|
|
||||||
idleTimeout: 30 * time.Second,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: Vector tables are created via database migrations, not at runtime
|
// Note: Vector tables are created via database migrations, not at runtime
|
||||||
|
|
||||||
return rag, nil
|
return rag
|
||||||
}
|
}
|
||||||
|
|
||||||
func createChunks(sentences []string, wordLimit, overlapWords uint32) []string {
|
func wordCounter(sentence string) int {
|
||||||
if len(sentences) == 0 {
|
return len(strings.Split(strings.TrimSpace(sentence), " "))
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if overlapWords >= wordLimit {
|
|
||||||
overlapWords = wordLimit / 2
|
|
||||||
}
|
|
||||||
var chunks []string
|
|
||||||
i := 0
|
|
||||||
for i < len(sentences) {
|
|
||||||
var chunkWords []string
|
|
||||||
wordCount := 0
|
|
||||||
j := i
|
|
||||||
for j < len(sentences) && wordCount <= int(wordLimit) {
|
|
||||||
sentence := sentences[j]
|
|
||||||
words := strings.Fields(sentence)
|
|
||||||
chunkWords = append(chunkWords, sentence)
|
|
||||||
wordCount += len(words)
|
|
||||||
j++
|
|
||||||
// If this sentence alone exceeds limit, still include it and stop
|
|
||||||
if wordCount > int(wordLimit) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(chunkWords) == 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
chunk := strings.Join(chunkWords, " ")
|
|
||||||
chunks = append(chunks, chunk)
|
|
||||||
if j >= len(sentences) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
// Move i forward by skipping overlap
|
|
||||||
if overlapWords == 0 {
|
|
||||||
i = j
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Calculate how many sentences to skip to achieve overlapWords
|
|
||||||
overlapRemaining := int(overlapWords)
|
|
||||||
newI := i
|
|
||||||
for newI < j && overlapRemaining > 0 {
|
|
||||||
words := len(strings.Fields(sentences[newI]))
|
|
||||||
overlapRemaining -= words
|
|
||||||
if overlapRemaining >= 0 {
|
|
||||||
newI++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if newI == i {
|
|
||||||
newI = j
|
|
||||||
}
|
|
||||||
i = newI
|
|
||||||
}
|
|
||||||
return chunks
|
|
||||||
}
|
|
||||||
|
|
||||||
func sanitizeFTSQuery(query string) string {
|
|
||||||
// Remove double quotes and other problematic characters for FTS5
|
|
||||||
query = strings.ReplaceAll(query, "\"", " ")
|
|
||||||
query = strings.ReplaceAll(query, "'", " ")
|
|
||||||
query = strings.ReplaceAll(query, ";", " ")
|
|
||||||
query = strings.ReplaceAll(query, "\\", " ")
|
|
||||||
query = strings.TrimSpace(query)
|
|
||||||
if query == "" {
|
|
||||||
return "*" // match all
|
|
||||||
}
|
|
||||||
return query
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RAG) LoadRAG(fpath string) error {
|
func (r *RAG) LoadRAG(fpath string) error {
|
||||||
return r.LoadRAGWithContext(context.Background(), fpath)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error {
|
|
||||||
r.mu.Lock()
|
r.mu.Lock()
|
||||||
defer r.mu.Unlock()
|
defer r.mu.Unlock()
|
||||||
fileText, err := ExtractText(fpath)
|
fileText, err := ExtractText(fpath)
|
||||||
@@ -179,9 +61,11 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
r.logger.Debug("rag: loaded file", "fp", fpath)
|
r.logger.Debug("rag: loaded file", "fp", fpath)
|
||||||
|
select {
|
||||||
// Send initial status (non-blocking with retry)
|
case LongJobStatusCh <- LoadedFileRAGStatus:
|
||||||
r.sendStatusNonBlocking(LoadedFileRAGStatus)
|
default:
|
||||||
|
r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", LoadedFileRAGStatus)
|
||||||
|
}
|
||||||
tokenizer, err := english.NewSentenceTokenizer(nil)
|
tokenizer, err := english.NewSentenceTokenizer(nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -191,9 +75,31 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error {
|
|||||||
for i, s := range sentences {
|
for i, s := range sentences {
|
||||||
sents[i] = s.Text
|
sents[i] = s.Text
|
||||||
}
|
}
|
||||||
|
// Group sentences into paragraphs based on word limit
|
||||||
// Create chunks with overlap
|
paragraphs := []string{}
|
||||||
paragraphs := createChunks(sents, r.cfg.RAGWordLimit, r.cfg.RAGOverlapWords)
|
par := strings.Builder{}
|
||||||
|
for i := 0; i < len(sents); i++ {
|
||||||
|
if strings.TrimSpace(sents[i]) != "" {
|
||||||
|
if par.Len() > 0 {
|
||||||
|
par.WriteString(" ")
|
||||||
|
}
|
||||||
|
par.WriteString(sents[i])
|
||||||
|
}
|
||||||
|
if wordCounter(par.String()) > int(r.cfg.RAGWordLimit) {
|
||||||
|
paragraph := strings.TrimSpace(par.String())
|
||||||
|
if paragraph != "" {
|
||||||
|
paragraphs = append(paragraphs, paragraph)
|
||||||
|
}
|
||||||
|
par.Reset()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Handle any remaining content in the paragraph buffer
|
||||||
|
if par.Len() > 0 {
|
||||||
|
paragraph := strings.TrimSpace(par.String())
|
||||||
|
if paragraph != "" {
|
||||||
|
paragraphs = append(paragraphs, paragraph)
|
||||||
|
}
|
||||||
|
}
|
||||||
// Adjust batch size if needed
|
// Adjust batch size if needed
|
||||||
if len(paragraphs) < r.cfg.RAGBatchSize && len(paragraphs) > 0 {
|
if len(paragraphs) < r.cfg.RAGBatchSize && len(paragraphs) > 0 {
|
||||||
r.cfg.RAGBatchSize = len(paragraphs)
|
r.cfg.RAGBatchSize = len(paragraphs)
|
||||||
@@ -201,348 +107,91 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error {
|
|||||||
if len(paragraphs) == 0 {
|
if len(paragraphs) == 0 {
|
||||||
return errors.New("no valid paragraphs found in file")
|
return errors.New("no valid paragraphs found in file")
|
||||||
}
|
}
|
||||||
totalBatches := (len(paragraphs) + r.cfg.RAGBatchSize - 1) / r.cfg.RAGBatchSize
|
// Process paragraphs in batches synchronously
|
||||||
r.logger.Debug("starting parallel embedding", "total_batches", totalBatches, "batch_size", r.cfg.RAGBatchSize)
|
batchCount := 0
|
||||||
|
for i := 0; i < len(paragraphs); i += r.cfg.RAGBatchSize {
|
||||||
// Determine concurrency level
|
end := i + r.cfg.RAGBatchSize
|
||||||
concurrency := runtime.NumCPU()
|
if end > len(paragraphs) {
|
||||||
if concurrency > totalBatches {
|
end = len(paragraphs)
|
||||||
concurrency = totalBatches
|
}
|
||||||
}
|
batch := paragraphs[i:end]
|
||||||
if concurrency < 1 {
|
batchCount++
|
||||||
concurrency = 1
|
// Filter empty paragraphs
|
||||||
}
|
nonEmptyBatch := make([]string, 0, len(batch))
|
||||||
// If using ONNX embedder, limit concurrency to 1 due to mutex serialization
|
for _, p := range batch {
|
||||||
var isONNX bool
|
if strings.TrimSpace(p) != "" {
|
||||||
if _, isONNX = r.embedder.(*ONNXEmbedder); isONNX {
|
nonEmptyBatch = append(nonEmptyBatch, strings.TrimSpace(p))
|
||||||
concurrency = 1
|
|
||||||
}
|
|
||||||
embedderType := "API"
|
|
||||||
if isONNX {
|
|
||||||
embedderType = "ONNX"
|
|
||||||
}
|
|
||||||
r.logger.Debug("parallel embedding setup",
|
|
||||||
"total_batches", totalBatches,
|
|
||||||
"concurrency", concurrency,
|
|
||||||
"embedder", embedderType,
|
|
||||||
"batch_size", r.cfg.RAGBatchSize)
|
|
||||||
|
|
||||||
// Create context with timeout (30 minutes) and cancellation for error handling
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
// Channels for task distribution and results
|
|
||||||
taskCh := make(chan batchTask, totalBatches)
|
|
||||||
resultCh := make(chan batchResult, totalBatches)
|
|
||||||
errorCh := make(chan error, totalBatches)
|
|
||||||
|
|
||||||
// Start worker goroutines
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
for w := 0; w < concurrency; w++ {
|
|
||||||
wg.Add(1)
|
|
||||||
go r.embeddingWorker(ctx, w, taskCh, resultCh, errorCh, &wg)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close task channel after all tasks are sent (by separate goroutine)
|
|
||||||
go func() {
|
|
||||||
// Ensure task channel is closed when this goroutine exits
|
|
||||||
defer close(taskCh)
|
|
||||||
r.logger.Debug("task distributor started", "total_batches", totalBatches)
|
|
||||||
for i := 0; i < totalBatches; i++ {
|
|
||||||
start := i * r.cfg.RAGBatchSize
|
|
||||||
end := start + r.cfg.RAGBatchSize
|
|
||||||
if end > len(paragraphs) {
|
|
||||||
end = len(paragraphs)
|
|
||||||
}
|
|
||||||
batch := paragraphs[start:end]
|
|
||||||
|
|
||||||
// Filter empty paragraphs
|
|
||||||
nonEmptyBatch := make([]string, 0, len(batch))
|
|
||||||
for _, p := range batch {
|
|
||||||
if strings.TrimSpace(p) != "" {
|
|
||||||
nonEmptyBatch = append(nonEmptyBatch, strings.TrimSpace(p))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
task := batchTask{
|
|
||||||
batchIndex: i,
|
|
||||||
paragraphs: nonEmptyBatch,
|
|
||||||
filename: path.Base(fpath),
|
|
||||||
totalBatches: totalBatches,
|
|
||||||
}
|
|
||||||
|
|
||||||
select {
|
|
||||||
case taskCh <- task:
|
|
||||||
r.logger.Debug("task distributor sent batch", "batch", i, "paragraphs", len(nonEmptyBatch))
|
|
||||||
case <-ctx.Done():
|
|
||||||
r.logger.Debug("task distributor cancelled", "batches_sent", i+1, "total_batches", totalBatches)
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
r.logger.Debug("task distributor finished", "batches_sent", totalBatches)
|
if len(nonEmptyBatch) == 0 {
|
||||||
}()
|
|
||||||
|
|
||||||
// Wait for workers to finish and close result channel
|
|
||||||
go func() {
|
|
||||||
wg.Wait()
|
|
||||||
close(resultCh)
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Process results in order and write to database
|
|
||||||
nextExpectedBatch := 0
|
|
||||||
resultsBuffer := make(map[int]batchResult)
|
|
||||||
filename := path.Base(fpath)
|
|
||||||
batchesProcessed := 0
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return ctx.Err()
|
|
||||||
|
|
||||||
case err := <-errorCh:
|
|
||||||
// First error from any worker, cancel everything
|
|
||||||
cancel()
|
|
||||||
r.logger.Error("embedding worker failed", "error", err)
|
|
||||||
r.sendStatusNonBlocking(ErrRAGStatus)
|
|
||||||
return fmt.Errorf("embedding failed: %w", err)
|
|
||||||
|
|
||||||
case result, ok := <-resultCh:
|
|
||||||
if !ok {
|
|
||||||
// All results processed
|
|
||||||
resultCh = nil
|
|
||||||
r.logger.Debug("result channel closed", "batches_processed", batchesProcessed, "total_batches", totalBatches)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store result in buffer
|
|
||||||
resultsBuffer[result.batchIndex] = result
|
|
||||||
|
|
||||||
// Process buffered results in order
|
|
||||||
for {
|
|
||||||
if res, exists := resultsBuffer[nextExpectedBatch]; exists {
|
|
||||||
// Write this batch to database
|
|
||||||
if err := r.writeBatchToStorage(ctx, res, filename); err != nil {
|
|
||||||
cancel()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
batchesProcessed++
|
|
||||||
// Send progress update
|
|
||||||
statusMsg := fmt.Sprintf("processed batch %d/%d", batchesProcessed, totalBatches)
|
|
||||||
r.sendStatusNonBlocking(statusMsg)
|
|
||||||
|
|
||||||
delete(resultsBuffer, nextExpectedBatch)
|
|
||||||
nextExpectedBatch++
|
|
||||||
} else {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
// No channels ready, check for deadlock conditions
|
|
||||||
if resultCh == nil && nextExpectedBatch < totalBatches {
|
|
||||||
// Missing batch results after result channel closed
|
|
||||||
r.logger.Error("missing batch results",
|
|
||||||
"expected", totalBatches,
|
|
||||||
"received", nextExpectedBatch,
|
|
||||||
"missing", totalBatches-nextExpectedBatch)
|
|
||||||
|
|
||||||
// Wait a short time for any delayed errors, then cancel
|
|
||||||
select {
|
|
||||||
case <-time.After(5 * time.Second):
|
|
||||||
cancel()
|
|
||||||
return fmt.Errorf("missing batch results: expected %d, got %d", totalBatches, nextExpectedBatch)
|
|
||||||
case <-ctx.Done():
|
|
||||||
return ctx.Err()
|
|
||||||
case err := <-errorCh:
|
|
||||||
cancel()
|
|
||||||
r.logger.Error("embedding worker failed after result channel closed", "error", err)
|
|
||||||
r.sendStatusNonBlocking(ErrRAGStatus)
|
|
||||||
return fmt.Errorf("embedding failed: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If we reach here, no deadlock yet, just busy loop prevention
|
|
||||||
time.Sleep(100 * time.Millisecond)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if we're done
|
|
||||||
if resultCh == nil && nextExpectedBatch >= totalBatches {
|
|
||||||
r.logger.Debug("all batches processed successfully", "total", totalBatches)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
r.logger.Debug("finished writing vectors", "batches", batchesProcessed)
|
|
||||||
r.resetIdleTimer()
|
|
||||||
r.sendStatusNonBlocking(FinishedRAGStatus)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// embeddingWorker processes batch embedding tasks
|
|
||||||
func (r *RAG) embeddingWorker(ctx context.Context, workerID int, taskCh <-chan batchTask, resultCh chan<- batchResult, errorCh chan<- error, wg *sync.WaitGroup) {
|
|
||||||
defer wg.Done()
|
|
||||||
r.logger.Debug("embedding worker started", "worker", workerID)
|
|
||||||
|
|
||||||
// Panic recovery to ensure worker doesn't crash silently
|
|
||||||
defer func() {
|
|
||||||
if rec := recover(); rec != nil {
|
|
||||||
r.logger.Error("embedding worker panicked", "worker", workerID, "panic", rec)
|
|
||||||
// Try to send error, but don't block if channel is full
|
|
||||||
select {
|
|
||||||
case errorCh <- fmt.Errorf("worker %d panicked: %v", workerID, rec):
|
|
||||||
default:
|
|
||||||
r.logger.Warn("error channel full, dropping panic error", "worker", workerID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
for task := range taskCh {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
r.logger.Debug("embedding worker cancelled", "worker", workerID)
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
r.logger.Debug("worker processing batch", "worker", workerID, "batch", task.batchIndex, "paragraphs", len(task.paragraphs), "total_batches", task.totalBatches)
|
|
||||||
|
|
||||||
// Skip empty batches
|
|
||||||
if len(task.paragraphs) == 0 {
|
|
||||||
select {
|
|
||||||
case resultCh <- batchResult{
|
|
||||||
batchIndex: task.batchIndex,
|
|
||||||
embeddings: nil,
|
|
||||||
paragraphs: nil,
|
|
||||||
filename: task.filename,
|
|
||||||
}:
|
|
||||||
case <-ctx.Done():
|
|
||||||
r.logger.Debug("embedding worker cancelled while sending empty batch", "worker", workerID)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
r.logger.Debug("worker sent empty batch", "worker", workerID, "batch", task.batchIndex)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// Embed with retry for API embedder
|
// Embed the batch
|
||||||
embeddings, err := r.embedWithRetry(ctx, task.paragraphs, 3)
|
embeddings, err := r.embedder.EmbedSlice(nonEmptyBatch)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Try to send error, but don't block indefinitely
|
r.logger.Error("failed to embed batch", "error", err, "batch", batchCount)
|
||||||
select {
|
select {
|
||||||
case errorCh <- fmt.Errorf("worker %d batch %d: %w", workerID, task.batchIndex, err):
|
case LongJobStatusCh <- ErrRAGStatus:
|
||||||
case <-ctx.Done():
|
default:
|
||||||
r.logger.Debug("embedding worker cancelled while sending error", "worker", workerID)
|
r.logger.Warn("LongJobStatusCh channel full, dropping message")
|
||||||
}
|
}
|
||||||
return
|
return fmt.Errorf("failed to embed batch %d: %w", batchCount, err)
|
||||||
}
|
}
|
||||||
// Send result with context awareness
|
if len(embeddings) != len(nonEmptyBatch) {
|
||||||
|
err := errors.New("embedding count mismatch")
|
||||||
|
r.logger.Error("embedding mismatch", "expected", len(nonEmptyBatch), "got", len(embeddings))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Write vectors to storage
|
||||||
|
filename := path.Base(fpath)
|
||||||
|
for j, text := range nonEmptyBatch {
|
||||||
|
vector := models.VectorRow{
|
||||||
|
Embeddings: embeddings[j],
|
||||||
|
RawText: text,
|
||||||
|
Slug: fmt.Sprintf("%s_%d_%d", filename, batchCount, j),
|
||||||
|
FileName: filename,
|
||||||
|
}
|
||||||
|
if err := r.storage.WriteVector(&vector); err != nil {
|
||||||
|
r.logger.Error("failed to write vector to DB", "error", err, "slug", vector.Slug)
|
||||||
|
select {
|
||||||
|
case LongJobStatusCh <- ErrRAGStatus:
|
||||||
|
default:
|
||||||
|
r.logger.Warn("LongJobStatusCh channel full, dropping message")
|
||||||
|
}
|
||||||
|
return fmt.Errorf("failed to write vector: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r.logger.Debug("wrote batch to db", "batch", batchCount, "size", len(nonEmptyBatch))
|
||||||
|
// Send progress status
|
||||||
|
statusMsg := fmt.Sprintf("processed batch %d/%d", batchCount, (len(paragraphs)+r.cfg.RAGBatchSize-1)/r.cfg.RAGBatchSize)
|
||||||
select {
|
select {
|
||||||
case resultCh <- batchResult{
|
case LongJobStatusCh <- statusMsg:
|
||||||
batchIndex: task.batchIndex,
|
default:
|
||||||
embeddings: embeddings,
|
r.logger.Warn("LongJobStatusCh channel full, dropping message")
|
||||||
paragraphs: task.paragraphs,
|
|
||||||
filename: task.filename,
|
|
||||||
}:
|
|
||||||
case <-ctx.Done():
|
|
||||||
r.logger.Debug("embedding worker cancelled while sending result", "worker", workerID)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
r.logger.Debug("worker completed batch", "worker", workerID, "batch", task.batchIndex, "embeddings", len(embeddings))
|
|
||||||
}
|
|
||||||
r.logger.Debug("embedding worker finished", "worker", workerID)
|
|
||||||
}
|
|
||||||
|
|
||||||
// embedWithRetry attempts embedding with exponential backoff for API embedder
|
|
||||||
func (r *RAG) embedWithRetry(ctx context.Context, paragraphs []string, maxRetries int) ([][]float32, error) {
|
|
||||||
var lastErr error
|
|
||||||
for attempt := 0; attempt < maxRetries; attempt++ {
|
|
||||||
if attempt > 0 {
|
|
||||||
// Exponential backoff
|
|
||||||
backoff := time.Duration(attempt*attempt) * time.Second
|
|
||||||
if backoff > 10*time.Second {
|
|
||||||
backoff = 10 * time.Second
|
|
||||||
}
|
|
||||||
select {
|
|
||||||
case <-time.After(backoff):
|
|
||||||
case <-ctx.Done():
|
|
||||||
return nil, ctx.Err()
|
|
||||||
}
|
|
||||||
r.logger.Debug("retrying embedding", "attempt", attempt, "max_retries", maxRetries)
|
|
||||||
}
|
|
||||||
|
|
||||||
embeddings, err := r.embedder.EmbedSlice(paragraphs)
|
|
||||||
if err == nil {
|
|
||||||
// Validate embedding count
|
|
||||||
if len(embeddings) != len(paragraphs) {
|
|
||||||
return nil, fmt.Errorf("embedding count mismatch: expected %d, got %d", len(paragraphs), len(embeddings))
|
|
||||||
}
|
|
||||||
return embeddings, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
lastErr = err
|
|
||||||
// Only retry for API embedder errors (network/timeout)
|
|
||||||
// For ONNX embedder, fail fast
|
|
||||||
if _, isAPI := r.embedder.(*APIEmbedder); !isAPI {
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("embedding failed after %d attempts: %w", maxRetries, lastErr)
|
r.logger.Debug("finished writing vectors", "batches", batchCount)
|
||||||
}
|
|
||||||
|
|
||||||
// writeBatchToStorage writes a single batch of vectors to the database
|
|
||||||
func (r *RAG) writeBatchToStorage(ctx context.Context, result batchResult, filename string) error {
|
|
||||||
if len(result.embeddings) == 0 {
|
|
||||||
// Empty batch, skip
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// Check context before starting
|
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case LongJobStatusCh <- FinishedRAGStatus:
|
||||||
return ctx.Err()
|
|
||||||
default:
|
default:
|
||||||
|
r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", FinishedRAGStatus)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build all vectors for batch write
|
|
||||||
vectors := make([]*models.VectorRow, 0, len(result.paragraphs))
|
|
||||||
for j, text := range result.paragraphs {
|
|
||||||
vectors = append(vectors, &models.VectorRow{
|
|
||||||
Embeddings: result.embeddings[j],
|
|
||||||
RawText: text,
|
|
||||||
Slug: fmt.Sprintf("%s_%d_%d", filename, result.batchIndex+1, j),
|
|
||||||
FileName: filename,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write all vectors in a single transaction
|
|
||||||
if err := r.storage.WriteVectors(vectors); err != nil {
|
|
||||||
r.logger.Error("failed to write vectors batch to DB", "error", err, "batch", result.batchIndex+1, "size", len(vectors))
|
|
||||||
r.sendStatusNonBlocking(ErrRAGStatus)
|
|
||||||
return fmt.Errorf("failed to write vectors batch: %w", err)
|
|
||||||
}
|
|
||||||
r.logger.Debug("wrote batch to db", "batch", result.batchIndex+1, "size", len(result.paragraphs))
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RAG) LineToVector(line string) ([]float32, error) {
|
func (r *RAG) LineToVector(line string) ([]float32, error) {
|
||||||
r.resetIdleTimer()
|
|
||||||
return r.embedder.Embed(line)
|
return r.embedder.Embed(line)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RAG) searchEmb(emb *models.EmbeddingResp, limit int) ([]models.VectorRow, error) {
|
func (r *RAG) SearchEmb(emb *models.EmbeddingResp) ([]models.VectorRow, error) {
|
||||||
r.resetIdleTimer()
|
return r.storage.SearchClosest(emb.Embedding)
|
||||||
return r.storage.SearchClosest(emb.Embedding, limit)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *RAG) searchKeyword(query string, limit int) ([]models.VectorRow, error) {
|
|
||||||
r.resetIdleTimer()
|
|
||||||
sanitized := sanitizeFTSQuery(query)
|
|
||||||
return r.storage.SearchKeyword(sanitized, limit)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RAG) ListLoaded() ([]string, error) {
|
func (r *RAG) ListLoaded() ([]string, error) {
|
||||||
r.mu.RLock()
|
|
||||||
defer r.mu.RUnlock()
|
|
||||||
return r.storage.ListFiles()
|
return r.storage.ListFiles()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RAG) RemoveFile(filename string) error {
|
func (r *RAG) RemoveFile(filename string) error {
|
||||||
r.mu.Lock()
|
|
||||||
defer r.mu.Unlock()
|
|
||||||
r.resetIdleTimer()
|
|
||||||
return r.storage.RemoveEmbByFileName(filename)
|
return r.storage.RemoveEmbByFileName(filename)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -562,13 +211,10 @@ func (r *RAG) RefineQuery(query string) string {
|
|||||||
return original
|
return original
|
||||||
}
|
}
|
||||||
query = strings.ToLower(query)
|
query = strings.ToLower(query)
|
||||||
words := strings.Fields(query)
|
for _, stopWord := range stopWords {
|
||||||
if len(words) >= 3 {
|
wordPattern := `\b` + stopWord + `\b`
|
||||||
for _, stopWord := range stopWords {
|
re := regexp.MustCompile(wordPattern)
|
||||||
wordPattern := `\b` + stopWord + `\b`
|
query = re.ReplaceAllString(query, "")
|
||||||
re := regexp.MustCompile(wordPattern)
|
|
||||||
query = re.ReplaceAllString(query, "")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
query = strings.TrimSpace(query)
|
query = strings.TrimSpace(query)
|
||||||
if len(query) < 5 {
|
if len(query) < 5 {
|
||||||
@@ -600,7 +246,7 @@ func (r *RAG) extractImportantPhrases(query string) string {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if isImportant || len(word) >= 3 {
|
if isImportant || len(word) > 3 {
|
||||||
important = append(important, word)
|
important = append(important, word)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -619,36 +265,6 @@ func (r *RAG) GenerateQueryVariations(query string) []string {
|
|||||||
if len(parts) == 0 {
|
if len(parts) == 0 {
|
||||||
return variations
|
return variations
|
||||||
}
|
}
|
||||||
// Get loaded filenames to filter out filename terms
|
|
||||||
filenames, err := r.storage.ListFiles()
|
|
||||||
if err == nil && len(filenames) > 0 {
|
|
||||||
// Convert to lowercase for case-insensitive matching
|
|
||||||
lowerFilenames := make([]string, len(filenames))
|
|
||||||
for i, f := range filenames {
|
|
||||||
lowerFilenames[i] = strings.ToLower(f)
|
|
||||||
}
|
|
||||||
filteredParts := make([]string, 0, len(parts))
|
|
||||||
for _, part := range parts {
|
|
||||||
partLower := strings.ToLower(part)
|
|
||||||
skip := false
|
|
||||||
for _, fn := range lowerFilenames {
|
|
||||||
if strings.Contains(fn, partLower) || strings.Contains(partLower, fn) {
|
|
||||||
skip = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !skip {
|
|
||||||
filteredParts = append(filteredParts, part)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If filteredParts not empty and different from original, add filtered query
|
|
||||||
if len(filteredParts) > 0 && len(filteredParts) != len(parts) {
|
|
||||||
filteredQuery := strings.Join(filteredParts, " ")
|
|
||||||
if len(filteredQuery) >= 5 {
|
|
||||||
variations = append(variations, filteredQuery)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(parts) >= 2 {
|
if len(parts) >= 2 {
|
||||||
trimmed := strings.Join(parts[:len(parts)-1], " ")
|
trimmed := strings.Join(parts[:len(parts)-1], " ")
|
||||||
if len(trimmed) >= 5 {
|
if len(trimmed) >= 5 {
|
||||||
@@ -712,14 +328,9 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V
|
|||||||
})
|
})
|
||||||
unique := make([]models.VectorRow, 0)
|
unique := make([]models.VectorRow, 0)
|
||||||
seen := make(map[string]bool)
|
seen := make(map[string]bool)
|
||||||
fileCounts := make(map[string]int)
|
|
||||||
for i := range scored {
|
for i := range scored {
|
||||||
if !seen[scored[i].row.Slug] {
|
if !seen[scored[i].row.Slug] {
|
||||||
if fileCounts[scored[i].row.FileName] >= 2 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seen[scored[i].row.Slug] = true
|
seen[scored[i].row.Slug] = true
|
||||||
fileCounts[scored[i].row.FileName]++
|
|
||||||
unique = append(unique, scored[i].row)
|
unique = append(unique, scored[i].row)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -730,9 +341,6 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *RAG) SynthesizeAnswer(results []models.VectorRow, query string) (string, error) {
|
func (r *RAG) SynthesizeAnswer(results []models.VectorRow, query string) (string, error) {
|
||||||
r.mu.RLock()
|
|
||||||
defer r.mu.RUnlock()
|
|
||||||
r.resetIdleTimer()
|
|
||||||
if len(results) == 0 {
|
if len(results) == 0 {
|
||||||
return "No relevant information found in the vector database.", nil
|
return "No relevant information found in the vector database.", nil
|
||||||
}
|
}
|
||||||
@@ -761,7 +369,7 @@ func (r *RAG) SynthesizeAnswer(results []models.VectorRow, query string) (string
|
|||||||
Embedding: emb,
|
Embedding: emb,
|
||||||
Index: 0,
|
Index: 0,
|
||||||
}
|
}
|
||||||
topResults, err := r.searchEmb(embResp, 1)
|
topResults, err := r.SearchEmb(embResp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
r.logger.Error("failed to search for synthesis context", "error", err)
|
r.logger.Error("failed to search for synthesis context", "error", err)
|
||||||
return "", err
|
return "", err
|
||||||
@@ -788,14 +396,9 @@ func truncateString(s string, maxLen int) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) {
|
func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) {
|
||||||
r.mu.RLock()
|
|
||||||
defer r.mu.RUnlock()
|
|
||||||
r.resetIdleTimer()
|
|
||||||
refined := r.RefineQuery(query)
|
refined := r.RefineQuery(query)
|
||||||
variations := r.GenerateQueryVariations(refined)
|
variations := r.GenerateQueryVariations(refined)
|
||||||
|
allResults := make([]models.VectorRow, 0)
|
||||||
// Collect embedding search results from all variations
|
|
||||||
var embResults []models.VectorRow
|
|
||||||
seen := make(map[string]bool)
|
seen := make(map[string]bool)
|
||||||
for _, q := range variations {
|
for _, q := range variations {
|
||||||
emb, err := r.LineToVector(q)
|
emb, err := r.LineToVector(q)
|
||||||
@@ -803,78 +406,29 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) {
|
|||||||
r.logger.Error("failed to embed query variation", "error", err, "query", q)
|
r.logger.Error("failed to embed query variation", "error", err, "query", q)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
embResp := &models.EmbeddingResp{
|
embResp := &models.EmbeddingResp{
|
||||||
Embedding: emb,
|
Embedding: emb,
|
||||||
Index: 0,
|
Index: 0,
|
||||||
}
|
}
|
||||||
results, err := r.searchEmb(embResp, limit*2) // Get more candidates
|
|
||||||
|
results, err := r.SearchEmb(embResp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
r.logger.Error("failed to search embeddings", "error", err, "query", q)
|
r.logger.Error("failed to search embeddings", "error", err, "query", q)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, row := range results {
|
for _, row := range results {
|
||||||
if !seen[row.Slug] {
|
if !seen[row.Slug] {
|
||||||
seen[row.Slug] = true
|
seen[row.Slug] = true
|
||||||
embResults = append(embResults, row)
|
allResults = append(allResults, row)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Sort embedding results by distance (lower is better)
|
reranked := r.RerankResults(allResults, query)
|
||||||
sort.Slice(embResults, func(i, j int) bool {
|
if len(reranked) > limit {
|
||||||
return embResults[i].Distance < embResults[j].Distance
|
reranked = reranked[:limit]
|
||||||
})
|
|
||||||
|
|
||||||
// Perform keyword search
|
|
||||||
kwResults, err := r.searchKeyword(refined, limit*2)
|
|
||||||
if err != nil {
|
|
||||||
r.logger.Warn("keyword search failed, using only embeddings", "error", err)
|
|
||||||
kwResults = nil
|
|
||||||
}
|
}
|
||||||
// Sort keyword results by distance (already sorted by BM25 score)
|
|
||||||
// kwResults already sorted by distance (lower is better)
|
|
||||||
|
|
||||||
// Combine using Reciprocal Rank Fusion (RRF)
|
|
||||||
const rrfK = 60
|
|
||||||
type scoredRow struct {
|
|
||||||
row models.VectorRow
|
|
||||||
score float64
|
|
||||||
}
|
|
||||||
scoreMap := make(map[string]float64)
|
|
||||||
// Add embedding results
|
|
||||||
for rank, row := range embResults {
|
|
||||||
score := 1.0 / (float64(rank) + rrfK)
|
|
||||||
scoreMap[row.Slug] += score
|
|
||||||
}
|
|
||||||
// Add keyword results
|
|
||||||
for rank, row := range kwResults {
|
|
||||||
score := 1.0 / (float64(rank) + rrfK)
|
|
||||||
scoreMap[row.Slug] += score
|
|
||||||
// Ensure row exists in combined results
|
|
||||||
if _, exists := seen[row.Slug]; !exists {
|
|
||||||
embResults = append(embResults, row)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Create slice of scored rows
|
|
||||||
scoredRows := make([]scoredRow, 0, len(embResults))
|
|
||||||
for _, row := range embResults {
|
|
||||||
score := scoreMap[row.Slug]
|
|
||||||
scoredRows = append(scoredRows, scoredRow{row: row, score: score})
|
|
||||||
}
|
|
||||||
// Sort by descending RRF score
|
|
||||||
sort.Slice(scoredRows, func(i, j int) bool {
|
|
||||||
return scoredRows[i].score > scoredRows[j].score
|
|
||||||
})
|
|
||||||
// Take top limit
|
|
||||||
if len(scoredRows) > limit {
|
|
||||||
scoredRows = scoredRows[:limit]
|
|
||||||
}
|
|
||||||
// Convert back to VectorRow
|
|
||||||
finalResults := make([]models.VectorRow, len(scoredRows))
|
|
||||||
for i, sr := range scoredRows {
|
|
||||||
finalResults[i] = sr.row
|
|
||||||
}
|
|
||||||
// Apply reranking heuristics
|
|
||||||
reranked := r.RerankResults(finalResults, query)
|
|
||||||
return reranked, nil
|
return reranked, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -883,58 +437,16 @@ var (
|
|||||||
ragOnce sync.Once
|
ragOnce sync.Once
|
||||||
)
|
)
|
||||||
|
|
||||||
func (r *RAG) FallbackMessage() string {
|
|
||||||
return r.fallbackMsg
|
|
||||||
}
|
|
||||||
|
|
||||||
func Init(c *config.Config, l *slog.Logger, s storage.FullRepo) error {
|
func Init(c *config.Config, l *slog.Logger, s storage.FullRepo) error {
|
||||||
var err error
|
|
||||||
ragOnce.Do(func() {
|
ragOnce.Do(func() {
|
||||||
if c == nil || l == nil || s == nil {
|
if c == nil || l == nil || s == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ragInstance, err = New(l, s, c)
|
ragInstance = New(l, s, c)
|
||||||
})
|
})
|
||||||
return err
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetInstance() *RAG {
|
func GetInstance() *RAG {
|
||||||
return ragInstance
|
return ragInstance
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RAG) resetIdleTimer() {
|
|
||||||
r.idleMu.Lock()
|
|
||||||
defer r.idleMu.Unlock()
|
|
||||||
if r.idleTimer != nil {
|
|
||||||
r.idleTimer.Stop()
|
|
||||||
}
|
|
||||||
r.idleTimer = time.AfterFunc(r.idleTimeout, func() {
|
|
||||||
r.freeONNXMemory()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *RAG) freeONNXMemory() {
|
|
||||||
r.mu.Lock()
|
|
||||||
defer r.mu.Unlock()
|
|
||||||
if onnx, ok := r.embedder.(*ONNXEmbedder); ok {
|
|
||||||
if err := onnx.Destroy(); err != nil {
|
|
||||||
r.logger.Error("failed to free ONNX memory", "error", err)
|
|
||||||
} else {
|
|
||||||
r.logger.Info("freed ONNX VRAM after idle timeout")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *RAG) Destroy() {
|
|
||||||
r.mu.Lock()
|
|
||||||
defer r.mu.Unlock()
|
|
||||||
if r.idleTimer != nil {
|
|
||||||
r.idleTimer.Stop()
|
|
||||||
r.idleTimer = nil
|
|
||||||
}
|
|
||||||
if onnx, ok := r.embedder.(*ONNXEmbedder); ok {
|
|
||||||
if err := onnx.Destroy(); err != nil {
|
|
||||||
r.logger.Error("failed to destroy ONNX embedder", "error", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
232
rag/storage.go
232
rag/storage.go
@@ -1,7 +1,6 @@
|
|||||||
package rag
|
package rag
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"database/sql"
|
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"gf-lt/models"
|
"gf-lt/models"
|
||||||
@@ -63,17 +62,6 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
embeddingSize := len(row.Embeddings)
|
|
||||||
// Start transaction
|
|
||||||
tx, err := vs.sqlxDB.Beginx()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
if err != nil {
|
|
||||||
_ = tx.Rollback()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Serialize the embeddings to binary
|
// Serialize the embeddings to binary
|
||||||
serializedEmbeddings := SerializeVector(row.Embeddings)
|
serializedEmbeddings := SerializeVector(row.Embeddings)
|
||||||
@@ -81,102 +69,10 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error {
|
|||||||
"INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES (?, ?, ?, ?)",
|
"INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES (?, ?, ?, ?)",
|
||||||
tableName,
|
tableName,
|
||||||
)
|
)
|
||||||
if _, err := tx.Exec(query, serializedEmbeddings, row.Slug, row.RawText, row.FileName); err != nil {
|
if _, err := vs.sqlxDB.Exec(query, serializedEmbeddings, row.Slug, row.RawText, row.FileName); err != nil {
|
||||||
vs.logger.Error("failed to write vector", "error", err, "slug", row.Slug)
|
vs.logger.Error("failed to write vector", "error", err, "slug", row.Slug)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// Insert into FTS table
|
|
||||||
ftsQuery := `INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES (?, ?, ?, ?)`
|
|
||||||
if _, err := tx.Exec(ftsQuery, row.Slug, row.RawText, row.FileName, embeddingSize); err != nil {
|
|
||||||
vs.logger.Error("failed to write to FTS table", "error", err, "slug", row.Slug)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = tx.Commit()
|
|
||||||
if err != nil {
|
|
||||||
vs.logger.Error("failed to commit transaction", "error", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// WriteVectors stores multiple embedding vectors in a single transaction
|
|
||||||
func (vs *VectorStorage) WriteVectors(rows []*models.VectorRow) error {
|
|
||||||
if len(rows) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// SQLite has limit of 999 parameters per statement, each row uses 4 parameters
|
|
||||||
const maxBatchSize = 200 // 200 * 4 = 800 < 999
|
|
||||||
if len(rows) > maxBatchSize {
|
|
||||||
// Process in chunks
|
|
||||||
for i := 0; i < len(rows); i += maxBatchSize {
|
|
||||||
end := i + maxBatchSize
|
|
||||||
if end > len(rows) {
|
|
||||||
end = len(rows)
|
|
||||||
}
|
|
||||||
if err := vs.WriteVectors(rows[i:end]); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// All rows should have same embedding size (same model)
|
|
||||||
firstSize := len(rows[0].Embeddings)
|
|
||||||
for i, row := range rows {
|
|
||||||
if len(row.Embeddings) != firstSize {
|
|
||||||
return fmt.Errorf("embedding size mismatch: row %d has size %d, expected %d", i, len(row.Embeddings), firstSize)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tableName, err := vs.getTableName(rows[0].Embeddings)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// Start transaction
|
|
||||||
tx, err := vs.sqlxDB.Beginx()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
if err != nil {
|
|
||||||
_ = tx.Rollback()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Build batch insert for embeddings table
|
|
||||||
embeddingPlaceholders := make([]string, 0, len(rows))
|
|
||||||
embeddingArgs := make([]any, 0, len(rows)*4)
|
|
||||||
for _, row := range rows {
|
|
||||||
embeddingPlaceholders = append(embeddingPlaceholders, "(?, ?, ?, ?)")
|
|
||||||
embeddingArgs = append(embeddingArgs, SerializeVector(row.Embeddings), row.Slug, row.RawText, row.FileName)
|
|
||||||
}
|
|
||||||
embeddingQuery := fmt.Sprintf(
|
|
||||||
"INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES %s",
|
|
||||||
tableName,
|
|
||||||
strings.Join(embeddingPlaceholders, ", "),
|
|
||||||
)
|
|
||||||
if _, err := tx.Exec(embeddingQuery, embeddingArgs...); err != nil {
|
|
||||||
vs.logger.Error("failed to write vectors batch", "error", err, "batch_size", len(rows))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// Build batch insert for FTS table
|
|
||||||
ftsPlaceholders := make([]string, 0, len(rows))
|
|
||||||
ftsArgs := make([]any, 0, len(rows)*4)
|
|
||||||
embeddingSize := len(rows[0].Embeddings)
|
|
||||||
for _, row := range rows {
|
|
||||||
ftsPlaceholders = append(ftsPlaceholders, "(?, ?, ?, ?)")
|
|
||||||
ftsArgs = append(ftsArgs, row.Slug, row.RawText, row.FileName, embeddingSize)
|
|
||||||
}
|
|
||||||
ftsQuery := "INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES " +
|
|
||||||
strings.Join(ftsPlaceholders, ", ")
|
|
||||||
if _, err := tx.Exec(ftsQuery, ftsArgs...); err != nil {
|
|
||||||
vs.logger.Error("failed to write FTS batch", "error", err, "batch_size", len(rows))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = tx.Commit()
|
|
||||||
if err != nil {
|
|
||||||
vs.logger.Error("failed to commit transaction", "error", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
vs.logger.Debug("wrote vectors batch", "batch_size", len(rows))
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -202,25 +98,30 @@ func (vs *VectorStorage) getTableName(emb []float32) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// SearchClosest finds vectors closest to the query vector using efficient cosine similarity calculation
|
// SearchClosest finds vectors closest to the query vector using efficient cosine similarity calculation
|
||||||
func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.VectorRow, error) {
|
func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, error) {
|
||||||
if limit <= 0 {
|
|
||||||
limit = 10
|
|
||||||
}
|
|
||||||
tableName, err := vs.getTableName(query)
|
tableName, err := vs.getTableName(query)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For better performance, instead of loading all vectors at once,
|
||||||
|
// we'll implement batching and potentially add L2 distance-based pre-filtering
|
||||||
|
// since cosine similarity is related to L2 distance for normalized vectors
|
||||||
|
|
||||||
querySQL := "SELECT embeddings, slug, raw_text, filename FROM " + tableName
|
querySQL := "SELECT embeddings, slug, raw_text, filename FROM " + tableName
|
||||||
rows, err := vs.sqlxDB.Query(querySQL)
|
rows, err := vs.sqlxDB.Query(querySQL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer rows.Close()
|
defer rows.Close()
|
||||||
|
|
||||||
|
// Use a min-heap or simple slice to keep track of top 3 closest vectors
|
||||||
type SearchResult struct {
|
type SearchResult struct {
|
||||||
vector models.VectorRow
|
vector models.VectorRow
|
||||||
distance float32
|
distance float32
|
||||||
}
|
}
|
||||||
var topResults []SearchResult
|
var topResults []SearchResult
|
||||||
|
// Process vectors one by one to avoid loading everything into memory
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
var (
|
var (
|
||||||
embeddingsBlob []byte
|
embeddingsBlob []byte
|
||||||
@@ -231,9 +132,12 @@ func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.Vec
|
|||||||
vs.logger.Error("failed to scan row", "error", err)
|
vs.logger.Error("failed to scan row", "error", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
storedEmbeddings := DeserializeVector(embeddingsBlob)
|
storedEmbeddings := DeserializeVector(embeddingsBlob)
|
||||||
|
|
||||||
|
// Calculate cosine similarity (returns value between -1 and 1, where 1 is most similar)
|
||||||
similarity := cosineSimilarity(query, storedEmbeddings)
|
similarity := cosineSimilarity(query, storedEmbeddings)
|
||||||
distance := 1 - similarity
|
distance := 1 - similarity // Convert to distance where 0 is most similar
|
||||||
|
|
||||||
result := SearchResult{
|
result := SearchResult{
|
||||||
vector: models.VectorRow{
|
vector: models.VectorRow{
|
||||||
@@ -245,14 +149,20 @@ func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.Vec
|
|||||||
distance: distance,
|
distance: distance,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add to top results and maintain only top 3
|
||||||
topResults = append(topResults, result)
|
topResults = append(topResults, result)
|
||||||
|
|
||||||
|
// Sort and keep only top 3
|
||||||
sort.Slice(topResults, func(i, j int) bool {
|
sort.Slice(topResults, func(i, j int) bool {
|
||||||
return topResults[i].distance < topResults[j].distance
|
return topResults[i].distance < topResults[j].distance
|
||||||
})
|
})
|
||||||
if len(topResults) > limit {
|
|
||||||
topResults = topResults[:limit]
|
if len(topResults) > 3 {
|
||||||
|
topResults = topResults[:3] // Keep only closest 3
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert back to VectorRow slice
|
||||||
results := make([]models.VectorRow, 0, len(topResults))
|
results := make([]models.VectorRow, 0, len(topResults))
|
||||||
for _, result := range topResults {
|
for _, result := range topResults {
|
||||||
result.vector.Distance = result.distance
|
result.vector.Distance = result.distance
|
||||||
@@ -261,100 +171,6 @@ func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.Vec
|
|||||||
return results, nil
|
return results, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetVectorBySlug retrieves a vector row by its slug
|
|
||||||
func (vs *VectorStorage) GetVectorBySlug(slug string) (*models.VectorRow, error) {
|
|
||||||
embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120}
|
|
||||||
for _, size := range embeddingSizes {
|
|
||||||
table := fmt.Sprintf("embeddings_%d", size)
|
|
||||||
query := fmt.Sprintf("SELECT embeddings, slug, raw_text, filename FROM %s WHERE slug = ?", table)
|
|
||||||
row := vs.sqlxDB.QueryRow(query, slug)
|
|
||||||
var (
|
|
||||||
embeddingsBlob []byte
|
|
||||||
retrievedSlug, rawText, fileName string
|
|
||||||
)
|
|
||||||
if err := row.Scan(&embeddingsBlob, &retrievedSlug, &rawText, &fileName); err != nil {
|
|
||||||
// No row in this table, continue to next size
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
storedEmbeddings := DeserializeVector(embeddingsBlob)
|
|
||||||
return &models.VectorRow{
|
|
||||||
Embeddings: storedEmbeddings,
|
|
||||||
Slug: retrievedSlug,
|
|
||||||
RawText: rawText,
|
|
||||||
FileName: fileName,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
return nil, fmt.Errorf("vector with slug %s not found", slug)
|
|
||||||
}
|
|
||||||
|
|
||||||
// SearchKeyword performs full-text search using FTS5
|
|
||||||
func (vs *VectorStorage) SearchKeyword(query string, limit int) ([]models.VectorRow, error) {
|
|
||||||
// Use FTS5 bm25 ranking. bm25 returns negative values where more negative is better.
|
|
||||||
// We'll order by bm25 (ascending) and limit.
|
|
||||||
ftsQuery := `SELECT slug, raw_text, filename, bm25(fts_embeddings) as score
|
|
||||||
FROM fts_embeddings
|
|
||||||
WHERE fts_embeddings MATCH ?
|
|
||||||
ORDER BY score
|
|
||||||
LIMIT ?`
|
|
||||||
|
|
||||||
// Try original query first
|
|
||||||
rows, err := vs.sqlxDB.Query(ftsQuery, query, limit)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("FTS search failed: %w", err)
|
|
||||||
}
|
|
||||||
results, err := vs.scanRows(rows)
|
|
||||||
rows.Close()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If no results and query contains multiple terms, try OR fallback
|
|
||||||
if len(results) == 0 && strings.Contains(query, " ") && !strings.Contains(strings.ToUpper(query), " OR ") {
|
|
||||||
// Build OR query: term1 OR term2 OR term3
|
|
||||||
terms := strings.Fields(query)
|
|
||||||
if len(terms) > 1 {
|
|
||||||
orQuery := strings.Join(terms, " OR ")
|
|
||||||
rows, err := vs.sqlxDB.Query(ftsQuery, orQuery, limit)
|
|
||||||
if err != nil {
|
|
||||||
// Return original empty results rather than error
|
|
||||||
return results, nil
|
|
||||||
}
|
|
||||||
orResults, err := vs.scanRows(rows)
|
|
||||||
rows.Close()
|
|
||||||
if err == nil {
|
|
||||||
results = orResults
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return results, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// scanRows converts SQL rows to VectorRow slice
|
|
||||||
func (vs *VectorStorage) scanRows(rows *sql.Rows) ([]models.VectorRow, error) {
|
|
||||||
var results []models.VectorRow
|
|
||||||
for rows.Next() {
|
|
||||||
var slug, rawText, fileName string
|
|
||||||
var score float64
|
|
||||||
if err := rows.Scan(&slug, &rawText, &fileName, &score); err != nil {
|
|
||||||
vs.logger.Error("failed to scan FTS row", "error", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Convert BM25 score to distance-like metric (lower is better)
|
|
||||||
// BM25 is negative, more negative is better. We'll normalize to positive distance.
|
|
||||||
distance := float32(-score) // Make positive (since score is negative)
|
|
||||||
if distance < 0 {
|
|
||||||
distance = 0
|
|
||||||
}
|
|
||||||
results = append(results, models.VectorRow{
|
|
||||||
Slug: slug,
|
|
||||||
RawText: rawText,
|
|
||||||
FileName: fileName,
|
|
||||||
Distance: distance,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return results, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ListFiles returns a list of all loaded files
|
// ListFiles returns a list of all loaded files
|
||||||
func (vs *VectorStorage) ListFiles() ([]string, error) {
|
func (vs *VectorStorage) ListFiles() ([]string, error) {
|
||||||
fileLists := make([][]string, 0)
|
fileLists := make([][]string, 0)
|
||||||
@@ -399,10 +215,6 @@ func (vs *VectorStorage) ListFiles() ([]string, error) {
|
|||||||
// RemoveEmbByFileName removes all embeddings associated with a specific filename
|
// RemoveEmbByFileName removes all embeddings associated with a specific filename
|
||||||
func (vs *VectorStorage) RemoveEmbByFileName(filename string) error {
|
func (vs *VectorStorage) RemoveEmbByFileName(filename string) error {
|
||||||
var errors []string
|
var errors []string
|
||||||
// Delete from FTS table first
|
|
||||||
if _, err := vs.sqlxDB.Exec("DELETE FROM fts_embeddings WHERE filename = ?", filename); err != nil {
|
|
||||||
errors = append(errors, err.Error())
|
|
||||||
}
|
|
||||||
embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120}
|
embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120}
|
||||||
for _, size := range embeddingSizes {
|
for _, size := range embeddingSizes {
|
||||||
table := fmt.Sprintf("embeddings_%d", size)
|
table := fmt.Sprintf("embeddings_%d", size)
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ func historyToSJSON(msgs []models.RoleMsg) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func exportChat() error {
|
func exportChat() error {
|
||||||
data, err := json.MarshalIndent(chatBody.GetMessages(), "", " ")
|
data, err := json.MarshalIndent(chatBody.Messages, "", " ")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -54,7 +54,7 @@ func importChat(filename string) error {
|
|||||||
if _, ok := chatMap[activeChatName]; !ok {
|
if _, ok := chatMap[activeChatName]; !ok {
|
||||||
addNewChat(activeChatName)
|
addNewChat(activeChatName)
|
||||||
}
|
}
|
||||||
chatBody.SetMessages(messages)
|
chatBody.Messages = messages
|
||||||
cfg.AssistantRole = messages[1].Role
|
cfg.AssistantRole = messages[1].Role
|
||||||
if cfg.AssistantRole == cfg.UserRole {
|
if cfg.AssistantRole == cfg.UserRole {
|
||||||
cfg.AssistantRole = messages[2].Role
|
cfg.AssistantRole = messages[2].Role
|
||||||
|
|||||||
@@ -1,2 +0,0 @@
|
|||||||
-- Drop FTS5 virtual table
|
|
||||||
DROP TABLE IF EXISTS fts_embeddings;
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
-- Create FTS5 virtual table for full-text search
|
|
||||||
CREATE VIRTUAL TABLE IF NOT EXISTS fts_embeddings USING fts5(
|
|
||||||
slug UNINDEXED,
|
|
||||||
raw_text,
|
|
||||||
filename UNINDEXED,
|
|
||||||
embedding_size UNINDEXED,
|
|
||||||
tokenize='porter unicode61' -- Use porter stemmer and unicode61 tokenizer
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Create triggers to maintain FTS table when embeddings are inserted/deleted
|
|
||||||
-- Note: We'll handle inserts/deletes programmatically for simplicity
|
|
||||||
-- but triggers could be added here if needed.
|
|
||||||
|
|
||||||
-- Indexes for performance (FTS5 manages its own indexes)
|
|
||||||
-- No additional indexes needed for FTS5 virtual table.
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
-- Clear FTS table (optional)
|
|
||||||
DELETE FROM fts_embeddings;
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
-- Populate FTS table with existing embeddings
|
|
||||||
DELETE FROM fts_embeddings;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 384 FROM embeddings_384;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 768 FROM embeddings_768;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 1024 FROM embeddings_1024;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 1536 FROM embeddings_1536;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 2048 FROM embeddings_2048;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 3072 FROM embeddings_3072;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 4096 FROM embeddings_4096;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 5120 FROM embeddings_5120;
|
|
||||||
@@ -102,22 +102,6 @@ func NewProviderSQL(dbPath string, logger *slog.Logger) FullRepo {
|
|||||||
logger.Error("failed to open db connection", "error", err)
|
logger.Error("failed to open db connection", "error", err)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Enable WAL mode for better concurrency and performance
|
|
||||||
if _, err := db.Exec("PRAGMA journal_mode = WAL;"); err != nil {
|
|
||||||
logger.Warn("failed to enable WAL mode", "error", err)
|
|
||||||
}
|
|
||||||
if _, err := db.Exec("PRAGMA synchronous = NORMAL;"); err != nil {
|
|
||||||
logger.Warn("failed to set synchronous mode", "error", err)
|
|
||||||
}
|
|
||||||
// Increase cache size for better performance
|
|
||||||
if _, err := db.Exec("PRAGMA cache_size = -2000;"); err != nil {
|
|
||||||
logger.Warn("failed to set cache size", "error", err)
|
|
||||||
}
|
|
||||||
// Log actual journal mode for debugging
|
|
||||||
var journalMode string
|
|
||||||
if err := db.QueryRow("PRAGMA journal_mode;").Scan(&journalMode); err == nil {
|
|
||||||
logger.Debug("SQLite journal mode", "mode", journalMode)
|
|
||||||
}
|
|
||||||
p := ProviderSQL{db: db, logger: logger}
|
p := ProviderSQL{db: db, logger: logger}
|
||||||
if err := p.Migrate(); err != nil {
|
if err := p.Migrate(); err != nil {
|
||||||
logger.Error("migration failed, app cannot start", "error", err)
|
logger.Error("migration failed, app cannot start", "error", err)
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"gf-lt/models"
|
"gf-lt/models"
|
||||||
"sort"
|
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/jmoiron/sqlx"
|
"github.com/jmoiron/sqlx"
|
||||||
@@ -12,7 +11,7 @@ import (
|
|||||||
|
|
||||||
type VectorRepo interface {
|
type VectorRepo interface {
|
||||||
WriteVector(*models.VectorRow) error
|
WriteVector(*models.VectorRow) error
|
||||||
SearchClosest(q []float32, limit int) ([]models.VectorRow, error)
|
SearchClosest(q []float32) ([]models.VectorRow, error)
|
||||||
ListFiles() ([]string, error)
|
ListFiles() ([]string, error)
|
||||||
RemoveEmbByFileName(filename string) error
|
RemoveEmbByFileName(filename string) error
|
||||||
DB() *sqlx.DB
|
DB() *sqlx.DB
|
||||||
@@ -80,7 +79,7 @@ func (p ProviderSQL) WriteVector(row *models.VectorRow) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p ProviderSQL) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) {
|
func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
|
||||||
tableName, err := fetchTableName(q)
|
tableName, err := fetchTableName(q)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -95,7 +94,7 @@ func (p ProviderSQL) SearchClosest(q []float32, limit int) ([]models.VectorRow,
|
|||||||
vector models.VectorRow
|
vector models.VectorRow
|
||||||
distance float32
|
distance float32
|
||||||
}
|
}
|
||||||
var allResults []SearchResult
|
var topResults []SearchResult
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
var (
|
var (
|
||||||
embeddingsBlob []byte
|
embeddingsBlob []byte
|
||||||
@@ -120,19 +119,28 @@ func (p ProviderSQL) SearchClosest(q []float32, limit int) ([]models.VectorRow,
|
|||||||
},
|
},
|
||||||
distance: distance,
|
distance: distance,
|
||||||
}
|
}
|
||||||
allResults = append(allResults, result)
|
|
||||||
}
|
// Add to top results and maintain only top results
|
||||||
// Sort by distance
|
topResults = append(topResults, result)
|
||||||
sort.Slice(allResults, func(i, j int) bool {
|
|
||||||
return allResults[i].distance < allResults[j].distance
|
// Sort and keep only top results
|
||||||
})
|
// We'll keep the top 3 closest vectors
|
||||||
// Truncate to limit
|
if len(topResults) > 3 {
|
||||||
if len(allResults) > limit {
|
// Simple sort and truncate to maintain only 3 best matches
|
||||||
allResults = allResults[:limit]
|
for i := 0; i < len(topResults); i++ {
|
||||||
|
for j := i + 1; j < len(topResults); j++ {
|
||||||
|
if topResults[i].distance > topResults[j].distance {
|
||||||
|
topResults[i], topResults[j] = topResults[j], topResults[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
topResults = topResults[:3]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert back to VectorRow slice
|
// Convert back to VectorRow slice
|
||||||
results := make([]models.VectorRow, len(allResults))
|
results := make([]models.VectorRow, len(topResults))
|
||||||
for i, result := range allResults {
|
for i, result := range topResults {
|
||||||
result.vector.Distance = result.distance
|
result.vector.Distance = result.distance
|
||||||
results[i] = result.vector
|
results[i] = result.vector
|
||||||
}
|
}
|
||||||
|
|||||||
7
sysprompts/cluedo.json
Normal file
7
sysprompts/cluedo.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"sys_prompt": "A game of cluedo. Players are {{user}}, {{char}}, {{char2}};\n\nrooms: hall, lounge, dinning room kitchen, ballroom, conservatory, billiard room, library, study;\nweapons: candlestick, dagger, lead pipe, revolver, rope, spanner;\npeople: miss Scarlett, colonel Mustard, mrs. White, reverend Green, mrs. Peacock, professor Plum;\n\nA murder happened in a mansion with 9 rooms. Victim is dr. Black.\nPlayers goal is to find out who commited a murder, in what room and with what weapon.\nWeapons, people and rooms not involved in murder are distributed between players (as cards) by tool agent.\nThe objective of the game is to deduce the details of the murder. There are six characters, six murder weapons, and nine rooms, leaving the players with 324 possibilities. As soon as a player enters a room, they may make a suggestion as to the details, naming a suspect, the room they are in, and the weapon. For example: \"I suspect Professor Plum, in the Dining Room, with the candlestick\".\nOnce a player makes a suggestion, the others are called upon to disprove it.\nBefore the player's move, tool agent will remind that players their cards. There are two types of moves: making a suggestion (suggestion_move) and disproving other player suggestion (evidence_move);\nIn this version player wins when the correct details are named in the suggestion_move.\n\n<example_game>\n{{user}}:\nlet's start a game of cluedo!\ntool: cards of {{char}} are 'LEAD PIPE', 'BALLROOM', 'CONSERVATORY', 'STUDY', 'Mrs. White'; suggestion_move;\n{{char}}:\n(putting miss Scarlet into the Hall with the Revolver) \"I suspect miss Scarlett, in the Hall, with the revolver.\"\ntool: cards of {{char2}} are 'SPANNER', 'DAGGER', 'Professor Plum', 'LIBRARY', 'Mrs. Peacock'; evidence_move;\n{{char2}}:\n\"No objections.\" (no cards matching the suspicion of {{char}})\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; evidence_move;\n{{user}}:\n\"I object. Miss Scarlett is innocent.\" (shows card with 'Miss Scarlett')\ntool: cards of {{char2}} are 'SPANNER', 'DAGGER', 'Professor Plum', 'LIBRARY', 'Mrs. Peacock'; suggestion_move;\n{{char2}}:\n*So it was not Miss Scarlett, good to know.*\n(moves Mrs. White to the Billiard Room) \"It might have been Mrs. White, in the Billiard Room, with the Revolver.\"\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; evidence_move;\n{{user}}:\n(no matching cards for the assumption of {{char2}}) \"Sounds possible to me.\"\ntool: cards of {{char}} are 'LEAD PIPE', 'BALLROOM', 'CONSERVATORY', 'STUDY', 'Mrs. White'; evidence_move;\n{{char}}:\n(shows Mrs. White card) \"No. Was not Mrs. White\"\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; suggestion_move;\n{{user}}:\n*So not Mrs. White...* (moves Reverend Green into the Billiard Room) \"I suspect Reverend Green, in the Billiard Room, with the Revolver.\"\ntool: Correct. It was Reverend Green in the Billiard Room, with the revolver. {{user}} wins.\n</example_game>",
|
||||||
|
"role": "CluedoPlayer",
|
||||||
|
"role2": "CluedoEnjoyer",
|
||||||
|
"filepath": "sysprompts/cluedo.json",
|
||||||
|
"first_msg": "Hey guys! Want to play cluedo?"
|
||||||
|
}
|
||||||
30
tables.go
30
tables.go
@@ -128,8 +128,8 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
|
|||||||
pages.RemovePage(historyPage)
|
pages.RemovePage(historyPage)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
chatBody.SetMessages(history)
|
chatBody.Messages = history
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
activeChatName = selectedChat
|
activeChatName = selectedChat
|
||||||
pages.RemovePage(historyPage)
|
pages.RemovePage(historyPage)
|
||||||
return
|
return
|
||||||
@@ -149,8 +149,8 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
|
|||||||
}
|
}
|
||||||
showToast("chat deleted", selectedChat+" was deleted")
|
showToast("chat deleted", selectedChat+" was deleted")
|
||||||
// load last chat
|
// load last chat
|
||||||
chatBody.SetMessages(loadOldChatOrGetNew())
|
chatBody.Messages = loadOldChatOrGetNew()
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
pages.RemovePage(historyPage)
|
pages.RemovePage(historyPage)
|
||||||
return
|
return
|
||||||
case "update card":
|
case "update card":
|
||||||
@@ -163,24 +163,16 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
|
|||||||
showToast("error", "no such card: "+agentName)
|
showToast("error", "no such card: "+agentName)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if msg0, ok := chatBody.GetMessageAt(0); ok {
|
cc.SysPrompt = chatBody.Messages[0].Content
|
||||||
cc.SysPrompt = msg0.Content
|
cc.FirstMsg = chatBody.Messages[1].Content
|
||||||
}
|
|
||||||
if msg1, ok := chatBody.GetMessageAt(1); ok {
|
|
||||||
cc.FirstMsg = msg1.Content
|
|
||||||
}
|
|
||||||
if err := pngmeta.WriteToPng(cc.ToSpec(cfg.UserRole), cc.FilePath, cc.FilePath); err != nil {
|
if err := pngmeta.WriteToPng(cc.ToSpec(cfg.UserRole), cc.FilePath, cc.FilePath); err != nil {
|
||||||
logger.Error("failed to write charcard", "error", err)
|
logger.Error("failed to write charcard", "error", err)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
case "move sysprompt onto 1st msg":
|
case "move sysprompt onto 1st msg":
|
||||||
chatBody.WithLock(func(cb *models.ChatBody) {
|
chatBody.Messages[1].Content = chatBody.Messages[0].Content + chatBody.Messages[1].Content
|
||||||
if len(cb.Messages) >= 2 {
|
chatBody.Messages[0].Content = rpDefenitionSysMsg
|
||||||
cb.Messages[1].Content = cb.Messages[0].Content + cb.Messages[1].Content
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
cb.Messages[0].Content = rpDefenitionSysMsg
|
|
||||||
}
|
|
||||||
})
|
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
|
||||||
activeChatName = selectedChat
|
activeChatName = selectedChat
|
||||||
pages.RemovePage(historyPage)
|
pages.RemovePage(historyPage)
|
||||||
return
|
return
|
||||||
@@ -571,7 +563,7 @@ func makeAgentTable(agentList []string) *tview.Table {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
// replace textview
|
// replace textview
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
colorText()
|
colorText()
|
||||||
updateStatusLine()
|
updateStatusLine()
|
||||||
// sysModal.ClearButtons()
|
// sysModal.ClearButtons()
|
||||||
@@ -740,7 +732,7 @@ func makeImportChatTable(filenames []string) *tview.Table {
|
|||||||
colorText()
|
colorText()
|
||||||
updateStatusLine()
|
updateStatusLine()
|
||||||
// redraw the text in text area
|
// redraw the text in text area
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
pages.RemovePage(historyPage)
|
pages.RemovePage(historyPage)
|
||||||
app.SetFocus(textArea)
|
app.SetFocus(textArea)
|
||||||
return
|
return
|
||||||
|
|||||||
26
tools.go
26
tools.go
@@ -278,25 +278,13 @@ func updateToolCapabilities() {
|
|||||||
// getWebAgentClient returns a singleton AgentClient for web agents.
|
// getWebAgentClient returns a singleton AgentClient for web agents.
|
||||||
func getWebAgentClient() *agent.AgentClient {
|
func getWebAgentClient() *agent.AgentClient {
|
||||||
webAgentClientOnce.Do(func() {
|
webAgentClientOnce.Do(func() {
|
||||||
if cfg == nil {
|
|
||||||
if logger != nil {
|
|
||||||
logger.Warn("web agent client unavailable: config not initialized")
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if logger == nil {
|
|
||||||
if logger != nil {
|
|
||||||
logger.Warn("web agent client unavailable: logger not initialized")
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
getToken := func() string {
|
getToken := func() string {
|
||||||
if chunkParser == nil {
|
if chunkParser == nil {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
return chunkParser.GetToken()
|
return chunkParser.GetToken()
|
||||||
}
|
}
|
||||||
webAgentClient = agent.NewAgentClient(cfg, *logger, getToken)
|
webAgentClient = agent.NewAgentClient(cfg, logger, getToken)
|
||||||
})
|
})
|
||||||
return webAgentClient
|
return webAgentClient
|
||||||
}
|
}
|
||||||
@@ -306,13 +294,13 @@ func registerWebAgents() {
|
|||||||
webAgentsOnce.Do(func() {
|
webAgentsOnce.Do(func() {
|
||||||
client := getWebAgentClient()
|
client := getWebAgentClient()
|
||||||
// Register rag_search agent
|
// Register rag_search agent
|
||||||
agent.Register("rag_search", agent.NewWebAgentB(client, ragSearchSysPrompt))
|
agent.RegisterB("rag_search", agent.NewWebAgentB(client, ragSearchSysPrompt))
|
||||||
// Register websearch agent
|
// Register websearch agent
|
||||||
agent.Register("websearch", agent.NewWebAgentB(client, webSearchSysPrompt))
|
agent.RegisterB("websearch", agent.NewWebAgentB(client, webSearchSysPrompt))
|
||||||
// Register read_url agent
|
// Register read_url agent
|
||||||
agent.Register("read_url", agent.NewWebAgentB(client, readURLSysPrompt))
|
agent.RegisterB("read_url", agent.NewWebAgentB(client, readURLSysPrompt))
|
||||||
// Register summarize_chat agent
|
// Register summarize_chat agent
|
||||||
agent.Register("summarize_chat", agent.NewWebAgentB(client, summarySysPrompt))
|
agent.RegisterB("summarize_chat", agent.NewWebAgentB(client, summarySysPrompt))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1215,11 +1203,11 @@ func isCommandAllowed(command string, args ...string) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func summarizeChat(args map[string]string) []byte {
|
func summarizeChat(args map[string]string) []byte {
|
||||||
if chatBody.GetMessageCount() == 0 {
|
if len(chatBody.Messages) == 0 {
|
||||||
return []byte("No chat history to summarize.")
|
return []byte("No chat history to summarize.")
|
||||||
}
|
}
|
||||||
// Format chat history for the agent
|
// Format chat history for the agent
|
||||||
chatText := chatToText(chatBody.GetMessages(), true) // include system and tool messages
|
chatText := chatToText(chatBody.Messages, true) // include system and tool messages
|
||||||
return []byte(chatText)
|
return []byte(chatText)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
174
tui.go
174
tui.go
@@ -29,8 +29,6 @@ var (
|
|||||||
statusLineWidget *tview.TextView
|
statusLineWidget *tview.TextView
|
||||||
helpView *tview.TextView
|
helpView *tview.TextView
|
||||||
flex *tview.Flex
|
flex *tview.Flex
|
||||||
bottomFlex *tview.Flex
|
|
||||||
notificationWidget *tview.TextView
|
|
||||||
imgView *tview.Image
|
imgView *tview.Image
|
||||||
defaultImage = "sysprompts/llama.png"
|
defaultImage = "sysprompts/llama.png"
|
||||||
indexPickWindow *tview.InputField
|
indexPickWindow *tview.InputField
|
||||||
@@ -38,7 +36,6 @@ var (
|
|||||||
roleEditWindow *tview.InputField
|
roleEditWindow *tview.InputField
|
||||||
shellInput *tview.InputField
|
shellInput *tview.InputField
|
||||||
confirmModal *tview.Modal
|
confirmModal *tview.Modal
|
||||||
toastTimer *time.Timer
|
|
||||||
confirmPageName = "confirm"
|
confirmPageName = "confirm"
|
||||||
fullscreenMode bool
|
fullscreenMode bool
|
||||||
positionVisible bool = true
|
positionVisible bool = true
|
||||||
@@ -140,8 +137,8 @@ func setShellMode(enabled bool) {
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
// showToast displays a temporary notification in the bottom-right corner.
|
// showToast displays a temporary message in the top‑right corner.
|
||||||
// It auto-hides after 3 seconds.
|
// It auto‑hides after 3 seconds and disappears when clicked.
|
||||||
func showToast(title, message string) {
|
func showToast(title, message string) {
|
||||||
sanitize := func(s string, maxLen int) string {
|
sanitize := func(s string, maxLen int) string {
|
||||||
sanitized := strings.Map(func(r rune) rune {
|
sanitized := strings.Map(func(r rune) rune {
|
||||||
@@ -157,68 +154,33 @@ func showToast(title, message string) {
|
|||||||
}
|
}
|
||||||
title = sanitize(title, 50)
|
title = sanitize(title, 50)
|
||||||
message = sanitize(message, 197)
|
message = sanitize(message, 197)
|
||||||
if toastTimer != nil {
|
notification := tview.NewTextView().
|
||||||
toastTimer.Stop()
|
SetTextAlign(tview.AlignCenter).
|
||||||
}
|
SetDynamicColors(true).
|
||||||
// show blocking notification to not mess up flex
|
SetRegions(true).
|
||||||
if fullscreenMode {
|
SetText(fmt.Sprintf("[yellow]%s[-]\n", message)).
|
||||||
notification := tview.NewTextView().
|
SetChangedFunc(func() {
|
||||||
SetTextAlign(tview.AlignCenter).
|
app.Draw()
|
||||||
SetDynamicColors(true).
|
|
||||||
SetRegions(true).
|
|
||||||
SetText(fmt.Sprintf("[yellow]%s[-]\n", message)).
|
|
||||||
SetChangedFunc(func() {
|
|
||||||
app.Draw()
|
|
||||||
})
|
|
||||||
notification.SetTitleAlign(tview.AlignLeft).
|
|
||||||
SetBorder(true).
|
|
||||||
SetTitle(title)
|
|
||||||
// Wrap it in a full‑screen Flex to position it in the top‑right corner.
|
|
||||||
// Outer Flex (row) pushes content to the top; inner Flex (column) pushes to the right.
|
|
||||||
background := tview.NewFlex().SetDirection(tview.FlexRow).
|
|
||||||
AddItem(nil, 0, 1, false). // top spacer
|
|
||||||
AddItem(tview.NewFlex().SetDirection(tview.FlexColumn).
|
|
||||||
AddItem(nil, 0, 1, false). // left spacer
|
|
||||||
AddItem(notification, 40, 1, true), // notification width 40
|
|
||||||
5, 1, false) // notification height 5
|
|
||||||
// Generate a unique page name (e.g., using timestamp) to allow multiple toasts.
|
|
||||||
pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano())
|
|
||||||
pages.AddPage(pageName, background, true, true)
|
|
||||||
// Auto‑dismiss after 2 seconds, since blocking is more annoying
|
|
||||||
time.AfterFunc(2*time.Second, func() {
|
|
||||||
app.QueueUpdateDraw(func() {
|
|
||||||
if pages.HasPage(pageName) {
|
|
||||||
pages.RemovePage(pageName)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
return
|
notification.SetTitleAlign(tview.AlignLeft).
|
||||||
}
|
SetBorder(true).
|
||||||
notificationWidget.SetTitle(title)
|
SetTitle(title)
|
||||||
notificationWidget.SetText(fmt.Sprintf("[yellow]%s[-]", message))
|
// Wrap it in a full‑screen Flex to position it in the top‑right corner.
|
||||||
go func() {
|
// Outer Flex (row) pushes content to the top; inner Flex (column) pushes to the right.
|
||||||
|
background := tview.NewFlex().SetDirection(tview.FlexRow).
|
||||||
|
AddItem(nil, 0, 1, false). // top spacer
|
||||||
|
AddItem(tview.NewFlex().SetDirection(tview.FlexColumn).
|
||||||
|
AddItem(nil, 0, 1, false). // left spacer
|
||||||
|
AddItem(notification, 40, 1, true), // notification width 40
|
||||||
|
5, 1, false) // notification height 5
|
||||||
|
// Generate a unique page name (e.g., using timestamp) to allow multiple toasts.
|
||||||
|
pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano())
|
||||||
|
pages.AddPage(pageName, background, true, true)
|
||||||
|
// Auto‑dismiss after 3 seconds.
|
||||||
|
time.AfterFunc(3*time.Second, func() {
|
||||||
app.QueueUpdateDraw(func() {
|
app.QueueUpdateDraw(func() {
|
||||||
flex.RemoveItem(bottomFlex)
|
if pages.HasPage(pageName) {
|
||||||
flex.RemoveItem(statusLineWidget)
|
pages.RemovePage(pageName)
|
||||||
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
|
|
||||||
AddItem(textArea, 0, 1, true).
|
|
||||||
AddItem(notificationWidget, 40, 1, false)
|
|
||||||
flex.AddItem(bottomFlex, 0, 10, true)
|
|
||||||
if positionVisible {
|
|
||||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}()
|
|
||||||
toastTimer = time.AfterFunc(3*time.Second, func() {
|
|
||||||
app.QueueUpdateDraw(func() {
|
|
||||||
flex.RemoveItem(bottomFlex)
|
|
||||||
flex.RemoveItem(statusLineWidget)
|
|
||||||
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
|
|
||||||
AddItem(textArea, 0, 1, true).
|
|
||||||
AddItem(notificationWidget, 0, 0, false)
|
|
||||||
flex.AddItem(bottomFlex, 0, 10, true)
|
|
||||||
if positionVisible {
|
|
||||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
@@ -273,7 +235,7 @@ func init() {
|
|||||||
shellHistoryPos = -1
|
shellHistoryPos = -1
|
||||||
}
|
}
|
||||||
// Handle Tab key for @ file completion
|
// Handle Tab key for @ file completion
|
||||||
if event.Key() == tcell.KeyTab && shellMode {
|
if event.Key() == tcell.KeyTab {
|
||||||
currentText := shellInput.GetText()
|
currentText := shellInput.GetText()
|
||||||
atIndex := strings.LastIndex(currentText, "@")
|
atIndex := strings.LastIndex(currentText, "@")
|
||||||
if atIndex >= 0 {
|
if atIndex >= 0 {
|
||||||
@@ -324,26 +286,12 @@ func init() {
|
|||||||
SetDynamicColors(true).
|
SetDynamicColors(true).
|
||||||
SetRegions(true).
|
SetRegions(true).
|
||||||
SetChangedFunc(func() {
|
SetChangedFunc(func() {
|
||||||
// INFO:
|
|
||||||
// https://github.com/rivo/tview/wiki/Concurrency#event-handlers
|
|
||||||
// although already called by default per tview specs
|
|
||||||
// calling it explicitly makes text streaming to look more smooth
|
|
||||||
app.Draw()
|
app.Draw()
|
||||||
})
|
})
|
||||||
notificationWidget = tview.NewTextView().
|
|
||||||
SetTextAlign(tview.AlignCenter).
|
|
||||||
SetDynamicColors(true).
|
|
||||||
SetRegions(true).
|
|
||||||
SetChangedFunc(func() {
|
|
||||||
})
|
|
||||||
notificationWidget.SetBorder(true).SetTitle("notification")
|
|
||||||
bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn).
|
|
||||||
AddItem(textArea, 0, 1, true).
|
|
||||||
AddItem(notificationWidget, 0, 0, false)
|
|
||||||
//
|
//
|
||||||
flex = tview.NewFlex().SetDirection(tview.FlexRow).
|
flex = tview.NewFlex().SetDirection(tview.FlexRow).
|
||||||
AddItem(textView, 0, 40, false).
|
AddItem(textView, 0, 40, false).
|
||||||
AddItem(bottomFlex, 0, 10, true)
|
AddItem(textArea, 0, 10, true) // Restore original height
|
||||||
if positionVisible {
|
if positionVisible {
|
||||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
flex.AddItem(statusLineWidget, 0, 2, false)
|
||||||
}
|
}
|
||||||
@@ -355,7 +303,7 @@ func init() {
|
|||||||
searchResults = nil // Clear search results
|
searchResults = nil // Clear search results
|
||||||
searchResultLengths = nil // Clear search result lengths
|
searchResultLengths = nil // Clear search result lengths
|
||||||
originalTextForSearch = ""
|
originalTextForSearch = ""
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys)) // Reset text without search regions
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys)) // Reset text without search regions
|
||||||
colorText() // Apply normal chat coloring
|
colorText() // Apply normal chat coloring
|
||||||
} else {
|
} else {
|
||||||
// Original logic if no search is active
|
// Original logic if no search is active
|
||||||
@@ -412,14 +360,10 @@ func init() {
|
|||||||
// y += h / 2
|
// y += h / 2
|
||||||
// return x, y, w, h
|
// return x, y, w, h
|
||||||
// })
|
// })
|
||||||
notificationWidget.SetDrawFunc(func(screen tcell.Screen, x, y, w, h int) (int, int, int, int) {
|
|
||||||
y += h / 2
|
|
||||||
return x, y, w, h
|
|
||||||
})
|
|
||||||
// Initially set up flex without search bar
|
// Initially set up flex without search bar
|
||||||
flex = tview.NewFlex().SetDirection(tview.FlexRow).
|
flex = tview.NewFlex().SetDirection(tview.FlexRow).
|
||||||
AddItem(textView, 0, 40, false).
|
AddItem(textView, 0, 40, false).
|
||||||
AddItem(bottomFlex, 0, 10, true)
|
AddItem(textArea, 0, 10, true) // Restore original height
|
||||||
if positionVisible {
|
if positionVisible {
|
||||||
flex.AddItem(statusLineWidget, 0, 2, false)
|
flex.AddItem(statusLineWidget, 0, 2, false)
|
||||||
}
|
}
|
||||||
@@ -436,11 +380,9 @@ func init() {
|
|||||||
pages.RemovePage(editMsgPage)
|
pages.RemovePage(editMsgPage)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
chatBody.WithLock(func(cb *models.ChatBody) {
|
chatBody.Messages[selectedIndex].SetText(editedMsg)
|
||||||
cb.Messages[selectedIndex].SetText(editedMsg)
|
|
||||||
})
|
|
||||||
// change textarea
|
// change textarea
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
pages.RemovePage(editMsgPage)
|
pages.RemovePage(editMsgPage)
|
||||||
editMode = false
|
editMode = false
|
||||||
return nil
|
return nil
|
||||||
@@ -468,11 +410,9 @@ func init() {
|
|||||||
pages.RemovePage(roleEditPage)
|
pages.RemovePage(roleEditPage)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if selectedIndex >= 0 && selectedIndex < chatBody.GetMessageCount() {
|
if selectedIndex >= 0 && selectedIndex < len(chatBody.Messages) {
|
||||||
chatBody.WithLock(func(cb *models.ChatBody) {
|
chatBody.Messages[selectedIndex].Role = newRole
|
||||||
cb.Messages[selectedIndex].Role = newRole
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
})
|
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
|
||||||
colorText()
|
colorText()
|
||||||
pages.RemovePage(roleEditPage)
|
pages.RemovePage(roleEditPage)
|
||||||
}
|
}
|
||||||
@@ -501,7 +441,7 @@ func init() {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
selectedIndex = siInt
|
selectedIndex = siInt
|
||||||
if chatBody.GetMessageCount()-1 < selectedIndex || selectedIndex < 0 {
|
if len(chatBody.Messages)-1 < selectedIndex || selectedIndex < 0 {
|
||||||
msg := "chosen index is out of bounds, will copy user input"
|
msg := "chosen index is out of bounds, will copy user input"
|
||||||
logger.Warn(msg, "index", selectedIndex)
|
logger.Warn(msg, "index", selectedIndex)
|
||||||
showToast("error", msg)
|
showToast("error", msg)
|
||||||
@@ -511,7 +451,7 @@ func init() {
|
|||||||
hideIndexBar() // Hide overlay instead of removing page directly
|
hideIndexBar() // Hide overlay instead of removing page directly
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
m := chatBody.GetMessages()[selectedIndex]
|
m := chatBody.Messages[selectedIndex]
|
||||||
switch {
|
switch {
|
||||||
case roleEditMode:
|
case roleEditMode:
|
||||||
hideIndexBar() // Hide overlay first
|
hideIndexBar() // Hide overlay first
|
||||||
@@ -578,7 +518,7 @@ func init() {
|
|||||||
searchResults = nil
|
searchResults = nil
|
||||||
searchResultLengths = nil
|
searchResultLengths = nil
|
||||||
originalTextForSearch = ""
|
originalTextForSearch = ""
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
colorText()
|
colorText()
|
||||||
return
|
return
|
||||||
} else {
|
} else {
|
||||||
@@ -636,7 +576,7 @@ func init() {
|
|||||||
//
|
//
|
||||||
textArea.SetMovedFunc(updateStatusLine)
|
textArea.SetMovedFunc(updateStatusLine)
|
||||||
updateStatusLine()
|
updateStatusLine()
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
colorText()
|
colorText()
|
||||||
if scrollToEndEnabled {
|
if scrollToEndEnabled {
|
||||||
textView.ScrollToEnd()
|
textView.ScrollToEnd()
|
||||||
@@ -650,7 +590,7 @@ func init() {
|
|||||||
if event.Key() == tcell.KeyRune && event.Rune() == '5' && event.Modifiers()&tcell.ModAlt != 0 {
|
if event.Key() == tcell.KeyRune && event.Rune() == '5' && event.Modifiers()&tcell.ModAlt != 0 {
|
||||||
// switch cfg.ShowSys
|
// switch cfg.ShowSys
|
||||||
cfg.ShowSys = !cfg.ShowSys
|
cfg.ShowSys = !cfg.ShowSys
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
colorText()
|
colorText()
|
||||||
}
|
}
|
||||||
if event.Key() == tcell.KeyRune && event.Rune() == '3' && event.Modifiers()&tcell.ModAlt != 0 {
|
if event.Key() == tcell.KeyRune && event.Rune() == '3' && event.Modifiers()&tcell.ModAlt != 0 {
|
||||||
@@ -683,7 +623,7 @@ func init() {
|
|||||||
// Handle Alt+T to toggle thinking block visibility
|
// Handle Alt+T to toggle thinking block visibility
|
||||||
if event.Key() == tcell.KeyRune && event.Rune() == 't' && event.Modifiers()&tcell.ModAlt != 0 {
|
if event.Key() == tcell.KeyRune && event.Rune() == 't' && event.Modifiers()&tcell.ModAlt != 0 {
|
||||||
thinkingCollapsed = !thinkingCollapsed
|
thinkingCollapsed = !thinkingCollapsed
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
colorText()
|
colorText()
|
||||||
status := "expanded"
|
status := "expanded"
|
||||||
if thinkingCollapsed {
|
if thinkingCollapsed {
|
||||||
@@ -695,7 +635,7 @@ func init() {
|
|||||||
// Handle Ctrl+T to toggle tool call/response visibility
|
// Handle Ctrl+T to toggle tool call/response visibility
|
||||||
if event.Key() == tcell.KeyCtrlT {
|
if event.Key() == tcell.KeyCtrlT {
|
||||||
toolCollapsed = !toolCollapsed
|
toolCollapsed = !toolCollapsed
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
colorText()
|
colorText()
|
||||||
status := "expanded"
|
status := "expanded"
|
||||||
if toolCollapsed {
|
if toolCollapsed {
|
||||||
@@ -738,14 +678,14 @@ func init() {
|
|||||||
}
|
}
|
||||||
if event.Key() == tcell.KeyF2 && !botRespMode {
|
if event.Key() == tcell.KeyF2 && !botRespMode {
|
||||||
// regen last msg
|
// regen last msg
|
||||||
if chatBody.GetMessageCount() == 0 {
|
if len(chatBody.Messages) == 0 {
|
||||||
showToast("info", "no messages to regenerate")
|
showToast("info", "no messages to regenerate")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
chatBody.TruncateMessages(chatBody.GetMessageCount() - 1)
|
chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1]
|
||||||
// there is no case where user msg is regenerated
|
// there is no case where user msg is regenerated
|
||||||
// lastRole := chatBody.GetMessages()[chatBody.GetMessageCount()-1].Role
|
// lastRole := chatBody.Messages[len(chatBody.Messages)-1].Role
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
// go chatRound("", cfg.UserRole, textView, true, false)
|
// go chatRound("", cfg.UserRole, textView, true, false)
|
||||||
if cfg.TTS_ENABLED {
|
if cfg.TTS_ENABLED {
|
||||||
TTSDoneChan <- true
|
TTSDoneChan <- true
|
||||||
@@ -764,12 +704,12 @@ func init() {
|
|||||||
colorText()
|
colorText()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if chatBody.GetMessageCount() == 0 {
|
if len(chatBody.Messages) == 0 {
|
||||||
showToast("info", "no messages to delete")
|
showToast("info", "no messages to delete")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
chatBody.TruncateMessages(chatBody.GetMessageCount() - 1)
|
chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1]
|
||||||
textView.SetText(chatToText(chatBody.GetMessages(), cfg.ShowSys))
|
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
|
||||||
if cfg.TTS_ENABLED {
|
if cfg.TTS_ENABLED {
|
||||||
TTSDoneChan <- true
|
TTSDoneChan <- true
|
||||||
}
|
}
|
||||||
@@ -817,7 +757,7 @@ func init() {
|
|||||||
if event.Key() == tcell.KeyF7 {
|
if event.Key() == tcell.KeyF7 {
|
||||||
// copy msg to clipboard
|
// copy msg to clipboard
|
||||||
editMode = false
|
editMode = false
|
||||||
m := chatBody.GetMessages()[chatBody.GetMessageCount()-1]
|
m := chatBody.Messages[len(chatBody.Messages)-1]
|
||||||
msgText := m.GetText()
|
msgText := m.GetText()
|
||||||
if err := copyToClipboard(msgText); err != nil {
|
if err := copyToClipboard(msgText); err != nil {
|
||||||
logger.Error("failed to copy to clipboard", "error", err)
|
logger.Error("failed to copy to clipboard", "error", err)
|
||||||
@@ -1001,10 +941,10 @@ func init() {
|
|||||||
TTSDoneChan <- true
|
TTSDoneChan <- true
|
||||||
}
|
}
|
||||||
if event.Key() == tcell.KeyRune && event.Rune() == '0' && event.Modifiers()&tcell.ModAlt != 0 && cfg.TTS_ENABLED {
|
if event.Key() == tcell.KeyRune && event.Rune() == '0' && event.Modifiers()&tcell.ModAlt != 0 && cfg.TTS_ENABLED {
|
||||||
if chatBody.GetMessageCount() > 0 {
|
if len(chatBody.Messages) > 0 {
|
||||||
// Stop any currently playing TTS first
|
// Stop any currently playing TTS first
|
||||||
TTSDoneChan <- true
|
TTSDoneChan <- true
|
||||||
lastMsg := chatBody.GetMessages()[chatBody.GetMessageCount()-1]
|
lastMsg := chatBody.Messages[len(chatBody.Messages)-1]
|
||||||
cleanedText := models.CleanText(lastMsg.GetText())
|
cleanedText := models.CleanText(lastMsg.GetText())
|
||||||
if cleanedText != "" {
|
if cleanedText != "" {
|
||||||
// nolint: errcheck
|
// nolint: errcheck
|
||||||
@@ -1016,7 +956,7 @@ func init() {
|
|||||||
if event.Key() == tcell.KeyCtrlW {
|
if event.Key() == tcell.KeyCtrlW {
|
||||||
// INFO: continue bot/text message
|
// INFO: continue bot/text message
|
||||||
// without new role
|
// without new role
|
||||||
lastRole := chatBody.GetMessages()[chatBody.GetMessageCount()-1].Role
|
lastRole := chatBody.Messages[len(chatBody.Messages)-1].Role
|
||||||
// go chatRound("", lastRole, textView, false, true)
|
// go chatRound("", lastRole, textView, false, true)
|
||||||
chatRoundChan <- &models.ChatRoundReq{Role: lastRole, Resume: true}
|
chatRoundChan <- &models.ChatRoundReq{Role: lastRole, Resume: true}
|
||||||
return nil
|
return nil
|
||||||
@@ -1102,7 +1042,7 @@ func init() {
|
|||||||
if event.Key() == tcell.KeyRune && event.Modifiers() == tcell.ModAlt && event.Rune() == '9' {
|
if event.Key() == tcell.KeyRune && event.Modifiers() == tcell.ModAlt && event.Rune() == '9' {
|
||||||
// Warm up (load) the currently selected model
|
// Warm up (load) the currently selected model
|
||||||
go warmUpModel()
|
go warmUpModel()
|
||||||
showToast("model warmup", "loading model: "+chatBody.GetModel())
|
showToast("model warmup", "loading model: "+chatBody.Model)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// cannot send msg in editMode or botRespMode
|
// cannot send msg in editMode or botRespMode
|
||||||
@@ -1141,7 +1081,7 @@ func init() {
|
|||||||
}
|
}
|
||||||
// add user icon before user msg
|
// add user icon before user msg
|
||||||
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
||||||
nl, chatBody.GetMessageCount(), persona, msgText)
|
nl, len(chatBody.Messages), persona, msgText)
|
||||||
textArea.SetText("", true)
|
textArea.SetText("", true)
|
||||||
if scrollToEndEnabled {
|
if scrollToEndEnabled {
|
||||||
textView.ScrollToEnd()
|
textView.ScrollToEnd()
|
||||||
@@ -1155,7 +1095,7 @@ func init() {
|
|||||||
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
|
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if event.Key() == tcell.KeyTab && !shellMode {
|
if event.Key() == tcell.KeyTab {
|
||||||
currentF := app.GetFocus()
|
currentF := app.GetFocus()
|
||||||
if currentF == textArea {
|
if currentF == textArea {
|
||||||
currentText := textArea.GetText()
|
currentText := textArea.GetText()
|
||||||
|
|||||||
Reference in New Issue
Block a user