29 Commits

Author SHA1 Message Date
Grail Finder
d144ee76d9 Chore: pw tools to be disabled as default 2026-03-04 11:45:54 +03:00
Grail Finder
abcaad6609 Enha: native notification implementation 2026-03-04 11:25:13 +03:00
Grail Finder
50ce0200af Fix: graceful shutdown in tui, to avoid other key block 2026-03-04 08:29:47 +03:00
Grail Finder
58ccd63f4a Fix: avoid raw terminal after ctrl+c exit 2026-03-04 08:25:53 +03:00
Grail Finder
3611d7eb59 Fix: missfire of no-vision notification 2026-03-03 16:55:09 +03:00
Grail Finder
8974d2f52c Fix: remove panics from code 2026-03-03 14:51:36 +03:00
Grail Finder
6b0d03f2d6 Fix: decompres before notify 2026-03-03 14:26:06 +03:00
Grail Finder
fb4deb1161 Fix: handle empty choices 2026-03-03 14:13:18 +03:00
Grail Finder
0e5d37666f Enha: id for card map 2026-03-03 11:46:03 +03:00
Grail Finder
093103bdd7 Feat (pw_tools): click_at 2026-03-03 10:53:04 +03:00
Grail Finder
6c9a1ba56b Chore: change 'when askes' to more proactive phrasing 2026-03-03 09:37:34 +03:00
Grail Finder
93ecfc8a34 Enha: palywright dom and elements fetching 2026-03-03 09:27:05 +03:00
Grail Finder
0c9c590d8f Enha (playwright): conditionaly install and use tools 2026-03-03 09:15:18 +03:00
Grail Finder
d130254e88 Chore (pw): restructure 2026-03-03 08:35:18 +03:00
Grail Finder
6e7a063300 Enha: remove window tools if no vision 2026-03-03 08:27:14 +03:00
Grail Finder
c05b93299c Chore: linter complaints 2026-03-03 07:38:57 +03:00
Grail Finder
cad1bd46c1 Feat: playwright tools 2026-03-02 19:20:54 +03:00
Grail Finder
4bddce3700 Enha: compute estimate of non llm text 2026-03-02 15:21:45 +03:00
Grail Finder
fcc71987bf Feat: token use estimation 2026-03-02 14:54:20 +03:00
Grail Finder
8458edf5a8 Enha: interrupt llm and tool both 2026-03-02 12:19:50 +03:00
Grail Finder
07b06bb0d3 Enha: tabcompletion is back in textarea 2026-03-02 12:09:27 +03:00
Grail Finder
3389b1d83b Fix: linter complaints 2026-03-02 11:39:55 +03:00
Grail Finder
4f6000a43a Enha: check if model has vision before giving it vision tools 2026-03-02 11:25:20 +03:00
Grail Finder
9ba46b40cc Feat: screencapture for completion 2026-03-02 11:12:04 +03:00
Grail Finder
5bb456272e Feat: capture window (screenshot) 2026-03-02 10:33:41 +03:00
Grail Finder
8999f48fb9 Fix (completion): handle multiple images in history 2026-03-02 09:23:22 +03:00
Grail Finder
b2f280a7f1 Feat: read img for completion 2026-03-02 07:46:08 +03:00
Grail Finder
65cbd5d6a6 Fix (ctrl+v): trim loaded mark from the model 2026-03-02 07:19:21 +03:00
Grail Finder
caac1d397a Feat: read img tool for chat endpoint 2026-03-02 07:12:28 +03:00
23 changed files with 2006 additions and 281 deletions

163
bot.go
View File

@@ -3,6 +3,7 @@ package main
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"compress/gzip"
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
@@ -64,6 +65,8 @@ var (
"meta-llama/llama-3.3-70b-instruct:free", "meta-llama/llama-3.3-70b-instruct:free",
} }
LocalModels = []string{} LocalModels = []string{}
localModelsData *models.LCPModels
orModelsData *models.ORModels
) )
var thinkBlockRE = regexp.MustCompile(`(?s)<think>.*?</think>`) var thinkBlockRE = regexp.MustCompile(`(?s)<think>.*?</think>`)
@@ -265,9 +268,7 @@ func warmUpModel() {
// Continue with warmup attempt anyway // Continue with warmup attempt anyway
} }
if loaded { if loaded {
if err := notifyUser("model already loaded", "Model "+chatBody.Model+" is already loaded."); err != nil { showToast("model already loaded", "Model "+chatBody.Model+" is already loaded.")
logger.Debug("failed to notify user", "error", err)
}
return return
} }
go func() { go func() {
@@ -355,6 +356,7 @@ func fetchORModels(free bool) ([]string, error) {
if err := json.NewDecoder(resp.Body).Decode(data); err != nil { if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
return nil, err return nil, err
} }
orModelsData = data
freeModels := data.ListModels(free) freeModels := data.ListModels(free)
return freeModels, nil return freeModels, nil
} }
@@ -416,6 +418,7 @@ func fetchLCPModelsWithStatus() (*models.LCPModels, error) {
if err := json.NewDecoder(resp.Body).Decode(data); err != nil { if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
return nil, err return nil, err
} }
localModelsData = data
return data, nil return data, nil
} }
@@ -433,6 +436,33 @@ func isModelLoaded(modelID string) (bool, error) {
return false, nil return false, nil
} }
// ModelHasVision reports whether modelID, reached through the given api URL,
// is listed as accepting image input.
//
// The provider is selected by substring match on api:
//   - contains "deepseek": always reports false.
//   - contains "openrouter": downloads the OpenRouter model list and asks it.
//   - anything else: asks the list returned by fetchLCPModelsWithStatus.
//
// Every fetch or decode failure is logged as a warning and reported as false,
// so callers cannot distinguish "no vision" from "could not find out".
func ModelHasVision(api, modelID string) bool {
	switch {
	case strings.Contains(api, "deepseek"):
		return false
	case strings.Contains(api, "openrouter"):
		resp, err := http.Get("https://openrouter.ai/api/v1/models")
		if err != nil {
			logger.Warn("failed to fetch OR models for vision check", "error", err)
			return false
		}
		defer resp.Body.Close()
		// An error page is not a model list; do not try to decode it.
		if resp.StatusCode != http.StatusOK {
			logger.Warn("failed to fetch OR models for vision check", "status_code", resp.StatusCode)
			return false
		}
		orm := &models.ORModels{}
		if err := json.NewDecoder(resp.Body).Decode(orm); err != nil {
			logger.Warn("failed to decode OR models for vision check", "error", err)
			return false
		}
		return orm.HasVision(modelID)
	default:
		// Named lcpModels so the imported models package is not shadowed.
		lcpModels, err := fetchLCPModelsWithStatus()
		if err != nil {
			logger.Warn("failed to fetch LCP models for vision check", "error", err)
			return false
		}
		return lcpModels.HasVision(modelID)
	}
}
// monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded. // monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded.
func monitorModelLoad(modelID string) { func monitorModelLoad(modelID string) {
go func() { go func() {
@@ -451,9 +481,7 @@ func monitorModelLoad(modelID string) {
continue continue
} }
if loaded { if loaded {
if err := notifyUser("model loaded", "Model "+modelID+" is now loaded and ready."); err != nil { showToast("model loaded", "Model "+modelID+" is now loaded and ready.")
logger.Debug("failed to notify user", "error", err)
}
refreshChatDisplay() refreshChatDisplay()
return return
} }
@@ -464,6 +492,17 @@ func monitorModelLoad(modelID string) {
// extractDetailedErrorFromBytes extracts detailed error information from response body bytes // extractDetailedErrorFromBytes extracts detailed error information from response body bytes
func extractDetailedErrorFromBytes(body []byte, statusCode int) string { func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
// Try to decompress gzip if the response is compressed
if len(body) >= 2 && body[0] == 0x1f && body[1] == 0x8b {
reader, err := gzip.NewReader(bytes.NewReader(body))
if err == nil {
decompressed, err := io.ReadAll(reader)
reader.Close()
if err == nil {
body = decompressed
}
}
}
// Try to parse as JSON to extract detailed error information // Try to parse as JSON to extract detailed error information
var errorResponse map[string]any var errorResponse map[string]any
if err := json.Unmarshal(body, &errorResponse); err == nil { if err := json.Unmarshal(body, &errorResponse); err == nil {
@@ -529,9 +568,7 @@ func sendMsgToLLM(body io.Reader) {
req, err := http.NewRequest("POST", cfg.CurrentAPI, body) req, err := http.NewRequest("POST", cfg.CurrentAPI, body)
if err != nil { if err != nil {
logger.Error("newreq error", "error", err) logger.Error("newreq error", "error", err)
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil { showToast("error", "apicall failed:"+err.Error())
logger.Error("failed to notify", "error", err)
}
streamDone <- true streamDone <- true
return return
} }
@@ -543,9 +580,7 @@ func sendMsgToLLM(body io.Reader) {
resp, err := httpClient.Do(req) resp, err := httpClient.Do(req)
if err != nil { if err != nil {
logger.Error("llamacpp api", "error", err) logger.Error("llamacpp api", "error", err)
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil { showToast("error", "apicall failed:"+err.Error())
logger.Error("failed to notify", "error", err)
}
streamDone <- true streamDone <- true
return return
} }
@@ -556,9 +591,7 @@ func sendMsgToLLM(body io.Reader) {
if err != nil { if err != nil {
logger.Error("failed to read error response body", "error", err, "status_code", resp.StatusCode) logger.Error("failed to read error response body", "error", err, "status_code", resp.StatusCode)
detailedError := fmt.Sprintf("HTTP Status: %d, Failed to read response body: %v", resp.StatusCode, err) detailedError := fmt.Sprintf("HTTP Status: %d, Failed to read response body: %v", resp.StatusCode, err)
if err := notifyUser("API Error", detailedError); err != nil { showToast("API Error", detailedError)
logger.Error("failed to notify", "error", err)
}
resp.Body.Close() resp.Body.Close()
streamDone <- true streamDone <- true
return return
@@ -566,9 +599,7 @@ func sendMsgToLLM(body io.Reader) {
// Parse the error response for detailed information // Parse the error response for detailed information
detailedError := extractDetailedErrorFromBytes(bodyBytes, resp.StatusCode) detailedError := extractDetailedErrorFromBytes(bodyBytes, resp.StatusCode)
logger.Error("API returned error status", "status_code", resp.StatusCode, "detailed_error", detailedError) logger.Error("API returned error status", "status_code", resp.StatusCode, "detailed_error", detailedError)
if err := notifyUser("API Error", detailedError); err != nil { showToast("API Error", detailedError)
logger.Error("failed to notify", "error", err)
}
resp.Body.Close() resp.Body.Close()
streamDone <- true streamDone <- true
return return
@@ -605,16 +636,12 @@ func sendMsgToLLM(body io.Reader) {
detailedError := fmt.Sprintf("Streaming connection closed unexpectedly (Status: %d). This may indicate an API error. Check your API provider and model settings.", resp.StatusCode) detailedError := fmt.Sprintf("Streaming connection closed unexpectedly (Status: %d). This may indicate an API error. Check your API provider and model settings.", resp.StatusCode)
logger.Error("error reading response body", "error", err, "detailed_error", detailedError, logger.Error("error reading response body", "error", err, "detailed_error", detailedError,
"status_code", resp.StatusCode, "user_role", cfg.UserRole, "parser", chunkParser, "link", cfg.CurrentAPI) "status_code", resp.StatusCode, "user_role", cfg.UserRole, "parser", chunkParser, "link", cfg.CurrentAPI)
if err := notifyUser("API Error", detailedError); err != nil { showToast("API Error", detailedError)
logger.Error("failed to notify", "error", err)
}
} else { } else {
logger.Error("error reading response body", "error", err, "line", string(line), logger.Error("error reading response body", "error", err, "line", string(line),
"user_role", cfg.UserRole, "parser", chunkParser, "link", cfg.CurrentAPI) "user_role", cfg.UserRole, "parser", chunkParser, "link", cfg.CurrentAPI)
// if err.Error() != "EOF" { // if err.Error() != "EOF" {
if err := notifyUser("API error", err.Error()); err != nil { showToast("API error", err.Error())
logger.Error("failed to notify", "error", err)
}
} }
streamDone <- true streamDone <- true
break break
@@ -641,9 +668,7 @@ func sendMsgToLLM(body io.Reader) {
if err != nil { if err != nil {
logger.Error("error parsing response body", "error", err, logger.Error("error parsing response body", "error", err,
"line", string(line), "url", cfg.CurrentAPI) "line", string(line), "url", cfg.CurrentAPI)
if err := notifyUser("LLM Response Error", "Failed to parse LLM response: "+err.Error()); err != nil { showToast("LLM Response Error", "Failed to parse LLM response: "+err.Error())
logger.Error("failed to notify user", "error", err)
}
streamDone <- true streamDone <- true
break break
} }
@@ -718,7 +743,7 @@ func sendMsgToLLM(body io.Reader) {
} }
interrupt: interrupt:
if interruptResp { // read bytes, so it would not get into beginning of the next req if interruptResp { // read bytes, so it would not get into beginning of the next req
interruptResp = false // interruptResp = false
logger.Info("interrupted bot response", "chunk_counter", counter) logger.Info("interrupted bot response", "chunk_counter", counter)
streamDone <- true streamDone <- true
break break
@@ -772,6 +797,7 @@ func showSpinner() {
} }
func chatRound(r *models.ChatRoundReq) error { func chatRound(r *models.ChatRoundReq) error {
interruptResp = false
botRespMode = true botRespMode = true
go showSpinner() go showSpinner()
updateStatusLine() updateStatusLine()
@@ -937,6 +963,9 @@ out:
} }
// Strip think blocks before parsing for tool calls // Strip think blocks before parsing for tool calls
respTextNoThink := thinkBlockRE.ReplaceAllString(respText.String(), "") respTextNoThink := thinkBlockRE.ReplaceAllString(respText.String(), "")
if interruptResp {
return nil
}
if findCall(respTextNoThink, toolResp.String()) { if findCall(respTextNoThink, toolResp.String()) {
return nil return nil
} }
@@ -1174,17 +1203,59 @@ func findCall(msg, toolCall string) bool {
toolRunningMode = false toolRunningMode = false
toolMsg := string(resp) toolMsg := string(resp)
logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg) logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolMsg)
// Create tool response message with the proper tool_call_id // Create tool response message with the proper tool_call_id
// Mark shell commands as always visible // Mark shell commands as always visible
isShellCommand := fc.Name == "execute_command" isShellCommand := fc.Name == "execute_command"
toolResponseMsg := models.RoleMsg{ // Check if response is multimodal content (image)
var toolResponseMsg models.RoleMsg
if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
// Parse multimodal content response
multimodalResp := models.MultimodalToolResp{}
if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
// Create RoleMsg with ContentParts
var contentParts []any
for _, part := range multimodalResp.Parts {
partType := part["type"]
switch partType {
case "text":
contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
case "image_url":
contentParts = append(contentParts, models.ImageContentPart{
Type: "image_url",
ImageURL: struct {
URL string `json:"url"`
}{URL: part["url"]},
})
default:
continue
}
}
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole,
ContentParts: contentParts,
HasContentParts: true,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
}
} else {
// Fallback to regular content
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole, Role: cfg.ToolRole,
Content: toolMsg, Content: toolMsg,
ToolCallID: lastToolCall.ID, ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand, IsShellCommand: isShellCommand,
} }
}
} else {
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole,
Content: toolMsg,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
}
}
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
chatBody.Messages = append(chatBody.Messages, toolResponseMsg) chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages)) logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
// Clear the stored tool call ID after using it // Clear the stored tool call ID after using it
@@ -1305,8 +1376,8 @@ func applyCharCard(cc *models.CharCard, loadHistory bool) {
} }
func charToStart(agentName string, keepSysP bool) bool { func charToStart(agentName string, keepSysP bool) bool {
cc, ok := sysMap[agentName] cc := GetCardByRole(agentName)
if !ok { if cc == nil {
return false return false
} }
applyCharCard(cc, keepSysP) applyCharCard(cc, keepSysP)
@@ -1339,6 +1410,7 @@ func updateModelLists() {
chatBody.Model = m chatBody.Model = m
cachedModelColor = "green" cachedModelColor = "green"
updateStatusLine() updateStatusLine()
updateToolCapabilities()
app.Draw() app.Draw()
return return
} }
@@ -1366,15 +1438,15 @@ func refreshLocalModelsIfEmpty() {
func summarizeAndStartNewChat() { func summarizeAndStartNewChat() {
if len(chatBody.Messages) == 0 { if len(chatBody.Messages) == 0 {
_ = notifyUser("info", "No chat history to summarize") showToast("info", "No chat history to summarize")
return return
} }
_ = notifyUser("info", "Summarizing chat history...") showToast("info", "Summarizing chat history...")
// Call the summarize_chat tool via agent // Call the summarize_chat tool via agent
summaryBytes := callToolWithAgent("summarize_chat", map[string]string{}) summaryBytes := callToolWithAgent("summarize_chat", map[string]string{})
summary := string(summaryBytes) summary := string(summaryBytes)
if summary == "" { if summary == "" {
_ = notifyUser("error", "Failed to generate summary") showToast("error", "Failed to generate summary")
return return
} }
// Start a new chat // Start a new chat
@@ -1393,7 +1465,7 @@ func summarizeAndStartNewChat() {
if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil { if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
logger.Warn("failed to update storage after injecting summary", "error", err) logger.Warn("failed to update storage after injecting summary", "error", err)
} }
_ = notifyUser("info", "Chat summarized and new chat started with summary as tool response") showToast("info", "Chat summarized and new chat started with summary as tool response")
} }
func init() { func init() {
@@ -1452,6 +1524,23 @@ func init() {
if cfg.STT_ENABLED { if cfg.STT_ENABLED {
asr = NewSTT(logger, cfg) asr = NewSTT(logger, cfg)
} }
if cfg.PlaywrightEnabled {
if err := checkPlaywright(); err != nil {
// slow; need a faster way to check whether playwright is installed
if err := installPW(); err != nil {
logger.Error("failed to install playwright", "error", err)
cancel()
os.Exit(1)
return
}
if err := checkPlaywright(); err != nil {
logger.Error("failed to run playwright", "error", err)
cancel()
os.Exit(1)
return
}
}
}
// Initialize scrollToEndEnabled based on config // Initialize scrollToEndEnabled based on config
scrollToEndEnabled = cfg.AutoScrollEnabled scrollToEndEnabled = cfg.AutoScrollEnabled
go updateModelLists() go updateModelLists()

View File

@@ -56,3 +56,6 @@ StripThinkingFromAPI = true # Strip <think> blocks from messages before sending
# Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled) # Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled)
# Models that support reasoning will include thinking content wrapped in <think> tags # Models that support reasoning will include thinking content wrapped in <think> tags
ReasoningEffort = "medium" ReasoningEffort = "medium"
# playwright tools
PlaywrightEnabled = false
PlaywrightDebug = false

View File

@@ -70,6 +70,9 @@ type Config struct {
CharSpecificContextEnabled bool `toml:"CharSpecificContextEnabled"` CharSpecificContextEnabled bool `toml:"CharSpecificContextEnabled"`
CharSpecificContextTag string `toml:"CharSpecificContextTag"` CharSpecificContextTag string `toml:"CharSpecificContextTag"`
AutoTurn bool `toml:"AutoTurn"` AutoTurn bool `toml:"AutoTurn"`
// playwright browser
PlaywrightEnabled bool `toml:"PlaywrightEnabled"`
PlaywrightDebug bool `toml:"PlaywrightDebug"` // !headless
} }
func LoadConfig(fn string) (*Config, error) { func LoadConfig(fn string) (*Config, error) {

View File

@@ -162,6 +162,15 @@ Those could be switched in program, but also bould be setup in config.
#### ToolUse #### ToolUse
- Enable or disable explanation of tools to llm, so it could use them. - Enable or disable explanation of tools to llm, so it could use them.
#### Playwright Browser Automation
These settings enable browser automation tools available to the LLM.
- **PlaywrightEnabled** (`false`)
- Enable or disable Playwright browser automation tools for the LLM. When enabled, the LLM can use tools like `pw_browser`, `pw_close`, and `pw_status` to automate browser interactions.
- **PlaywrightDebug** (`false`)
- Enable debug mode for the Playwright browser. When set to `true`, the browser runs in visible (non-headless) mode, displaying its GUI for debugging purposes. When set to `false` (the default), the browser runs in headless mode.
### StripThinkingFromAPI (`true`) ### StripThinkingFromAPI (`true`)
- Strip thinking blocks from messages before sending to LLM. Keeps them in chat history for local viewing but reduces token usage in API calls. - Strip thinking blocks from messages before sending to LLM. Keeps them in chat history for local viewing but reduces token usage in API calls.

4
go.mod
View File

@@ -7,6 +7,7 @@ require (
github.com/GrailFinder/google-translate-tts v0.1.3 github.com/GrailFinder/google-translate-tts v0.1.3
github.com/GrailFinder/searchagent v0.2.0 github.com/GrailFinder/searchagent v0.2.0
github.com/PuerkitoBio/goquery v1.11.0 github.com/PuerkitoBio/goquery v1.11.0
github.com/deckarep/golang-set/v2 v2.8.0
github.com/gdamore/tcell/v2 v2.13.2 github.com/gdamore/tcell/v2 v2.13.2
github.com/glebarez/go-sqlite v1.22.0 github.com/glebarez/go-sqlite v1.22.0
github.com/gopxl/beep/v2 v2.1.1 github.com/gopxl/beep/v2 v2.1.1
@@ -14,6 +15,7 @@ require (
github.com/jmoiron/sqlx v1.4.0 github.com/jmoiron/sqlx v1.4.0
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728
github.com/neurosnap/sentences v1.1.2 github.com/neurosnap/sentences v1.1.2
github.com/playwright-community/playwright-go v0.5700.1
github.com/rivo/tview v0.42.0 github.com/rivo/tview v0.42.0
github.com/yuin/goldmark v1.4.13 github.com/yuin/goldmark v1.4.13
) )
@@ -24,6 +26,8 @@ require (
github.com/ebitengine/oto/v3 v3.4.0 // indirect github.com/ebitengine/oto/v3 v3.4.0 // indirect
github.com/ebitengine/purego v0.9.1 // indirect github.com/ebitengine/purego v0.9.1 // indirect
github.com/gdamore/encoding v1.0.1 // indirect github.com/gdamore/encoding v1.0.1 // indirect
github.com/go-jose/go-jose/v3 v3.0.4 // indirect
github.com/go-stack/stack v1.8.1 // indirect
github.com/google/uuid v1.6.0 // indirect github.com/google/uuid v1.6.0 // indirect
github.com/hajimehoshi/go-mp3 v0.3.4 // indirect github.com/hajimehoshi/go-mp3 v0.3.4 // indirect
github.com/hajimehoshi/oto/v2 v2.3.1 // indirect github.com/hajimehoshi/oto/v2 v2.3.1 // indirect

14
go.sum
View File

@@ -10,8 +10,11 @@ github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43
github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ= github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set/v2 v2.8.0 h1:swm0rlPCmdWn9mESxKOjWk8hXSqoxOp+ZlfuyaAdFlQ=
github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/bQ= github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/bQ=
@@ -24,8 +27,13 @@ github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx
github.com/gdamore/tcell/v2 v2.13.2/go.mod h1:+Wfe208WDdB7INEtCsNrAN6O2m+wsTPk1RAovjaILlo= github.com/gdamore/tcell/v2 v2.13.2/go.mod h1:+Wfe208WDdB7INEtCsNrAN6O2m+wsTPk1RAovjaILlo=
github.com/glebarez/go-sqlite v1.22.0 h1:uAcMJhaA6r3LHMTFgP0SifzgXg46yJkgxqyuyec+ruQ= github.com/glebarez/go-sqlite v1.22.0 h1:uAcMJhaA6r3LHMTFgP0SifzgXg46yJkgxqyuyec+ruQ=
github.com/glebarez/go-sqlite v1.22.0/go.mod h1:PlBIdHe0+aUEFn+r2/uthrWq4FxbzugL0L8Li6yQJbc= github.com/glebarez/go-sqlite v1.22.0/go.mod h1:PlBIdHe0+aUEFn+r2/uthrWq4FxbzugL0L8Li6yQJbc=
github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY=
github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw=
github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
@@ -59,6 +67,8 @@ github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7
github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ= github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U=
github.com/playwright-community/playwright-go v0.5700.1/go.mod h1:MlSn1dZrx8rszbCxY6x3qK89ZesJUYVx21B2JnkoNF0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
@@ -67,6 +77,8 @@ github.com/rivo/tview v0.42.0 h1:b/ftp+RxtDsHSaynXTbJb+/n/BxDEi+W3UfF5jILK6c=
github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY= github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
@@ -152,6 +164,8 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb
golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis= modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=

View File

@@ -11,6 +11,7 @@ import (
"path" "path"
"path/filepath" "path/filepath"
"slices" "slices"
"strconv"
"strings" "strings"
"time" "time"
"unicode" "unicode"
@@ -197,7 +198,11 @@ func initSysCards() ([]string, error) {
logger.Warn("empty role", "file", cc.FilePath) logger.Warn("empty role", "file", cc.FilePath)
continue continue
} }
sysMap[cc.Role] = cc if cc.ID == "" {
cc.ID = models.ComputeCardID(cc.Role, cc.FilePath)
}
sysMap[cc.ID] = cc
roleToID[cc.Role] = cc.ID
labels = append(labels, cc.Role) labels = append(labels, cc.Role)
} }
return labels, nil return labels, nil
@@ -286,24 +291,25 @@ func listRolesWithUser() []string {
return result return result
} }
func loadImage() { func loadImage() error {
filepath := defaultImage filepath := defaultImage
cc, ok := sysMap[cfg.AssistantRole] cc := GetCardByRole(cfg.AssistantRole)
if ok { if cc != nil {
if strings.HasSuffix(cc.FilePath, ".png") { if strings.HasSuffix(cc.FilePath, ".png") {
filepath = cc.FilePath filepath = cc.FilePath
} }
} }
file, err := os.Open(filepath) file, err := os.Open(filepath)
if err != nil { if err != nil {
panic(err) return fmt.Errorf("failed to open image: %w", err)
} }
defer file.Close() defer file.Close()
img, _, err := image.Decode(file) img, _, err := image.Decode(file)
if err != nil { if err != nil {
panic(err) return fmt.Errorf("failed to decode image: %w", err)
} }
imgView.SetImage(img) imgView.SetImage(img)
return nil
} }
func strInSlice(s string, sl []string) bool { func strInSlice(s string, sl []string) bool {
@@ -376,9 +382,90 @@ func makeStatusLine() string {
roleInject := fmt.Sprintf(" | [%s:-:b]role injection[-:-:-] (alt+7)", boolColors[injectRole]) roleInject := fmt.Sprintf(" | [%s:-:b]role injection[-:-:-] (alt+7)", boolColors[injectRole])
statusLine += roleInject statusLine += roleInject
} }
// context tokens
contextTokens := getContextTokens()
maxCtx := getMaxContextTokens()
if maxCtx == 0 {
maxCtx = 16384
}
if contextTokens > 0 {
contextInfo := fmt.Sprintf(" | context-estim: [orange:-:b]%d/%d[-:-:-]", contextTokens, maxCtx)
statusLine += contextInfo
}
return statusLine + imageInfo + shellModeInfo return statusLine + imageInfo + shellModeInfo
} }
// getContextTokens returns an estimate of the tokens held by the current chat.
// Messages carrying positive token stats contribute that count; any other
// message with non-empty text contributes len(text)/4 as a rough estimate.
// It returns 0 when there is no chat body or no message slice.
func getContextTokens() int {
	if chatBody == nil || chatBody.Messages == nil {
		return 0
	}
	history := chatBody.Messages
	sum := 0
	for idx := range history {
		entry := &history[idx]
		if entry.Stats != nil && entry.Stats.Tokens > 0 {
			sum += entry.Stats.Tokens
			continue
		}
		// No usable stats: fall back to a byte-length based estimate.
		if text := entry.GetText(); text != "" {
			sum += len(text) / 4
		}
	}
	return sum
}
// deepseekContext is the context size, in tokens, returned for DeepSeek APIs.
const deepseekContext = 128000

// getMaxContextTokens returns the context size of the current model, or 0
// when it cannot be determined (no chat body, no model name, no model data,
// or the model is not listed).
//
// The lookup depends on cfg.CurrentAPI:
//   - "openrouter": the ContextLength of the matching entry in orModelsData;
//   - "deepseek": the fixed deepseekContext value;
//   - anything else: the --ctx-size launch argument of the matching entry in
//     localModelsData.
func getMaxContextTokens() int {
	if chatBody == nil || chatBody.Model == "" {
		return 0
	}
	modelName := chatBody.Model
	switch {
	case strings.Contains(cfg.CurrentAPI, "openrouter"):
		if orModelsData == nil {
			return 0
		}
		for i := range orModelsData.Data {
			if m := &orModelsData.Data[i]; m.ID == modelName {
				return m.ContextLength
			}
		}
	case strings.Contains(cfg.CurrentAPI, "deepseek"):
		return deepseekContext
	default:
		if localModelsData == nil {
			return 0
		}
		for i := range localModelsData.Data {
			m := &localModelsData.Data[i]
			if m.ID != modelName {
				continue
			}
			if n, ok := ctxSizeFromArgs(m.Status.Args); ok {
				return n
			}
		}
	}
	return 0
}

// ctxSizeFromArgs extracts the value of the --ctx-size flag from a list of
// command-line arguments. Both "--ctx-size N" and "--ctx-size=N" are
// accepted; the first occurrence with a valid integer wins. The boolean
// result is false when no parsable value is present.
func ctxSizeFromArgs(args []string) (int, bool) {
	const flag = "--ctx-size"
	for i, arg := range args {
		var raw string
		switch {
		case arg == flag && i+1 < len(args):
			// Space-separated form: the value is the following argument.
			raw = args[i+1]
		case strings.HasPrefix(arg, flag+"="):
			raw = strings.TrimPrefix(arg, flag+"=")
		default:
			continue
		}
		if n, err := strconv.Atoi(raw); err == nil {
			return n, true
		}
	}
	return 0, false
}
// set of roles within card definition and mention in chat history // set of roles within card definition and mention in chat history
func listChatRoles() []string { func listChatRoles() []string {
currentChat, ok := chatMap[activeChatName] currentChat, ok := chatMap[activeChatName]
@@ -386,13 +473,9 @@ func listChatRoles() []string {
if !ok { if !ok {
return cbc return cbc
} }
currentCard, ok := sysMap[currentChat.Agent] currentCard := GetCardByRole(currentChat.Agent)
if !ok { if currentCard == nil {
// case which won't let to switch roles: logger.Warn("failed to find current card", "agent", currentChat.Agent)
// started new chat (basic_sys or any other), at the start it yet be saved or have chatbody
// if it does not have a card or chars, it'll return an empty slice
// log error
logger.Warn("failed to find current card in sysMap", "agent", currentChat.Agent, "sysMap", sysMap)
return cbc return cbc
} }
charset := []string{} charset := []string{}
@@ -408,10 +491,7 @@ func listChatRoles() []string {
func deepseekModelValidator() error { func deepseekModelValidator() error {
if cfg.CurrentAPI == cfg.DeepSeekChatAPI || cfg.CurrentAPI == cfg.DeepSeekCompletionAPI { if cfg.CurrentAPI == cfg.DeepSeekChatAPI || cfg.CurrentAPI == cfg.DeepSeekCompletionAPI {
if chatBody.Model != "deepseek-chat" && chatBody.Model != "deepseek-reasoner" { if chatBody.Model != "deepseek-chat" && chatBody.Model != "deepseek-reasoner" {
if err := notifyUser("bad request", "wrong deepseek model name"); err != nil { showToast("bad request", "wrong deepseek model name")
logger.Warn("failed ot notify user", "error", err)
return err
}
return nil return nil
} }
} }
@@ -611,9 +691,7 @@ func performSearch(term string) {
searchResults = nil searchResults = nil
searchResultLengths = nil searchResultLengths = nil
notification := "Pattern not found: " + term notification := "Pattern not found: " + term
if err := notifyUser("search", notification); err != nil { showToast("search", notification)
logger.Error("failed to send notification", "error", err)
}
return return
} }
// Store the formatted text positions and lengths for accurate highlighting // Store the formatted text positions and lengths for accurate highlighting
@@ -646,9 +724,7 @@ func highlightCurrentMatch() {
textView.Highlight(currentRegion).ScrollToHighlight() textView.Highlight(currentRegion).ScrollToHighlight()
// Send notification about which match we're at // Send notification about which match we're at
notification := fmt.Sprintf("Match %d of %d", searchIndex+1, len(searchResults)) notification := fmt.Sprintf("Match %d of %d", searchIndex+1, len(searchResults))
if err := notifyUser("search", notification); err != nil { showToast("search", notification)
logger.Error("failed to send notification", "error", err)
}
} }
// showSearchBar shows the search input field as an overlay // showSearchBar shows the search input field as an overlay
@@ -738,9 +814,7 @@ func addRegionTags(text string, positions []int, lengths []int, currentIdx int,
// searchNext finds the next occurrence of the search term // searchNext finds the next occurrence of the search term
func searchNext() { func searchNext() {
if len(searchResults) == 0 { if len(searchResults) == 0 {
if err := notifyUser("search", "No search results to navigate"); err != nil { showToast("search", "No search results to navigate")
logger.Error("failed to send notification", "error", err)
}
return return
} }
searchIndex = (searchIndex + 1) % len(searchResults) searchIndex = (searchIndex + 1) % len(searchResults)
@@ -750,9 +824,7 @@ func searchNext() {
// searchPrev finds the previous occurrence of the search term // searchPrev finds the previous occurrence of the search term
func searchPrev() { func searchPrev() {
if len(searchResults) == 0 { if len(searchResults) == 0 {
if err := notifyUser("search", "No search results to navigate"); err != nil { showToast("search", "No search results to navigate")
logger.Error("failed to send notification", "error", err)
}
return return
} }
if searchIndex == 0 { if searchIndex == 0 {

82
llm.go
View File

@@ -3,7 +3,6 @@ package main
import ( import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"errors"
"gf-lt/models" "gf-lt/models"
"io" "io"
"strings" "strings"
@@ -119,25 +118,22 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
logger.Debug("formmsg lcpcompletion", "link", cfg.CurrentAPI) logger.Debug("formmsg lcpcompletion", "link", cfg.CurrentAPI)
localImageAttachmentPath := imageAttachmentPath localImageAttachmentPath := imageAttachmentPath
var multimodalData []string var multimodalData []string
if msg != "" { // otherwise let the bot to continue
var newMsg models.RoleMsg
if localImageAttachmentPath != "" { if localImageAttachmentPath != "" {
newMsg = models.NewMultimodalMsg(role, []any{})
newMsg.AddTextPart(msg)
imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath) imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
if err != nil { if err != nil {
logger.Error("failed to create image URL from path for completion", logger.Error("failed to create image URL from path for completion",
"error", err, "path", localImageAttachmentPath) "error", err, "path", localImageAttachmentPath)
return nil, err return nil, err
} }
// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...") newMsg.AddImagePart(imageURL, localImageAttachmentPath)
parts := strings.SplitN(imageURL, ",", 2)
if len(parts) == 2 {
multimodalData = append(multimodalData, parts[1])
} else {
logger.Error("invalid image data URL format", "url", imageURL)
return nil, errors.New("invalid image data URL format")
}
imageAttachmentPath = "" // Clear the attachment after use imageAttachmentPath = "" // Clear the attachment after use
} else { // not a multimodal msg or image passed in tool call
newMsg = models.RoleMsg{Role: role, Content: msg}
} }
if msg != "" { // otherwise let the bot to continue
newMsg := models.RoleMsg{Role: role, Content: msg}
newMsg = *processMessageTag(&newMsg) newMsg = *processMessageTag(&newMsg)
chatBody.Messages = append(chatBody.Messages, newMsg) chatBody.Messages = append(chatBody.Messages, newMsg)
} }
@@ -146,22 +142,40 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg}) chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
} }
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages) filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages)
// Build prompt and extract images inline as we process each message
messages := make([]string, len(filteredMessages)) messages := make([]string, len(filteredMessages))
for i := range filteredMessages { for i := range filteredMessages {
messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt() m := stripThinkingFromMsg(&filteredMessages[i])
messages[i] = m.ToPrompt()
// Extract images from this message and add marker inline
if len(m.ContentParts) > 0 {
for _, part := range m.ContentParts {
var imgURL string
// Check for struct type
if imgPart, ok := part.(models.ImageContentPart); ok {
imgURL = imgPart.ImageURL.URL
} else if partMap, ok := part.(map[string]any); ok {
// Check for map type (from JSON unmarshaling)
if partType, exists := partMap["type"]; exists && partType == "image_url" {
if imgURLMap, ok := partMap["image_url"].(map[string]any); ok {
if url, ok := imgURLMap["url"].(string); ok {
imgURL = url
}
}
}
}
if imgURL != "" {
// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
parts := strings.SplitN(imgURL, ",", 2)
if len(parts) == 2 {
multimodalData = append(multimodalData, parts[1])
messages[i] += " <__media__>"
}
}
}
}
} }
prompt := strings.Join(messages, "\n") prompt := strings.Join(messages, "\n")
// Add multimodal media markers to the prompt text when multimodal data is present
// This is required by llama.cpp multimodal models so they know where to insert media
if len(multimodalData) > 0 {
// Add a media marker for each item in the multimodal data
var sb strings.Builder
sb.WriteString(prompt)
for range multimodalData {
sb.WriteString(" <__media__>") // llama.cpp default multimodal marker
}
prompt = sb.String()
}
// needs to be after <__media__> if there are images // needs to be after <__media__> if there are images
if !resume { if !resume {
botMsgStart := "\n" + botPersona + ":\n" botMsgStart := "\n" + botPersona + ":\n"
@@ -210,11 +224,9 @@ func (op LCPChat) ParseChunk(data []byte) (*models.TextChunk, error) {
logger.Error("failed to decode", "error", err, "line", string(data)) logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err return nil, err
} }
// Handle multiple choices safely
if len(llmchunk.Choices) == 0 { if len(llmchunk.Choices) == 0 {
logger.Warn("LCPChat ParseChunk: no choices in response", "data", string(data)) logger.Warn("LCPChat empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return &models.TextChunk{Finished: true}, nil return &models.TextChunk{}, nil
} }
lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1] lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
resp := &models.TextChunk{ resp := &models.TextChunk{
@@ -335,6 +347,10 @@ func (ds DeepSeekerCompletion) ParseChunk(data []byte) (*models.TextChunk, error
logger.Error("failed to decode", "error", err, "line", string(data)) logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err return nil, err
} }
if len(llmchunk.Choices) == 0 {
logger.Warn("empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return &models.TextChunk{}, nil
}
resp := &models.TextChunk{ resp := &models.TextChunk{
Chunk: llmchunk.Choices[0].Text, Chunk: llmchunk.Choices[0].Text,
} }
@@ -400,6 +416,10 @@ func (ds DeepSeekerChat) ParseChunk(data []byte) (*models.TextChunk, error) {
return nil, err return nil, err
} }
resp := &models.TextChunk{} resp := &models.TextChunk{}
if len(llmchunk.Choices) == 0 {
logger.Warn("empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return resp, nil
}
if llmchunk.Choices[0].FinishReason != "" { if llmchunk.Choices[0].FinishReason != "" {
if llmchunk.Choices[0].Delta.Content != "" { if llmchunk.Choices[0].Delta.Content != "" {
logger.Error("text inside of finish llmchunk", "chunk", llmchunk) logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
@@ -482,6 +502,10 @@ func (or OpenRouterCompletion) ParseChunk(data []byte) (*models.TextChunk, error
logger.Error("failed to decode", "error", err, "line", string(data)) logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err return nil, err
} }
if len(llmchunk.Choices) == 0 {
logger.Warn("empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return &models.TextChunk{}, nil
}
resp := &models.TextChunk{ resp := &models.TextChunk{
Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Text, Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Text,
} }
@@ -544,6 +568,10 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
logger.Error("failed to decode", "error", err, "line", string(data)) logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err return nil, err
} }
if len(llmchunk.Choices) == 0 {
logger.Warn("empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return &models.TextChunk{}, nil
}
lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1] lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
resp := &models.TextChunk{ resp := &models.TextChunk{
Chunk: lastChoice.Delta.Content, Chunk: lastChoice.Delta.Content,

View File

@@ -17,8 +17,9 @@ var (
shellHistoryPos int = -1 shellHistoryPos int = -1
thinkingCollapsed = false thinkingCollapsed = false
toolCollapsed = true toolCollapsed = true
statusLineTempl = "help (F12) | chat: [orange:-:b]%s[-:-:-] (F1) | [%s:-:b]tool use[-:-:-] (ctrl+k) | model: [%s:-:b]%s[-:-:-] (ctrl+l) | [%s:-:b]skip LLM resp[-:-:-] (F10)\nAPI: [orange:-:b]%s[-:-:-] (ctrl+v) | writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | bot will write as [orange:-:b]%s[-:-:-] (ctrl+x)" statusLineTempl = "help (F12) | chat: [orange:-:b]%s[-:-:-] (F1) | [%s:-:b]tool use[-:-:-] (ctrl+k) | model: [%s:-:b]%s[-:-:-] (ctrl+l) | [%s:-:b]skip LLM resp[-:-:-] (F10) | API: [orange:-:b]%s[-:-:-] (ctrl+v)\nwriting as: [orange:-:b]%s[-:-:-] (ctrl+q) | bot will write as [orange:-:b]%s[-:-:-] (ctrl+x)"
focusSwitcher = map[tview.Primitive]tview.Primitive{} focusSwitcher = map[tview.Primitive]tview.Primitive{}
app *tview.Application
) )
func main() { func main() {

View File

@@ -1,6 +1,10 @@
package models package models
import "strings" import (
"crypto/md5"
"fmt"
"strings"
)
// https://github.com/malfoyslastname/character-card-spec-v2/blob/main/spec_v2.md // https://github.com/malfoyslastname/character-card-spec-v2/blob/main/spec_v2.md
// what a bloat; trim to Role->Msg pair and first msg // what a bloat; trim to Role->Msg pair and first msg
@@ -31,6 +35,7 @@ func (c *CharCardSpec) Simplify(userName, fpath string) *CharCard {
fm := strings.ReplaceAll(strings.ReplaceAll(c.FirstMes, "{{char}}", c.Name), "{{user}}", userName) fm := strings.ReplaceAll(strings.ReplaceAll(c.FirstMes, "{{char}}", c.Name), "{{user}}", userName)
sysPr := strings.ReplaceAll(strings.ReplaceAll(c.Description, "{{char}}", c.Name), "{{user}}", userName) sysPr := strings.ReplaceAll(strings.ReplaceAll(c.Description, "{{char}}", c.Name), "{{user}}", userName)
return &CharCard{ return &CharCard{
ID: ComputeCardID(c.Name, fpath),
SysPrompt: sysPr, SysPrompt: sysPr,
FirstMsg: fm, FirstMsg: fm,
Role: c.Name, Role: c.Name,
@@ -39,7 +44,12 @@ func (c *CharCardSpec) Simplify(userName, fpath string) *CharCard {
} }
} }
// ComputeCardID derives a card identifier from a role name and a file path:
// the lowercase hex encoding of the MD5 digest of role immediately followed
// by filePath.
func ComputeCardID(role, filePath string) string {
	digest := md5.Sum([]byte(role + filePath))
	return fmt.Sprintf("%x", digest)
}
type CharCard struct { type CharCard struct {
ID string `json:"id"`
SysPrompt string `json:"sys_prompt"` SysPrompt string `json:"sys_prompt"`
FirstMsg string `json:"first_msg"` FirstMsg string `json:"first_msg"`
Role string `json:"role"` Role string `json:"role"`

View File

@@ -2,6 +2,7 @@ package models
const ( const (
LoadedMark = "(loaded) " LoadedMark = "(loaded) "
ToolRespMultyType = "multimodel_content"
) )
type APIType int type APIType int

View File

@@ -391,7 +391,6 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
if err != nil { if err != nil {
return "", err return "", err
} }
// Determine the image format based on file extension // Determine the image format based on file extension
var mimeType string var mimeType string
switch { switch {
@@ -408,10 +407,8 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
default: default:
mimeType = "image/jpeg" // default mimeType = "image/jpeg" // default
} }
// Encode to base64 // Encode to base64
encoded := base64.StdEncoding.EncodeToString(data) encoded := base64.StdEncoding.EncodeToString(data)
// Create data URL // Create data URL
return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
} }
@@ -611,6 +608,20 @@ func (lcp *LCPModels) ListModels() []string {
return resp return resp
} }
// HasVision reports whether the model with the given ID lists a "--mmproj"
// flag among its status arguments. The final argument is never considered,
// so a trailing "--mmproj" without a following value does not count.
func (lcp *LCPModels) HasVision(modelID string) bool {
	for idx := range lcp.Data {
		if lcp.Data[idx].ID != modelID {
			continue
		}
		flags := lcp.Data[idx].Status.Args
		for pos := 0; pos+1 < len(flags); pos++ {
			if flags[pos] == "--mmproj" {
				return true
			}
		}
	}
	return false
}
type ResponseStats struct { type ResponseStats struct {
Tokens int Tokens int
Duration float64 Duration float64
@@ -623,3 +634,8 @@ type ChatRoundReq struct {
Regen bool Regen bool
Resume bool Resume bool
} }
// MultimodalToolResp is a JSON-serializable payload made of a type tag and a
// list of string-keyed parts.
// NOTE(review): presumably Type is set to ToolRespMultyType and each part
// describes one text or image fragment of a tool response — confirm against
// the code that produces and consumes this struct.
type MultimodalToolResp struct {
Type string `json:"type"`
Parts []map[string]string `json:"parts"`
}

View File

@@ -172,3 +172,16 @@ func (orm *ORModels) ListModels(free bool) []string {
} }
return resp return resp
} }
// HasVision reports whether the model with the given ID lists "image" among
// the input modalities of its architecture.
func (orm *ORModels) HasVision(modelID string) bool {
	for idx := range orm.Data {
		entry := &orm.Data[idx]
		if entry.ID != modelID {
			continue
		}
		for _, modality := range entry.Architecture.InputModalities {
			if modality == "image" {
				return true
			}
		}
	}
	return false
}

View File

@@ -109,6 +109,12 @@ func ReadCardJson(fname string) (*models.CharCard, error) {
if err := json.Unmarshal(data, &card); err != nil { if err := json.Unmarshal(data, &card); err != nil {
return nil, err return nil, err
} }
if card.FilePath == "" {
card.FilePath = fname
}
if card.ID == "" {
card.ID = models.ComputeCardID(card.Role, card.FilePath)
}
return &card, nil return &card, nil
} }

View File

@@ -40,9 +40,7 @@ func showModelSelectionPopup() {
default: default:
message = "No llama.cpp models loaded. Ensure llama.cpp server is running with models." message = "No llama.cpp models loaded. Ensure llama.cpp server is running with models."
} }
if err := notifyUser("Empty list", message); err != nil { showToast("Empty list", message)
logger.Error("failed to send notification", "error", err)
}
return return
} }
// Create a list primitive // Create a list primitive
@@ -119,9 +117,7 @@ func showAPILinkSelectionPopup() {
if len(apiLinks) == 0 { if len(apiLinks) == 0 {
logger.Warn("no API links available for selection") logger.Warn("no API links available for selection")
message := "No API links available. Please configure API links in your config file." message := "No API links available. Please configure API links in your config file."
if err := notifyUser("Empty list", message); err != nil { showToast("Empty list", message)
logger.Error("failed to send notification", "error", err)
}
return return
} }
// Create a list primitive // Create a list primitive
@@ -143,6 +139,7 @@ func showAPILinkSelectionPopup() {
apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) { apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
// Update the API in config // Update the API in config
cfg.CurrentAPI = mainText cfg.CurrentAPI = mainText
// updateToolCapabilities()
// Update model list based on new API // Update model list based on new API
// Helper function to get model list for a given API (same as in props_table.go) // Helper function to get model list for a given API (same as in props_table.go)
getModelListForAPI := func(api string) []string { getModelListForAPI := func(api string) []string {
@@ -160,8 +157,9 @@ func showAPILinkSelectionPopup() {
newModelList := getModelListForAPI(cfg.CurrentAPI) newModelList := getModelListForAPI(cfg.CurrentAPI)
// Ensure chatBody.Model is in the new list; if not, set to first available model // Ensure chatBody.Model is in the new list; if not, set to first available model
if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) { if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) {
chatBody.Model = newModelList[0] chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark)
cfg.CurrentModel = chatBody.Model cfg.CurrentModel = chatBody.Model
updateToolCapabilities()
} }
pages.RemovePage("apiLinkSelectionPopup") pages.RemovePage("apiLinkSelectionPopup")
app.SetFocus(textArea) app.SetFocus(textArea)
@@ -204,9 +202,7 @@ func showUserRoleSelectionPopup() {
if len(roles) == 0 { if len(roles) == 0 {
logger.Warn("no roles available for selection") logger.Warn("no roles available for selection")
message := "No roles available for selection." message := "No roles available for selection."
if err := notifyUser("Empty list", message); err != nil { showToast("Empty list", message)
logger.Error("failed to send notification", "error", err)
}
return return
} }
// Create a list primitive // Create a list primitive
@@ -283,9 +279,7 @@ func showBotRoleSelectionPopup() {
if len(roles) == 0 { if len(roles) == 0 {
logger.Warn("no roles available for selection") logger.Warn("no roles available for selection")
message := "No roles available for selection." message := "No roles available for selection."
if err := notifyUser("Empty list", message); err != nil { showToast("Empty list", message)
logger.Error("failed to send notification", "error", err)
}
return return
} }
// Create a list primitive // Create a list primitive
@@ -404,6 +398,66 @@ func showShellFileCompletionPopup(filter string) {
app.SetFocus(widget) app.SetFocus(widget)
} }
// showTextAreaFileCompletionPopup offers file completion for the text area.
// Files are scanned under cfg.FilePickerDir ("." when unset) using filter.
// With no match nothing happens; with exactly one match the text from the
// last "@" onward is replaced by it directly; otherwise a centered list popup
// lets the user pick a match. Escape or the 'x' key closes the popup without
// changing the text.
func showTextAreaFileCompletionPopup(filter string) {
	const popupPage = "textAreaFileCompletionPopup"
	searchDir := cfg.FilePickerDir
	if searchDir == "" {
		searchDir = "."
	}
	candidates := scanFiles(searchDir, filter)
	if len(candidates) == 0 {
		return
	}
	// insertCompletion replaces everything from the last "@" with choice;
	// the text is left untouched when it contains no "@".
	insertCompletion := func(choice string) {
		text := textArea.GetText()
		if at := strings.LastIndex(text, "@"); at >= 0 {
			textArea.SetText(text[:at]+choice, true)
		}
	}
	// dismiss removes the popup page and hands focus back to the text area.
	dismiss := func() {
		pages.RemovePage(popupPage)
		app.SetFocus(textArea)
	}
	if len(candidates) == 1 {
		insertCompletion(candidates[0])
		return
	}
	list := tview.NewList().ShowSecondaryText(false).
		SetSelectedBackgroundColor(tcell.ColorGray)
	list.SetTitle("file completion").SetBorder(true)
	for _, candidate := range candidates {
		list.AddItem(candidate, "", 0, nil)
	}
	list.SetSelectedFunc(func(_ int, mainText string, _ string, _ rune) {
		insertCompletion(mainText)
		dismiss()
	})
	list.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
		isEscape := event.Key() == tcell.KeyEscape
		isCloseRune := event.Key() == tcell.KeyRune && event.Rune() == 'x'
		if isEscape || isCloseRune {
			dismiss()
			return nil
		}
		return event
	})
	// Center the list: a fixed-height row inside a fixed-width column,
	// padded by flexible empty items on every side.
	column := tview.NewFlex().SetDirection(tview.FlexRow).
		AddItem(nil, 0, 1, false).
		AddItem(list, 20, 1, true).
		AddItem(nil, 0, 1, false)
	centered := tview.NewFlex().
		AddItem(nil, 0, 1, false).
		AddItem(column, 80, 1, true).
		AddItem(nil, 0, 1, false)
	pages.AddPage(popupPage, centered, true, true)
	app.SetFocus(list)
}
func updateWidgetColors(theme *tview.Theme) { func updateWidgetColors(theme *tview.Theme) {
bgColor := theme.PrimitiveBackgroundColor bgColor := theme.PrimitiveBackgroundColor
fgColor := theme.PrimaryTextColor fgColor := theme.PrimaryTextColor
@@ -450,9 +504,7 @@ func showColorschemeSelectionPopup() {
if len(schemeNames) == 0 { if len(schemeNames) == 0 {
logger.Warn("no colorschemes available for selection") logger.Warn("no colorschemes available for selection")
message := "No colorschemes available." message := "No colorschemes available."
if err := notifyUser("Empty list", message); err != nil { showToast("Empty list", message)
logger.Error("failed to send notification", "error", err)
}
return return
} }
// Create a list primitive // Create a list primitive

View File

@@ -259,9 +259,7 @@ func makePropsTable(props map[string]float32) *tview.Table {
// Handle nil options // Handle nil options
if data.Options == nil { if data.Options == nil {
logger.Error("options list is nil for", "label", label) logger.Error("options list is nil for", "label", label)
if err := notifyUser("Configuration error", "Options list is nil for "+label); err != nil { showToast("Configuration error", "Options list is nil for "+label)
logger.Error("failed to send notification", "error", err)
}
return return
} }
@@ -279,9 +277,7 @@ func makePropsTable(props map[string]float32) *tview.Table {
message = "No llama.cpp models loaded. Ensure llama.cpp server is running with models." message = "No llama.cpp models loaded. Ensure llama.cpp server is running with models."
} }
} }
if err := notifyUser("Empty list", message); err != nil { showToast("Empty list", message)
logger.Error("failed to send notification", "error", err)
}
return return
} }
// Create a list primitive // Create a list primitive

View File

@@ -168,8 +168,3 @@ func copyToClipboard(text string) error {
cmd.Stdin = strings.NewReader(text) cmd.Stdin = strings.NewReader(text)
return cmd.Run() return cmd.Run()
} }
func notifyUser(topic, message string) error {
cmd := exec.Command("notify-send", topic, message)
return cmd.Run()
}

View File

@@ -10,16 +10,18 @@ import (
//go:embed migrations/* //go:embed migrations/*
var migrationsFS embed.FS var migrationsFS embed.FS
func (p *ProviderSQL) Migrate() { func (p *ProviderSQL) Migrate() error {
// Get the embedded filesystem // Get the embedded filesystem
migrationsDir, err := fs.Sub(migrationsFS, "migrations") migrationsDir, err := fs.Sub(migrationsFS, "migrations")
if err != nil { if err != nil {
p.logger.Error("Failed to get embedded migrations directory;", "error", err) p.logger.Error("Failed to get embedded migrations directory;", "error", err)
return fmt.Errorf("failed to get embedded migrations directory: %w", err)
} }
// List all .up.sql files // List all .up.sql files
files, err := migrationsFS.ReadDir("migrations") files, err := migrationsFS.ReadDir("migrations")
if err != nil { if err != nil {
p.logger.Error("Failed to read migrations directory;", "error", err) p.logger.Error("Failed to read migrations directory;", "error", err)
return fmt.Errorf("failed to read migrations directory: %w", err)
} }
// Execute each .up.sql file // Execute each .up.sql file
for _, file := range files { for _, file := range files {
@@ -27,11 +29,12 @@ func (p *ProviderSQL) Migrate() {
err := p.executeMigration(migrationsDir, file.Name()) err := p.executeMigration(migrationsDir, file.Name())
if err != nil { if err != nil {
p.logger.Error("Failed to execute migration %s: %v", file.Name(), err) p.logger.Error("Failed to execute migration %s: %v", file.Name(), err)
panic(err) return fmt.Errorf("failed to execute migration %s: %w", file.Name(), err)
} }
} }
} }
p.logger.Debug("All migrations executed successfully!") p.logger.Debug("All migrations executed successfully!")
return nil
} }
func (p *ProviderSQL) executeMigration(migrationsDir fs.FS, fileName string) error { func (p *ProviderSQL) executeMigration(migrationsDir fs.FS, fileName string) error {

View File

@@ -103,7 +103,10 @@ func NewProviderSQL(dbPath string, logger *slog.Logger) FullRepo {
return nil return nil
} }
p := ProviderSQL{db: db, logger: logger} p := ProviderSQL{db: db, logger: logger}
p.Migrate() if err := p.Migrate(); err != nil {
logger.Error("migration failed, app cannot start", "error", err)
return nil
}
return p return p
} }

View File

@@ -147,9 +147,7 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
if err := store.RemoveChat(sc.ID); err != nil { if err := store.RemoveChat(sc.ID); err != nil {
logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name) logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name)
} }
if err := notifyUser("chat deleted", selectedChat+" was deleted"); err != nil { showToast("chat deleted", selectedChat+" was deleted")
logger.Error("failed to send notification", "error", err)
}
// load last chat // load last chat
chatBody.Messages = loadOldChatOrGetNew() chatBody.Messages = loadOldChatOrGetNew()
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys)) textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
@@ -159,27 +157,16 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
// save updated card // save updated card
fi := strings.Index(selectedChat, "_") fi := strings.Index(selectedChat, "_")
agentName := selectedChat[fi+1:] agentName := selectedChat[fi+1:]
cc, ok := sysMap[agentName] cc := GetCardByRole(agentName)
if !ok { if cc == nil {
logger.Warn("no such card", "agent", agentName) logger.Warn("no such card", "agent", agentName)
//no:lint showToast("error", "no such card: "+agentName)
if err := notifyUser("error", "no such card: "+agentName); err != nil {
logger.Warn("failed ot notify", "error", err)
}
return return
} }
// if chatBody.Messages[0].Role != "system" || chatBody.Messages[1].Role != agentName {
// if err := notifyUser("error", "unexpected chat structure; card: "+agentName); err != nil {
// logger.Warn("failed ot notify", "error", err)
// }
// return
// }
// change sys_prompt + first msg
cc.SysPrompt = chatBody.Messages[0].Content cc.SysPrompt = chatBody.Messages[0].Content
cc.FirstMsg = chatBody.Messages[1].Content cc.FirstMsg = chatBody.Messages[1].Content
if err := pngmeta.WriteToPng(cc.ToSpec(cfg.UserRole), cc.FilePath, cc.FilePath); err != nil { if err := pngmeta.WriteToPng(cc.ToSpec(cfg.UserRole), cc.FilePath, cc.FilePath); err != nil {
logger.Error("failed to write charcard", logger.Error("failed to write charcard", "error", err)
"error", err)
} }
return return
case "move sysprompt onto 1st msg": case "move sysprompt onto 1st msg":
@@ -190,33 +177,29 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
pages.RemovePage(historyPage) pages.RemovePage(historyPage)
return return
case "new_chat_from_card": case "new_chat_from_card":
// Reread card from file and start fresh chat
fi := strings.Index(selectedChat, "_") fi := strings.Index(selectedChat, "_")
agentName := selectedChat[fi+1:] agentName := selectedChat[fi+1:]
cc, ok := sysMap[agentName] cc := GetCardByRole(agentName)
if !ok { if cc == nil {
logger.Warn("no such card", "agent", agentName) logger.Warn("no such card", "agent", agentName)
if err := notifyUser("error", "no such card: "+agentName); err != nil { showToast("error", "no such card: "+agentName)
logger.Warn("failed to notify", "error", err)
}
return return
} }
// Reload card from disk
newCard, err := pngmeta.ReadCard(cc.FilePath, cfg.UserRole) newCard, err := pngmeta.ReadCard(cc.FilePath, cfg.UserRole)
if err != nil { if err != nil {
logger.Error("failed to reload charcard", "path", cc.FilePath, "error", err) logger.Error("failed to reload charcard", "path", cc.FilePath, "error", err)
newCard, err = pngmeta.ReadCardJson(cc.FilePath) newCard, err = pngmeta.ReadCardJson(cc.FilePath)
if err != nil { if err != nil {
logger.Error("failed to reload charcard", "path", cc.FilePath, "error", err) logger.Error("failed to reload charcard", "path", cc.FilePath, "error", err)
if err := notifyUser("error", "failed to reload card: "+cc.FilePath); err != nil { showToast("error", "failed to reload card: "+cc.FilePath)
logger.Warn("failed to notify", "error", err)
}
return return
} }
} }
// Update sysMap with fresh card data if newCard.ID == "" {
sysMap[agentName] = newCard newCard.ID = models.ComputeCardID(newCard.Role, newCard.FilePath)
// fetching sysprompt and first message anew from the card }
sysMap[newCard.ID] = newCard
roleToID[newCard.Role] = newCard.ID
startNewChat(false) startNewChat(false)
pages.RemovePage(historyPage) pages.RemovePage(historyPage)
return return
@@ -457,13 +440,13 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
go func() { go func() {
if err := ragger.LoadRAG(fpath); err != nil { if err := ragger.LoadRAG(fpath); err != nil {
logger.Error("failed to embed file", "chat", fpath, "error", err) logger.Error("failed to embed file", "chat", fpath, "error", err)
_ = notifyUser("RAG", "failed to embed file; error: "+err.Error()) showToast("RAG", "failed to embed file; error: "+err.Error())
app.QueueUpdate(func() { app.QueueUpdate(func() {
pages.RemovePage(RAGPage) pages.RemovePage(RAGPage)
}) })
return return
} }
_ = notifyUser("RAG", "file loaded successfully") showToast("RAG", "file loaded successfully")
app.QueueUpdate(func() { app.QueueUpdate(func() {
pages.RemovePage(RAGPage) pages.RemovePage(RAGPage)
}) })
@@ -474,13 +457,13 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
go func() { go func() {
if err := ragger.RemoveFile(f.name); err != nil { if err := ragger.RemoveFile(f.name); err != nil {
logger.Error("failed to unload file from RAG", "filename", f.name, "error", err) logger.Error("failed to unload file from RAG", "filename", f.name, "error", err)
_ = notifyUser("RAG", "failed to unload file; error: "+err.Error()) showToast("RAG", "failed to unload file; error: "+err.Error())
app.QueueUpdate(func() { app.QueueUpdate(func() {
pages.RemovePage(RAGPage) pages.RemovePage(RAGPage)
}) })
return return
} }
_ = notifyUser("RAG", "file unloaded successfully") showToast("RAG", "file unloaded successfully")
app.QueueUpdate(func() { app.QueueUpdate(func() {
pages.RemovePage(RAGPage) pages.RemovePage(RAGPage)
}) })
@@ -492,9 +475,7 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
logger.Error("failed to delete file", "filename", fpath, "error", err) logger.Error("failed to delete file", "filename", fpath, "error", err)
return return
} }
if err := notifyUser("chat deleted", fpath+" was deleted"); err != nil { showToast("chat deleted", fpath+" was deleted")
logger.Error("failed to send notification", "error", err)
}
return return
default: default:
pages.RemovePage(RAGPage) pages.RemovePage(RAGPage)
@@ -529,8 +510,8 @@ func makeAgentTable(agentList []string) *tview.Table {
SetSelectable(false)) SetSelectable(false))
case 1: case 1:
if actions[c-1] == "filepath" { if actions[c-1] == "filepath" {
cc, ok := sysMap[agentList[r]] cc := GetCardByRole(agentList[r])
if !ok { if cc == nil {
continue continue
} }
chatActTable.SetCell(r, c, chatActTable.SetCell(r, c,
@@ -603,9 +584,7 @@ func makeAgentTable(agentList []string) *tview.Table {
if err := store.RemoveChat(sc.ID); err != nil { if err := store.RemoveChat(sc.ID); err != nil {
logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name) logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name)
} }
if err := notifyUser("chat deleted", selected+" was deleted"); err != nil { showToast("chat deleted", selected+" was deleted")
logger.Error("failed to send notification", "error", err)
}
pages.RemovePage(agentPage) pages.RemovePage(agentPage)
return return
default: default:
@@ -676,13 +655,9 @@ func makeCodeBlockTable(codeBlocks []string) *tview.Table {
switch tc.Text { switch tc.Text {
case "copy": case "copy":
if err := copyToClipboard(selected); err != nil { if err := copyToClipboard(selected); err != nil {
if err := notifyUser("error", err.Error()); err != nil { showToast("error", err.Error())
logger.Error("failed to send notification", "error", err)
}
}
if err := notifyUser("copied", selected); err != nil {
logger.Error("failed to send notification", "error", err)
} }
showToast("copied", selected)
pages.RemovePage(codeBlockPage) pages.RemovePage(codeBlockPage)
app.SetFocus(textArea) app.SetFocus(textArea)
return return
@@ -775,9 +750,7 @@ func makeImportChatTable(filenames []string) *tview.Table {
if err := store.RemoveChat(sc.ID); err != nil { if err := store.RemoveChat(sc.ID); err != nil {
logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name) logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name)
} }
if err := notifyUser("chat deleted", selected+" was deleted"); err != nil { showToast("chat deleted", selected+" was deleted")
logger.Error("failed to send notification", "error", err)
}
pages.RemovePage(historyPage) pages.RemovePage(historyPage)
return return
default: default:

774
tools.go
View File

@@ -77,12 +77,17 @@ Your current tools:
{ {
"name":"file_create", "name":"file_create",
"args": ["path", "content"], "args": ["path", "content"],
"when_to_use": "when asked to create a new file with optional content" "when_to_use": "when there is a need to create a new file with optional content"
}, },
{ {
"name":"file_read", "name":"file_read",
"args": ["path"], "args": ["path"],
"when_to_use": "when asked to read the content of a file" "when_to_use": "when you need to read the content of a file"
},
{
"name":"file_read_image",
"args": ["path"],
"when_to_use": "when you need to read or view an image file"
}, },
{ {
"name":"file_write", "name":"file_write",
@@ -92,7 +97,7 @@ Your current tools:
{ {
"name":"file_write_append", "name":"file_write_append",
"args": ["path", "content"], "args": ["path", "content"],
"when_to_use": "when asked to append content to a file; use sed to edit content" "when_to_use": "when you need append content to a file; use sed to edit content"
}, },
{ {
"name":"file_edit", "name":"file_edit",
@@ -107,22 +112,22 @@ Your current tools:
{ {
"name":"file_move", "name":"file_move",
"args": ["src", "dst"], "args": ["src", "dst"],
"when_to_use": "when asked to move a file from source to destination" "when_to_use": "when you need to move a file from source to destination"
}, },
{ {
"name":"file_copy", "name":"file_copy",
"args": ["src", "dst"], "args": ["src", "dst"],
"when_to_use": "when asked to copy a file from source to destination" "when_to_use": "copy a file from source to destination"
}, },
{ {
"name":"file_list", "name":"file_list",
"args": ["path"], "args": ["path"],
"when_to_use": "when asked to list files in a directory; path is optional (default: current directory)" "when_to_use": "list files in a directory; path is optional (default: current directory)"
}, },
{ {
"name":"execute_command", "name":"execute_command",
"args": ["command", "args"], "args": ["command", "args"],
"when_to_use": "when asked to execute a system command; args is optional; allowed commands: grep, sed, awk, find, cat, head, tail, sort, uniq, wc, ls, echo, cut, tr, cp, mv, rm, mkdir, rmdir, pwd, df, free, ps, top, du, whoami, date, uname, go" "when_to_use": "execute a system command; args is optional; allowed commands: grep, sed, awk, find, cat, head, tail, sort, uniq, wc, ls, echo, cut, tr, cp, mv, rm, mkdir, rmdir, pwd, df, free, ps, top, du, whoami, date, uname, go"
} }
] ]
</tools> </tools>
@@ -157,40 +162,133 @@ After that you are free to respond to the user.
readURLSysPrompt = `Extract and summarize the content from the webpage. Provide key information, main points, and any relevant details.` readURLSysPrompt = `Extract and summarize the content from the webpage. Provide key information, main points, and any relevant details.`
summarySysPrompt = `Please provide a concise summary of the following conversation. Focus on key points, decisions, and actions. Provide only the summary, no additional commentary.` summarySysPrompt = `Please provide a concise summary of the following conversation. Focus on key points, decisions, and actions. Provide only the summary, no additional commentary.`
basicCard = &models.CharCard{ basicCard = &models.CharCard{
ID: models.ComputeCardID("assistant", "basic_sys"),
SysPrompt: basicSysMsg, SysPrompt: basicSysMsg,
FirstMsg: defaultFirstMsg, FirstMsg: defaultFirstMsg,
Role: "", Role: "assistant",
FilePath: "", FilePath: "basic_sys",
} }
sysMap = map[string]*models.CharCard{"basic_sys": basicCard} sysMap = map[string]*models.CharCard{}
sysLabels = []string{"basic_sys"} roleToID = map[string]string{}
sysLabels = []string{"assistant"}
webAgentClient *agent.AgentClient webAgentClient *agent.AgentClient
webAgentClientOnce sync.Once webAgentClientOnce sync.Once
webAgentsOnce sync.Once webAgentsOnce sync.Once
) )
// windowToolSysMsg is the system-prompt fragment that describes the window
// tools (list_windows, capture_window, capture_window_and_view) to the model.
// registerWindowTools appends it to toolSysMsg when windowToolsAvailable is
// true. The text is sent to the model as-is, so edits here change runtime
// behavior.
var windowToolSysMsg = `
Additional window tools (available only if xdotool and maim are installed):
[
{
"name":"list_windows",
"args": [],
"when_to_use": "when asked to list visible windows; returns map of window ID to window name"
},
{
"name":"capture_window",
"args": ["window"],
"when_to_use": "when asked to take a screenshot of a specific window; saves to /tmp; window can be ID or name substring; returns file path"
},
{
"name":"capture_window_and_view",
"args": ["window"],
"when_to_use": "when asked to take a screenshot of a specific window and show it; saves to /tmp and returns image for viewing; window can be ID or name substring"
}
]
`
var WebSearcher searcher.WebSurfer var WebSearcher searcher.WebSurfer
// State shared by the window tools and the capability checks.
var (
// windowToolsAvailable is set by checkWindowTools; true only when both
// xdotool and maim were found.
windowToolsAvailable bool
// xdotoolPath is the path returned by exec.LookPath("xdotool"), or "" when
// the lookup returned no path.
xdotoolPath string
// maimPath is the path returned by exec.LookPath("maim"), or "" when the
// lookup returned no path.
maimPath string
// modelHasVision is refreshed by updateToolCapabilities and gates the
// capture_window_and_view tool in registerWindowTools.
modelHasVision bool
)
func init() { func init() {
sysMap[basicCard.ID] = basicCard
roleToID["assistant"] = basicCard.ID
sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "") sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "")
if err != nil { if err != nil {
panic("failed to init seachagent; error: " + err.Error()) if logger != nil {
logger.Warn("search agent unavailable; web_search tool disabled", "error", err)
} }
WebSearcher = nil
} else {
WebSearcher = sa WebSearcher = sa
}
if err := rag.Init(cfg, logger, store); err != nil { if err := rag.Init(cfg, logger, store); err != nil {
logger.Warn("failed to init rag; rag_search tool will not be available", "error", err) logger.Warn("failed to init rag; rag_search tool will not be available", "error", err)
} }
checkWindowTools()
registerWindowTools()
}
// GetCardByRole resolves a role name to its character card.
//
// The role is first translated to a card ID through roleToID; the card is
// then looked up in sysMap. It returns nil when the role is unknown or when
// no card is stored under the resolved ID.
func GetCardByRole(role string) *models.CharCard {
	if cardID, known := roleToID[role]; known {
		return sysMap[cardID]
	}
	return nil
}
// checkWindowTools looks up xdotool and maim on PATH, records their paths in
// xdotoolPath and maimPath, and sets windowToolsAvailable to true only when
// both paths are non-empty. The outcome is logged.
func checkWindowTools() {
	// Lookup errors are dropped; only the returned path is inspected below.
	xdotoolPath, _ = exec.LookPath("xdotool")
	maimPath, _ = exec.LookPath("maim")

	haveXdotool := xdotoolPath != ""
	haveMaim := maimPath != ""
	windowToolsAvailable = haveXdotool && haveMaim

	if windowToolsAvailable {
		logger.Info("window tools available: xdotool and maim found")
		return
	}
	if !haveXdotool {
		logger.Warn("xdotool not found, window listing tools will not be available")
	}
	if !haveMaim {
		logger.Warn("maim not found, window capture tools will not be available")
	}
}
func updateToolCapabilities() {
if !cfg.ToolUse {
return
}
modelHasVision = false
if cfg == nil || cfg.CurrentAPI == "" {
logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
registerWindowTools()
registerPlaywrightTools()
return
}
prevHasVision := modelHasVision
modelHasVision = ModelHasVision(cfg.CurrentAPI, cfg.CurrentModel)
if modelHasVision {
logger.Info("model has vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
} else {
logger.Info("model does not have vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
if windowToolsAvailable && !prevHasVision && !modelHasVision {
showToast("window tools", "Window capture-and-view unavailable: model lacks vision support")
}
}
registerWindowTools()
registerPlaywrightTools()
} }
// getWebAgentClient returns a singleton AgentClient for web agents. // getWebAgentClient returns a singleton AgentClient for web agents.
func getWebAgentClient() *agent.AgentClient { func getWebAgentClient() *agent.AgentClient {
webAgentClientOnce.Do(func() { webAgentClientOnce.Do(func() {
if cfg == nil { if cfg == nil {
panic("cfg not initialized") if logger != nil {
logger.Warn("web agent client unavailable: config not initialized")
}
return
} }
if logger == nil { if logger == nil {
panic("logger not initialized") if logger != nil {
logger.Warn("web agent client unavailable: logger not initialized")
}
return
} }
getToken := func() string { getToken := func() string {
if chunkParser == nil { if chunkParser == nil {
@@ -469,6 +567,43 @@ func fileRead(args map[string]string) []byte {
return jsonResult return jsonResult
} }
// fileReadImage implements the file_read_image tool.
//
// It expects args["path"], resolves it with resolvePath, converts the image
// into a URL via models.CreateImageURLFromPath and returns a JSON-encoded
// models.MultimodalToolResp holding a text part and an image_url part.
// On any failure the error text is logged and returned as the tool output.
func fileReadImage(args map[string]string) []byte {
	// fail logs the message and hands it back as the tool result.
	fail := func(msg string) []byte {
		logger.Error(msg)
		return []byte(msg)
	}

	requested := args["path"]
	if requested == "" {
		return fail("path not provided to file_read_image tool")
	}

	resolved := resolvePath(requested)
	dataURL, err := models.CreateImageURLFromPath(resolved)
	if err != nil {
		return fail("failed to read image; error: " + err.Error())
	}

	encoded, err := json.Marshal(models.MultimodalToolResp{
		Type: "multimodal_content",
		Parts: []map[string]string{
			{"type": "text", "text": "Image at " + resolved},
			{"type": "image_url", "url": dataURL},
		},
	})
	if err != nil {
		return fail("failed to marshal result; error: " + err.Error())
	}
	return encoded
}
func fileWrite(args map[string]string) []byte { func fileWrite(args map[string]string) []byte {
path, ok := args["path"] path, ok := args["path"]
if !ok || path == "" { if !ok || path == "" {
@@ -1088,6 +1223,142 @@ func summarizeChat(args map[string]string) []byte {
return []byte(chatText) return []byte(chatText)
} }
// windowIDToHex converts a base-10 window ID into its "0x"-prefixed
// lower-case hexadecimal form. Input that does not parse as a base-10
// 64-bit integer is returned unchanged.
func windowIDToHex(decimalID string) string {
	if parsed, parseErr := strconv.ParseInt(decimalID, 10, 64); parseErr == nil {
		return fmt.Sprintf("0x%x", parsed)
	}
	return decimalID
}
// listWindows implements the list_windows tool.
//
// It runs `xdotool search --name .` to collect window IDs, asks xdotool for
// the name of each one, and returns a JSON object mapping window ID to
// window name. Windows whose name lookup fails are left out. args is unused.
func listWindows(args map[string]string) []byte {
	if !windowToolsAvailable {
		return []byte("window tools not available: xdotool or maim not found")
	}

	searchOut, err := exec.Command(xdotoolPath, "search", "--name", ".").Output()
	if err != nil {
		msg := "failed to list windows: " + err.Error()
		logger.Error(msg)
		return []byte(msg)
	}

	names := make(map[string]string)
	// strings.Fields already yields trimmed, non-empty tokens.
	for _, winID := range strings.Fields(string(searchOut)) {
		titleOut, lookupErr := exec.Command(xdotoolPath, "getwindowname", winID).Output()
		if lookupErr == nil {
			names[winID] = strings.TrimSpace(string(titleOut))
		}
	}

	encoded, err := json.Marshal(names)
	if err != nil {
		msg := "failed to marshal window list: " + err.Error()
		logger.Error(msg)
		return []byte(msg)
	}
	return encoded
}
// captureWindow implements the capture_window tool.
//
// args["window"] is either a decimal window ID or a name pattern; a name is
// resolved to the first ID printed by `xdotool search --name`. The window is
// captured with maim into /tmp/<letters-of-window-name>_<unix-seconds>.jpg and
// the saved path is reported in the returned message.
func captureWindow(args map[string]string) []byte {
	if !windowToolsAvailable {
		return []byte("window tools not available: xdotool or maim not found")
	}
	target := args["window"]
	if target == "" {
		return []byte("window parameter required (window ID or name)")
	}

	// Anything that is not a plain integer is treated as a name to search for.
	resolvedID := target
	if _, convErr := strconv.Atoi(target); convErr != nil {
		found, searchErr := exec.Command(xdotoolPath, "search", "--name", target).Output()
		matches := strings.Fields(string(found))
		if searchErr != nil || len(matches) == 0 {
			return []byte("window not found: " + target)
		}
		resolvedID = matches[0]
	}

	// A failed name lookup just leaves the label empty, which falls back to
	// "window" below.
	rawTitle, _ := exec.Command(xdotoolPath, "getwindowname", resolvedID).Output()
	label := regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(strings.TrimSpace(string(rawTitle)), "")
	if label == "" {
		label = "window"
	}

	shotPath := fmt.Sprintf("/tmp/%s_%d.jpg", label, time.Now().Unix())
	if err := exec.Command(maimPath, "-i", windowIDToHex(resolvedID), shotPath).Run(); err != nil {
		msg := "failed to capture window: " + err.Error()
		logger.Error(msg)
		return []byte(msg)
	}
	return []byte("screenshot saved: " + shotPath)
}
func captureWindowAndView(args map[string]string) []byte {
if !windowToolsAvailable {
return []byte("window tools not available: xdotool or maim not found")
}
window, ok := args["window"]
if !ok || window == "" {
return []byte("window parameter required (window ID or name)")
}
var windowID string
if _, err := strconv.Atoi(window); err == nil {
windowID = window
} else {
cmd := exec.Command(xdotoolPath, "search", "--name", window)
output, err := cmd.Output()
if err != nil || len(strings.Fields(string(output))) == 0 {
return []byte("window not found: " + window)
}
windowID = strings.Fields(string(output))[0]
}
nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
nameOutput, _ := nameCmd.Output()
windowName := strings.TrimSpace(string(nameOutput))
windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
if windowName == "" {
windowName = "window"
}
timestamp := time.Now().Unix()
filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
captureCmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
if err := captureCmd.Run(); err != nil {
msg := "failed to capture window: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
dataURL, err := models.CreateImageURLFromPath(filename)
if err != nil {
msg := "failed to create image URL: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
result := models.MultimodalToolResp{
Type: "multimodal_content",
Parts: []map[string]string{
{"type": "text", "text": "Screenshot saved: " + filename},
{"type": "image_url", "url": dataURL},
},
}
jsonResult, err := json.Marshal(result)
if err != nil {
msg := "failed to marshal result: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return jsonResult
}
type fnSig func(map[string]string) []byte type fnSig func(map[string]string) []byte
var fnMap = map[string]fnSig{ var fnMap = map[string]fnSig{
@@ -1101,6 +1372,7 @@ var fnMap = map[string]fnSig{
"read_url_raw": readURLRaw, "read_url_raw": readURLRaw,
"file_create": fileCreate, "file_create": fileCreate,
"file_read": fileRead, "file_read": fileRead,
"file_read_image": fileReadImage,
"file_write": fileWrite, "file_write": fileWrite,
"file_write_append": fileWriteAppend, "file_write_append": fileWriteAppend,
"file_edit": fileEdit, "file_edit": fileEdit,
@@ -1116,6 +1388,409 @@ var fnMap = map[string]fnSig{
"summarize_chat": summarizeChat, "summarize_chat": summarizeChat,
} }
// removeWindowToolsFromBaseTools drops the three window tools (list_windows,
// capture_window, capture_window_and_view) from baseTools and removes their
// handlers from fnMap.
func removeWindowToolsFromBaseTools() {
	var kept []models.Tool
	for _, tool := range baseTools {
		switch tool.Function.Name {
		case "list_windows", "capture_window", "capture_window_and_view":
			continue
		}
		kept = append(kept, tool)
	}
	baseTools = kept

	for _, name := range []string{"list_windows", "capture_window", "capture_window_and_view"} {
		delete(fnMap, name)
	}
}
// removePlaywrightToolsFromBaseTools drops every Playwright tool from
// baseTools and removes the matching handlers from fnMap.
//
// The name set must cover everything registerPlaywrightTools adds; a tool
// missing from it is appended again on each re-registration and stays
// registered after Playwright is disabled.
func removePlaywrightToolsFromBaseTools() {
	playwrightToolNames := map[string]struct{}{
		"pw_start":               {},
		"pw_stop":                {},
		"pw_is_running":          {},
		"pw_navigate":            {},
		"pw_click":               {},
		"pw_click_at":            {},
		"pw_fill":                {},
		"pw_extract_text":        {},
		"pw_screenshot":          {},
		"pw_screenshot_and_view": {},
		"pw_wait_for_selector":   {},
		"pw_drag":                {},
		"pw_get_html":            {},
		"pw_get_dom":             {},
		"pw_search_elements":     {},
	}
	var filtered []models.Tool
	for _, tool := range baseTools {
		if _, isPlaywright := playwrightToolNames[tool.Function.Name]; !isPlaywright {
			filtered = append(filtered, tool)
		}
	}
	baseTools = filtered
	// Deleting from the same set keeps fnMap and baseTools in step.
	for name := range playwrightToolNames {
		delete(fnMap, name)
	}
}
// registerWindowTools rebuilds the window tool registration from scratch.
//
// Existing window tools are removed first. When windowToolsAvailable is true,
// list_windows and capture_window are registered in fnMap and baseTools;
// capture_window_and_view is added only when modelHasVision is true. The
// window section of toolSysMsg is stripped before it is re-appended, so
// repeated calls do not grow the prompt.
func registerWindowTools() {
	removeWindowToolsFromBaseTools()
	// Mirror the removal above for the prompt text; without this every call
	// would append another copy of windowToolSysMsg.
	toolSysMsg = strings.ReplaceAll(toolSysMsg, windowToolSysMsg, "")
	if !windowToolsAvailable {
		return
	}
	fnMap["list_windows"] = listWindows
	fnMap["capture_window"] = captureWindow
	windowTools := []models.Tool{
		{
			Type: "function",
			Function: models.ToolFunc{
				Name:        "list_windows",
				Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
				Parameters: models.ToolFuncParams{
					Type:       "object",
					Required:   []string{},
					Properties: map[string]models.ToolArgProps{},
				},
			},
		},
		{
			Type: "function",
			Function: models.ToolFunc{
				Name:        "capture_window",
				Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
				Parameters: models.ToolFuncParams{
					Type:     "object",
					Required: []string{"window"},
					Properties: map[string]models.ToolArgProps{
						"window": models.ToolArgProps{
							Type:        "string",
							Description: "window ID or window name (partial match)",
						},
					},
				},
			},
		},
	}
	if modelHasVision {
		fnMap["capture_window_and_view"] = captureWindowAndView
		windowTools = append(windowTools, models.Tool{
			Type: "function",
			Function: models.ToolFunc{
				Name:        "capture_window_and_view",
				Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
				Parameters: models.ToolFuncParams{
					Type:     "object",
					Required: []string{"window"},
					Properties: map[string]models.ToolArgProps{
						"window": models.ToolArgProps{
							Type:        "string",
							Description: "window ID or window name (partial match)",
						},
					},
				},
			},
		})
	}
	baseTools = append(baseTools, windowTools...)
	toolSysMsg += windowToolSysMsg
}
// registerPlaywrightTools rebuilds the Playwright tool registration from
// scratch.
//
// Existing Playwright tools are removed first. When cfg is set and
// cfg.PlaywrightEnabled is true, every pw_* handler is registered in fnMap
// and its definition appended to baseTools. The browser section of
// toolSysMsg is stripped before it is re-appended, so repeated calls do not
// grow the prompt and disabling Playwright also drops its prompt text.
func registerPlaywrightTools() {
	removePlaywrightToolsFromBaseTools()
	// Mirror the removal above for the prompt text; without this every call
	// would append another copy of browserToolSysMsg.
	toolSysMsg = strings.ReplaceAll(toolSysMsg, browserToolSysMsg, "")
	if cfg == nil || !cfg.PlaywrightEnabled {
		return
	}

	fnMap["pw_start"] = pwStart
	fnMap["pw_stop"] = pwStop
	fnMap["pw_is_running"] = pwIsRunning
	fnMap["pw_navigate"] = pwNavigate
	fnMap["pw_click"] = pwClick
	fnMap["pw_click_at"] = pwClickAt
	fnMap["pw_fill"] = pwFill
	fnMap["pw_extract_text"] = pwExtractText
	fnMap["pw_screenshot"] = pwScreenshot
	fnMap["pw_screenshot_and_view"] = pwScreenshotAndView
	fnMap["pw_wait_for_selector"] = pwWaitForSelector
	fnMap["pw_drag"] = pwDrag
	fnMap["pw_get_html"] = pwGetHTML
	fnMap["pw_get_dom"] = pwGetDOM
	fnMap["pw_search_elements"] = pwSearchElements

	// strArg describes one string-typed tool argument.
	strArg := func(description string) models.ToolArgProps {
		return models.ToolArgProps{Type: "string", Description: description}
	}
	// newTool builds a function-type tool definition with an object
	// parameter schema.
	newTool := func(name, description string, required []string, props map[string]models.ToolArgProps) models.Tool {
		return models.Tool{
			Type: "function",
			Function: models.ToolFunc{
				Name:        name,
				Description: description,
				Parameters: models.ToolFuncParams{
					Type:       "object",
					Required:   required,
					Properties: props,
				},
			},
		}
	}

	playwrightTools := []models.Tool{
		newTool(
			"pw_start",
			"Start a Playwright browser instance. Call this first before using other pw_ tools. Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).",
			[]string{},
			map[string]models.ToolArgProps{},
		),
		newTool(
			"pw_stop",
			"Stop the Playwright browser instance. Call when done with browser automation.",
			[]string{},
			map[string]models.ToolArgProps{},
		),
		newTool(
			"pw_is_running",
			"Check if Playwright browser is currently running.",
			[]string{},
			map[string]models.ToolArgProps{},
		),
		newTool(
			"pw_navigate",
			"Navigate to a URL in the browser.",
			[]string{"url"},
			map[string]models.ToolArgProps{
				"url": strArg("URL to navigate to"),
			},
		),
		newTool(
			"pw_click",
			"Click on an element using CSS selector. Use 'index' for multiple matches (default 0).",
			[]string{"selector"},
			map[string]models.ToolArgProps{
				"selector": strArg("CSS selector for the element to click"),
				"index":    strArg("optional index for multiple matches (default 0)"),
			},
		),
		// pw_click_at has a handler and a prompt entry, so it needs a
		// definition here as well; the x/y arguments follow the prompt entry.
		newTool(
			"pw_click_at",
			"Click at specific X,Y coordinates on the page.",
			[]string{"x", "y"},
			map[string]models.ToolArgProps{
				"x": strArg("X coordinate"),
				"y": strArg("Y coordinate"),
			},
		),
		newTool(
			"pw_fill",
			"Fill an input field with text using CSS selector.",
			[]string{"selector", "text"},
			map[string]models.ToolArgProps{
				"selector": strArg("CSS selector for the input element"),
				"text":     strArg("text to fill into the input"),
				"index":    strArg("optional index for multiple matches (default 0)"),
			},
		),
		newTool(
			"pw_extract_text",
			"Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.",
			[]string{"selector"},
			map[string]models.ToolArgProps{
				"selector": strArg("CSS selector (use 'body' for all page text)"),
			},
		),
		newTool(
			"pw_screenshot",
			"Take a screenshot of the page or a specific element. Returns file path to saved image.",
			[]string{},
			map[string]models.ToolArgProps{
				"selector":  strArg("optional CSS selector for element to screenshot"),
				"full_page": strArg("optional: 'true' to capture full page (default false)"),
			},
		),
		newTool(
			"pw_screenshot_and_view",
			"Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.",
			[]string{},
			map[string]models.ToolArgProps{
				"selector":  strArg("optional CSS selector for element to screenshot"),
				"full_page": strArg("optional: 'true' to capture full page (default false)"),
			},
		),
		newTool(
			"pw_wait_for_selector",
			"Wait for an element to appear on the page.",
			[]string{"selector"},
			map[string]models.ToolArgProps{
				"selector": strArg("CSS selector to wait for"),
				"timeout":  strArg("optional timeout in ms (default 30000)"),
			},
		),
		newTool(
			"pw_drag",
			"Drag the mouse from one point to another.",
			[]string{"x1", "y1", "x2", "y2"},
			map[string]models.ToolArgProps{
				"x1": strArg("starting X coordinate"),
				"y1": strArg("starting Y coordinate"),
				"x2": strArg("ending X coordinate"),
				"y2": strArg("ending Y coordinate"),
			},
		),
		newTool(
			"pw_get_html",
			"Get the HTML content of the page or a specific element.",
			[]string{},
			map[string]models.ToolArgProps{
				"selector": strArg("optional CSS selector (default: body)"),
			},
		),
		newTool(
			"pw_get_dom",
			"Get a structured DOM representation of an element with tag, attributes, text, and children.",
			[]string{},
			map[string]models.ToolArgProps{
				"selector": strArg("optional CSS selector (default: body)"),
			},
		),
		newTool(
			"pw_search_elements",
			"Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.",
			[]string{},
			map[string]models.ToolArgProps{
				"text":     strArg("text to search for in elements"),
				"selector": strArg("CSS selector to search for"),
			},
		),
	}
	baseTools = append(baseTools, playwrightTools...)
	toolSysMsg += browserToolSysMsg
}
// callToolWithAgent calls the tool and applies any registered agent. // callToolWithAgent calls the tool and applies any registered agent.
func callToolWithAgent(name string, args map[string]string) []byte { func callToolWithAgent(name string, args map[string]string) []byte {
registerWebAgents() registerWebAgents()
@@ -1327,6 +2002,24 @@ var baseTools = []models.Tool{
}, },
}, },
}, },
// file_read_image
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "file_read_image",
Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"path"},
Properties: map[string]models.ToolArgProps{
"path": models.ToolArgProps{
Type: "string",
Description: "path of the image file to read",
},
},
},
},
},
// file_write // file_write
models.Tool{ models.Tool{
Type: "function", Type: "function",
@@ -1580,3 +2273,56 @@ var baseTools = []models.Tool{
}, },
}, },
} }
// init makes sure the window tool definitions are present in baseTools when
// xdotool and maim are available.
//
// registerWindowTools appends the same definitions, so any tool whose name is
// already in baseTools is skipped here; otherwise list_windows and
// capture_window would appear twice. capture_window_and_view is offered only
// when modelHasVision is set, matching registerWindowTools.
func init() {
	if !windowToolsAvailable {
		return
	}

	present := make(map[string]struct{}, len(baseTools))
	for i := range baseTools {
		present[baseTools[i].Function.Name] = struct{}{}
	}

	// windowArg is the shared schema for the single "window" argument.
	windowArg := map[string]models.ToolArgProps{
		"window": models.ToolArgProps{
			Type:        "string",
			Description: "window ID or window name (partial match)",
		},
	}
	candidates := []models.Tool{
		{
			Type: "function",
			Function: models.ToolFunc{
				Name:        "list_windows",
				Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
				Parameters: models.ToolFuncParams{
					Type:       "object",
					Required:   []string{},
					Properties: map[string]models.ToolArgProps{},
				},
			},
		},
		{
			Type: "function",
			Function: models.ToolFunc{
				Name:        "capture_window",
				Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
				Parameters: models.ToolFuncParams{
					Type:       "object",
					Required:   []string{"window"},
					Properties: windowArg,
				},
			},
		},
	}
	if modelHasVision {
		candidates = append(candidates, models.Tool{
			Type: "function",
			Function: models.ToolFunc{
				Name:        "capture_window_and_view",
				Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
				Parameters: models.ToolFuncParams{
					Type:       "object",
					Required:   []string{"window"},
					Properties: windowArg,
				},
			},
		})
	}

	for _, tool := range candidates {
		if _, dup := present[tool.Function.Name]; dup {
			continue
		}
		baseTools = append(baseTools, tool)
	}
}

653
tools_playwright.go Normal file
View File

@@ -0,0 +1,653 @@
package main
import (
"encoding/json"
"fmt"
"gf-lt/models"
"os"
"strconv"
"strings"
"sync"
"github.com/playwright-community/playwright-go"
)
// browserToolSysMsg is a prompt fragment that lists the Playwright (pw_*)
// browser automation tools as a JSON-like array: each entry gives the tool
// name, its argument names and a short hint on when to use it.
var browserToolSysMsg = `
Additional browser automation tools (Playwright):
[
{
"name": "pw_start",
"args": [],
"when_to_use": "start a browser instance before doing any browser automation. Must be called first."
},
{
"name": "pw_stop",
"args": [],
"when_to_use": "stop the browser instance when done with automation."
},
{
"name": "pw_is_running",
"args": [],
"when_to_use": "check if browser is currently running."
},
{
"name": "pw_navigate",
"args": ["url"],
"when_to_use": "open a specific URL in the web browser."
},
{
"name": "pw_click",
"args": ["selector", "index"],
"when_to_use": "click on an element on the current webpage. Use 'index' for multiple matches (default 0)."
},
{
"name": "pw_fill",
"args": ["selector", "text", "index"],
"when_to_use": "type text into an input field. Use 'index' for multiple matches (default 0)."
},
{
"name": "pw_extract_text",
"args": ["selector"],
"when_to_use": "extract text content from the page or specific elements. Use selector 'body' for all page text."
},
{
"name": "pw_screenshot",
"args": ["selector", "full_page"],
"when_to_use": "take a screenshot of the page or a specific element. Returns a file path to the image. Use to verify actions or inspect visual state."
},
{
"name": "pw_screenshot_and_view",
"args": ["selector", "full_page"],
"when_to_use": "take a screenshot and return the image for viewing. Use to visually verify page state."
},
{
"name": "pw_wait_for_selector",
"args": ["selector", "timeout"],
"when_to_use": "wait for an element to appear on the page before proceeding with further actions."
},
{
"name": "pw_drag",
"args": ["x1", "y1", "x2", "y2"],
"when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)."
},
{
"name": "pw_click_at",
"args": ["x", "y"],
"when_to_use": "click at specific X,Y coordinates on the page. Use when you know the exact position."
},
{
"name": "pw_get_html",
"args": ["selector"],
"when_to_use": "get the HTML content of the page or a specific element. Use to understand page structure or extract raw HTML."
},
{
"name": "pw_get_dom",
"args": ["selector"],
"when_to_use": "get a structured DOM representation with tag, attributes, text, and children. Use to inspect element hierarchy and properties."
},
{
"name": "pw_search_elements",
"args": ["text", "selector"],
"when_to_use": "search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML."
}
]
`
// Package-level Playwright state shared by the pw* tool handlers below.
var (
// pw is the Playwright driver handle; it is assigned by checkPlaywright
// and stays nil until that call has run.
pw *playwright.Playwright
// browser is the Chromium instance launched by pwStart and closed by pwStop.
browser playwright.Browser
// browserStarted is set to true at the end of a successful pwStart and
// reset to false by pwStop.
browserStarted bool
// browserStartMu is held by pwStart and pwStop for their whole duration.
browserStartMu sync.Mutex
// page is the single page created by pwStart; the other pw* handlers
// operate on it.
page playwright.Page
)
// pwShutDown closes the browser via pwStop and then stops the Playwright
// driver, returning the driver's stop error. When pw is nil there is
// nothing to shut down and nil is returned.
func pwShutDown() error {
	if pw != nil {
		pwStop(nil)
		return pw.Stop()
	}
	return nil
}
// installPW runs the Playwright installer in non-verbose mode. A failure is
// logged as a warning and returned to the caller.
func installPW() error {
	opts := &playwright.RunOptions{Verbose: false}
	if installErr := playwright.Install(opts); installErr != nil {
		logger.Warn("playwright not available", "error", installErr)
		return installErr
	}
	return nil
}
// checkPlaywright starts the Playwright driver and stores the handle in the
// package-level pw variable. A failure is logged as a warning and returned.
func checkPlaywright() error {
	instance, runErr := playwright.Run()
	// pw is assigned unconditionally, exactly as the result of Run is.
	pw = instance
	if runErr != nil {
		logger.Warn("playwright not available", "error", runErr)
		return runErr
	}
	return nil
}
// pwStart launches a Chromium browser and opens a single page, storing both
// in the package-level browser/page variables. args is unused.
//
// It returns a JSON object: {"success": true, ...} on success, or
// {"error": "..."} when the browser is already running, the Playwright
// driver has not been initialized, or launch/page creation fails.
// Error text is JSON-encoded so quotes and newlines in Playwright messages
// cannot break the response.
func pwStart(args map[string]string) []byte {
	browserStartMu.Lock()
	defer browserStartMu.Unlock()
	if browserStarted {
		return []byte(`{"error": "Browser already started"}`)
	}
	// Without a driver handle pw.Chromium would dereference a nil pointer.
	if pw == nil {
		return []byte(`{"error": "Playwright is not initialized"}`)
	}
	launched, err := pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
		Headless: playwright.Bool(!cfg.PlaywrightDebug),
	})
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to launch browser: "+err.Error())))
	}
	newPage, err := launched.NewPage()
	if err != nil {
		if closeErr := launched.Close(); closeErr != nil {
			logger.Warn("failed to close browser after page creation error", "error", closeErr)
		}
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to create page: "+err.Error())))
	}
	// Publish the globals only once everything succeeded, so a failed start
	// never leaves a closed browser behind.
	browser = launched
	page = newPage
	browserStarted = true
	return []byte(`{"success": true, "message": "Browser started"}`)
}
// pwStop closes the current page and browser (when set), clears the
// package-level handles and marks the browser as stopped. args is unused.
// The returned JSON always reports success; it only distinguishes between
// "stopped" and "was not running". Close errors are not reported.
func pwStop(args map[string]string) []byte {
	browserStartMu.Lock()
	defer browserStartMu.Unlock()
	if browserStarted {
		if page != nil {
			page.Close()
			page = nil
		}
		if browser != nil {
			browser.Close()
			browser = nil
		}
		browserStarted = false
		return []byte(`{"success": true, "message": "Browser stopped"}`)
	}
	return []byte(`{"success": true, "message": "Browser was not running"}`)
}
// pwIsRunning reports, as a JSON object with a "running" flag and a
// human-readable message, whether browserStarted is currently set.
// args is unused.
func pwIsRunning(args map[string]string) []byte {
	status := `{"running": false, "message": "Browser is not running"}`
	if browserStarted {
		status = `{"running": true, "message": "Browser is running"}`
	}
	return []byte(status)
}
// pwNavigate opens args["url"] in the current page.
//
// It returns {"success": true, "title": ..., "url": ...} on success and
// {"error": "..."} when the url is missing, the browser is not started, or
// navigation fails. Title, URL and error text are JSON-encoded because page
// titles and Playwright messages may contain quotes or newlines.
func pwNavigate(args map[string]string) []byte {
	url, ok := args["url"]
	if !ok || url == "" {
		return []byte(`{"error": "url not provided"}`)
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	if _, err := page.Goto(url); err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to navigate: "+err.Error())))
	}
	// A missing title is not fatal for navigation; report it empty and log.
	title, err := page.Title()
	if err != nil {
		logger.Warn("failed to read page title", "url", url, "error", err)
	}
	return []byte(fmt.Sprintf(`{"success": true, "title": %s, "url": %s}`,
		jsonString(title), jsonString(page.URL())))
}
// pwClick clicks the element matched by args["selector"]. When the selector
// matches several elements, args["index"] (zero-based, default 0) picks one;
// an unparsable index is logged and treated as 0.
//
// It returns {"success": true, ...} or {"error": "..."}. Error text is
// JSON-encoded so Playwright messages cannot produce invalid JSON.
func pwClick(args map[string]string) []byte {
	selector, ok := args["selector"]
	if !ok || selector == "" {
		return []byte(`{"error": "selector not provided"}`)
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	index := 0
	if raw := args["index"]; raw != "" {
		if i, err := strconv.Atoi(raw); err != nil {
			logger.Warn("failed to parse index", "value", raw, "error", err)
		} else {
			index = i
		}
	}
	locator := page.Locator(selector)
	count, err := locator.Count()
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
	}
	if index >= count {
		return []byte(fmt.Sprintf(`{"error": "Element not found at index %d (found %d elements)"}`, index, count))
	}
	if err := locator.Nth(index).Click(); err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to click: "+err.Error())))
	}
	return []byte(`{"success": true, "message": "Clicked element"}`)
}
// pwFill types args["text"] (empty when absent) into the input matched by
// args["selector"]. args["index"] (zero-based, default 0) selects among
// multiple matches; an unparsable index is logged and treated as 0.
//
// It returns {"success": true, ...} or {"error": "..."}. Error text is
// JSON-encoded so Playwright messages cannot produce invalid JSON.
func pwFill(args map[string]string) []byte {
	selector, ok := args["selector"]
	if !ok || selector == "" {
		return []byte(`{"error": "selector not provided"}`)
	}
	// A missing key yields "", which clears the field.
	text := args["text"]
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	index := 0
	if raw := args["index"]; raw != "" {
		if i, err := strconv.Atoi(raw); err != nil {
			logger.Warn("failed to parse index", "value", raw, "error", err)
		} else {
			index = i
		}
	}
	locator := page.Locator(selector)
	count, err := locator.Count()
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
	}
	if index >= count {
		// Same wording as pwClick so callers see how many matches exist.
		return []byte(fmt.Sprintf(`{"error": "Element not found at index %d (found %d elements)"}`, index, count))
	}
	if err := locator.Nth(index).Fill(text); err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to fill: "+err.Error())))
	}
	return []byte(`{"success": true, "message": "Filled input"}`)
}
// pwExtractText returns the text content of the elements matched by
// args["selector"] (default "body") as {"text": "..."}. For selectors other
// than "body" the texts of all matches are joined with newlines; elements
// whose text cannot be read are logged and skipped.
//
// The text and any error message are JSON-encoded: page text routinely
// contains quotes and newlines that would otherwise corrupt the response.
func pwExtractText(args map[string]string) []byte {
	selector := args["selector"]
	if selector == "" {
		selector = "body"
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	locator := page.Locator(selector)
	count, err := locator.Count()
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
	}
	if count == 0 {
		return []byte(`{"error": "No elements found"}`)
	}
	if selector == "body" {
		text, err := locator.TextContent()
		if err != nil {
			return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to get text: "+err.Error())))
		}
		return []byte(fmt.Sprintf(`{"text": %s}`, jsonString(text)))
	}
	texts := make([]string, 0, count)
	for i := 0; i < count; i++ {
		text, err := locator.Nth(i).TextContent()
		if err != nil {
			logger.Warn("failed to get element text", "selector", selector, "index", i, "error", err)
			continue
		}
		texts = append(texts, text)
	}
	return []byte(fmt.Sprintf(`{"text": %s}`, jsonString(joinLines(texts))))
}
// joinLines concatenates lines, separating consecutive entries with a single
// newline. A nil or empty slice yields the empty string.
func joinLines(lines []string) string {
	return strings.Join(lines, "\n")
}
// pwScreenshot saves a PNG screenshot and returns {"path": "..."}.
// A non-empty args["selector"] other than "body" captures just that element;
// otherwise the page is captured, in full when args["full_page"] is "true".
//
// The file name is derived from the process ID only, so each call overwrites
// the previous screenshot taken by this process. Error text and the path are
// JSON-encoded so the response is always valid JSON.
func pwScreenshot(args map[string]string) []byte {
	selector := args["selector"]
	fullPage := args["full_page"] == "true"
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
	var err error
	if selector != "" && selector != "body" {
		_, err = page.Locator(selector).Screenshot(playwright.LocatorScreenshotOptions{
			Path: playwright.String(path),
		})
	} else {
		_, err = page.Screenshot(playwright.PageScreenshotOptions{
			Path:     playwright.String(path),
			FullPage: playwright.Bool(fullPage),
		})
	}
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to take screenshot: "+err.Error())))
	}
	return []byte(fmt.Sprintf(`{"path": %s}`, jsonString(path)))
}
// pwScreenshotAndView takes a screenshot exactly like pwScreenshot and
// returns it as a marshalled models.MultimodalToolResp holding a text part
// with the saved path and an image_url part built from the file.
//
// The file name is derived from the process ID only, so each call overwrites
// the previous screenshot taken by this process. On failure it returns
// {"error": "..."} with the message JSON-encoded.
func pwScreenshotAndView(args map[string]string) []byte {
	selector := args["selector"]
	fullPage := args["full_page"] == "true"
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
	var err error
	if selector != "" && selector != "body" {
		_, err = page.Locator(selector).Screenshot(playwright.LocatorScreenshotOptions{
			Path: playwright.String(path),
		})
	} else {
		_, err = page.Screenshot(playwright.PageScreenshotOptions{
			Path:     playwright.String(path),
			FullPage: playwright.Bool(fullPage),
		})
	}
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to take screenshot: "+err.Error())))
	}
	dataURL, err := models.CreateImageURLFromPath(path)
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to create image URL: "+err.Error())))
	}
	resp := models.MultimodalToolResp{
		Type: "multimodal_content",
		Parts: []map[string]string{
			{"type": "text", "text": "Screenshot saved: " + path},
			{"type": "image_url", "url": dataURL},
		},
	}
	jsonResult, err := json.Marshal(resp)
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to marshal result: "+err.Error())))
	}
	return jsonResult
}
// pwWaitForSelector blocks until the element matched by args["selector"]
// satisfies Playwright's default wait condition or the timeout elapses.
// args["timeout"] is in milliseconds (default 30000); an unparsable value is
// logged and the default is used.
//
// It returns {"success": true, ...} or {"error": "..."}. The error text is
// JSON-encoded because Playwright timeout messages span several lines and
// contain quotes.
func pwWaitForSelector(args map[string]string) []byte {
	selector, ok := args["selector"]
	if !ok || selector == "" {
		return []byte(`{"error": "selector not provided"}`)
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	timeout := 30000
	if raw := args["timeout"]; raw != "" {
		if t, err := strconv.Atoi(raw); err != nil {
			logger.Warn("failed to parse timeout", "value", raw, "error", err)
		} else {
			timeout = t
		}
	}
	err := page.Locator(selector).WaitFor(playwright.LocatorWaitForOptions{
		Timeout: playwright.Float(float64(timeout)),
	})
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("element not found: "+err.Error())))
	}
	return []byte(`{"success": true, "message": "Element found"}`)
}
// pwDrag performs a mouse drag from (x1,y1) to (x2,y2): move, press, move,
// release. All four coordinates are required keys in args and must parse as
// floats; a value that does not parse is reported as an error instead of
// silently dragging from/to coordinate 0 (this matches pwClickAt).
//
// It returns {"success": true, ...} or {"error": "..."} with the error text
// JSON-encoded.
func pwDrag(args map[string]string) []byte {
	names := [4]string{"x1", "y1", "x2", "y2"}
	var raw [4]string
	for i, name := range names {
		value, ok := args[name]
		if !ok {
			return []byte(fmt.Sprintf(`{"error": "%s not provided"}`, name))
		}
		raw[i] = value
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	var coords [4]float64
	for i, name := range names {
		parsed, err := strconv.ParseFloat(raw[i], 64)
		if err != nil {
			return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to parse "+name+": "+err.Error())))
		}
		coords[i] = parsed
	}
	mouse := page.Mouse()
	if err := mouse.Move(coords[0], coords[1]); err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to move mouse: "+err.Error())))
	}
	if err := mouse.Down(); err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to mouse down: "+err.Error())))
	}
	if err := mouse.Move(coords[2], coords[3]); err != nil {
		// Best effort: release the button so it is not left pressed; the
		// move error is the one worth reporting.
		_ = mouse.Up()
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to move mouse: "+err.Error())))
	}
	if err := mouse.Up(); err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to mouse up: "+err.Error())))
	}
	return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`,
		raw[0], raw[1], raw[2], raw[3]))
}
// pwClickAt clicks at page coordinates (args["x"], args["y"]). Both keys are
// required and must parse as floats.
//
// It returns {"success": true, ...} or {"error": "..."} with the error text
// JSON-encoded so parser and Playwright messages (which contain quotes)
// cannot corrupt the response.
func pwClickAt(args map[string]string) []byte {
	x, ok := args["x"]
	if !ok {
		return []byte(`{"error": "x not provided"}`)
	}
	y, ok := args["y"]
	if !ok {
		return []byte(`{"error": "y not provided"}`)
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	fx, err := strconv.ParseFloat(x, 64)
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to parse x: "+err.Error())))
	}
	fy, err := strconv.ParseFloat(y, 64)
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to parse y: "+err.Error())))
	}
	if err := page.Mouse().Click(fx, fy); err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to click: "+err.Error())))
	}
	// x and y parsed as floats above, so they are safe to embed verbatim.
	return []byte(fmt.Sprintf(`{"success": true, "message": "Clicked at (%s,%s)"}`, x, y))
}
// pwGetHTML returns the inner HTML of the first element matched by
// args["selector"] (default "body") as {"html": "..."}.
//
// On failure it returns {"error": "..."}; both the HTML and the error text
// are JSON-encoded.
func pwGetHTML(args map[string]string) []byte {
	selector := args["selector"]
	if selector == "" {
		selector = "body"
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	locator := page.Locator(selector)
	count, err := locator.Count()
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
	}
	if count == 0 {
		return []byte(`{"error": "No elements found"}`)
	}
	html, err := locator.First().InnerHTML()
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to get HTML: "+err.Error())))
	}
	return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html)))
}
// DOMElement is the JSON-serialisable description of one page element as
// produced by elementToDOM. Every field is omitted from the JSON output when
// empty.
type DOMElement struct {
// Tag is the element's nodeName, lower-cased.
Tag string `json:"tag,omitempty"`
// Attributes maps attribute names to their string values.
Attributes map[string]string `json:"attributes,omitempty"`
// Text is the element's text content.
Text string `json:"text,omitempty"`
// Children holds the nested elements collected via buildDOMTree.
Children []DOMElement `json:"children,omitempty"`
// Selector is not populated by elementToDOM.
Selector string `json:"selector,omitempty"`
// InnerHTML is the element's inner HTML markup.
InnerHTML string `json:"innerHTML,omitempty"`
}
// buildDOMTree converts every element matched by locator into a DOMElement.
// It fails only when the match count cannot be obtained; elements whose
// conversion returns an error are left out of the result.
func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) {
	total, err := locator.Count()
	if err != nil {
		return nil, err
	}
	var nodes []DOMElement
	for idx := 0; idx < total; idx++ {
		node, convErr := elementToDOM(locator.Nth(idx))
		if convErr == nil {
			nodes = append(nodes, node)
		}
	}
	return nodes, nil
}
// elementToDOM builds a DOMElement for the element el points at: lower-cased
// tag name, string-valued attributes, text content, inner HTML and,
// recursively, its direct child elements.
//
// Every lookup is best effort: a failing lookup leaves the corresponding
// field empty. The returned error is always nil; it is kept in the signature
// for callers that check it.
func elementToDOM(el playwright.Locator) (DOMElement, error) {
	var dom DOMElement
	if tag, err := el.Evaluate(`el => el.nodeName`, nil); err == nil {
		dom.Tag = strings.ToLower(fmt.Sprintf("%v", tag))
	}
	attrs, err := el.Evaluate(`el => {
		let attrs = {};
		for (let i = 0; i < el.attributes.length; i++) {
			let attr = el.attributes[i];
			attrs[attr.name] = attr.value;
		}
		return attrs;
	}`, nil)
	if err == nil {
		if amap, ok := attrs.(map[string]any); ok {
			attributes := make(map[string]string, len(amap))
			for k, v := range amap {
				if vs, ok := v.(string); ok {
					attributes[k] = vs
				}
			}
			if len(attributes) > 0 {
				dom.Attributes = attributes
			}
		}
	}
	if text, err := el.TextContent(); err == nil && text != "" {
		dom.Text = text
	}
	if innerHTML, err := el.InnerHTML(); err == nil && innerHTML != "" {
		dom.InnerHTML = innerHTML
	}
	// Recurse into DIRECT children only. A bare "*" matches every descendant,
	// which made each level re-list (and re-expand) all deeper nodes: the
	// tree was wrong and the work grew exponentially with depth.
	children, err := buildDOMTree(el.Locator(":scope > *"))
	if err == nil && len(children) > 0 {
		dom.Children = children
	}
	return dom, nil
}
// pwGetDOM returns a structured description (see DOMElement) of the first
// element matched by args["selector"] (default "body") as {"dom": {...}}.
//
// On failure it returns {"error": "..."} with the message JSON-encoded.
func pwGetDOM(args map[string]string) []byte {
	selector := args["selector"]
	if selector == "" {
		selector = "body"
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	locator := page.Locator(selector)
	count, err := locator.Count()
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
	}
	if count == 0 {
		return []byte(`{"error": "No elements found"}`)
	}
	dom, err := elementToDOM(locator.First())
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to get DOM: "+err.Error())))
	}
	data, err := json.Marshal(dom)
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to marshal DOM: "+err.Error())))
	}
	return []byte(fmt.Sprintf(`{"dom": %s}`, data))
}
// pwSearchElements finds elements by visible text (args["text"], which takes
// precedence) or by selector (args["selector"]) and returns
// {"elements": [...]}, where each entry carries the match index, lower-cased
// tag, text content and inner HTML.
//
// Per-element lookups are best effort: a field whose lookup fails is left
// empty. Failures of the search itself are returned as {"error": "..."} with
// the message JSON-encoded.
func pwSearchElements(args map[string]string) []byte {
	text := args["text"]
	selector := args["selector"]
	if text == "" && selector == "" {
		return []byte(`{"error": "text or selector not provided"}`)
	}
	if !browserStarted || page == nil {
		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
	}
	var locator playwright.Locator
	if text != "" {
		locator = page.GetByText(text)
	} else {
		locator = page.Locator(selector)
	}
	count, err := locator.Count()
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to search elements: "+err.Error())))
	}
	if count == 0 {
		return []byte(`{"elements": []}`)
	}
	results := make([]map[string]string, 0, count)
	for i := 0; i < count; i++ {
		el := locator.Nth(i)
		// Only format the tag on success; a nil result would otherwise be
		// rendered as the literal "<nil>".
		tagName := ""
		if tag, err := el.Evaluate(`el => el.nodeName`, nil); err == nil {
			tagName = strings.ToLower(fmt.Sprintf("%v", tag))
		}
		// Errors are deliberately ignored: the entry is still reported with
		// whatever fields could be read.
		elText, _ := el.TextContent()
		elHTML, _ := el.InnerHTML()
		results = append(results, map[string]string{
			"index": strconv.Itoa(i),
			"tag":   tagName,
			"text":  elText,
			"html":  elHTML,
		})
	}
	data, err := json.Marshal(results)
	if err != nil {
		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to marshal results: "+err.Error())))
	}
	return []byte(fmt.Sprintf(`{"elements": %s}`, data))
}
// jsonString returns s encoded as a JSON string literal, surrounding double
// quotes included, so it can be spliced directly into a JSON document.
func jsonString(s string) string {
	// The marshal error is discarded; encoding/json does not report errors
	// for plain string values.
	encoded, _ := json.Marshal(s)
	return string(encoded)
}

179
tui.go
View File

@@ -10,6 +10,7 @@ import (
"path" "path"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/gdamore/tcell/v2" "github.com/gdamore/tcell/v2"
"github.com/rivo/tview" "github.com/rivo/tview"
@@ -21,7 +22,6 @@ func isFullScreenPageActive() bool {
} }
var ( var (
app *tview.Application
pages *tview.Pages pages *tview.Pages
textArea *tview.TextArea textArea *tview.TextArea
editArea *tview.TextArea editArea *tview.TextArea
@@ -137,6 +137,55 @@ func setShellMode(enabled bool) {
}() }()
} }
// showToast displays a temporary bordered message box as an overlay page and
// removes it again after 3 seconds.
//
// Control characters (except tab) are stripped from both strings. title is
// limited to 50 and message to 197 characters; longer values are cut and
// end in "...". The message is escaped so that square brackets in it are
// shown literally instead of being parsed as colour tags.
func showToast(title, message string) {
	sanitize := func(s string, maxLen int) string {
		cleaned := strings.Map(func(r rune) rune {
			if r < 32 && r != '\t' {
				return -1
			}
			return r
		}, s)
		// Truncate by runes, not bytes, so a multi-byte UTF-8 character is
		// never split in half.
		if runes := []rune(cleaned); len(runes) > maxLen {
			cleaned = string(runes[:maxLen-3]) + "..."
		}
		return cleaned
	}
	title = sanitize(title, 50)
	message = sanitize(message, 197)
	notification := tview.NewTextView().
		SetTextAlign(tview.AlignCenter).
		SetDynamicColors(true).
		SetRegions(true).
		SetText(fmt.Sprintf("[yellow]%s[-]\n", tview.Escape(message))).
		SetChangedFunc(func() {
			app.Draw()
		})
	notification.SetTitleAlign(tview.AlignLeft).
		SetBorder(true).
		SetTitle(title)
	// Wrap the box in a fullscreen Flex: a flexible spacer comes first in
	// the outer row layout, and a flexible spacer sits left of the box in
	// the inner column layout, so the box is anchored to the right edge.
	// NOTE(review): with the row spacer placed before the box, the box is
	// laid out below the spacer; confirm this matches the intended corner.
	background := tview.NewFlex().SetDirection(tview.FlexRow).
		AddItem(nil, 0, 1, false). // flexible row spacer
		AddItem(tview.NewFlex().SetDirection(tview.FlexColumn).
			AddItem(nil, 0, 1, false).          // left spacer
			AddItem(notification, 40, 1, true), // notification width 40
			5, 1, false) // notification height 5
	// A timestamp-based page name lets several toasts coexist.
	pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano())
	pages.AddPage(pageName, background, true, true)
	// Auto-dismiss after 3 seconds; the removal is queued onto the
	// application's update loop.
	time.AfterFunc(3*time.Second, func() {
		app.QueueUpdateDraw(func() {
			if pages.HasPage(pageName) {
				pages.RemovePage(pageName)
			}
		})
	})
}
func init() { func init() {
// Start background goroutine to update model color cache // Start background goroutine to update model color cache
startModelColorUpdater() startModelColorUpdater()
@@ -213,8 +262,7 @@ func init() {
pages.SwitchToPage("main") // or whatever your main page is named pages.SwitchToPage("main") // or whatever your main page is named
}) })
confirmModal.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey { confirmModal.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
switch event.Key() { if event.Key() == tcell.KeyRune {
case tcell.KeyRune:
switch event.Rune() { switch event.Rune() {
case 'y', 'Y': case 'y', 'Y':
persona := cfg.UserRole persona := cfg.UserRole
@@ -328,9 +376,7 @@ func init() {
defer colorText() defer colorText()
editedMsg := editArea.GetText() editedMsg := editArea.GetText()
if editedMsg == "" { if editedMsg == "" {
if err := notifyUser("edit", "no edit provided"); err != nil { showToast("edit", "no edit provided")
logger.Error("failed to send notification", "error", err)
}
pages.RemovePage(editMsgPage) pages.RemovePage(editMsgPage)
return nil return nil
} }
@@ -360,9 +406,7 @@ func init() {
case tcell.KeyEnter: case tcell.KeyEnter:
newRole := roleEditWindow.GetText() newRole := roleEditWindow.GetText()
if newRole == "" { if newRole == "" {
if err := notifyUser("edit", "no role provided"); err != nil { showToast("edit", "no role provided")
logger.Error("failed to send notification", "error", err)
}
pages.RemovePage(roleEditPage) pages.RemovePage(roleEditPage)
return return
} }
@@ -389,9 +433,7 @@ func init() {
siInt, err := strconv.Atoi(si) siInt, err := strconv.Atoi(si)
if err != nil { if err != nil {
logger.Error("failed to convert provided index", "error", err, "si", si) logger.Error("failed to convert provided index", "error", err, "si", si)
if err := notifyUser("cancel", "no index provided, copying user input"); err != nil { showToast("cancel", "no index provided, copying user input")
logger.Error("failed to send notification", "error", err)
}
if err := copyToClipboard(textArea.GetText()); err != nil { if err := copyToClipboard(textArea.GetText()); err != nil {
logger.Error("failed to copy to clipboard", "error", err) logger.Error("failed to copy to clipboard", "error", err)
} }
@@ -402,9 +444,7 @@ func init() {
if len(chatBody.Messages)-1 < selectedIndex || selectedIndex < 0 { if len(chatBody.Messages)-1 < selectedIndex || selectedIndex < 0 {
msg := "chosen index is out of bounds, will copy user input" msg := "chosen index is out of bounds, will copy user input"
logger.Warn(msg, "index", selectedIndex) logger.Warn(msg, "index", selectedIndex)
if err := notifyUser("error", msg); err != nil { showToast("error", msg)
logger.Error("failed to send notification", "error", err)
}
if err := copyToClipboard(textArea.GetText()); err != nil { if err := copyToClipboard(textArea.GetText()); err != nil {
logger.Error("failed to copy to clipboard", "error", err) logger.Error("failed to copy to clipboard", "error", err)
} }
@@ -430,9 +470,7 @@ func init() {
} }
previewLen := min(30, len(msgText)) previewLen := min(30, len(msgText))
notification := fmt.Sprintf("msg '%s' was copied to the clipboard", msgText[:previewLen]) notification := fmt.Sprintf("msg '%s' was copied to the clipboard", msgText[:previewLen])
if err := notifyUser("copied", notification); err != nil { showToast("copied", notification)
logger.Error("failed to send notification", "error", err)
}
hideIndexBar() // Hide overlay after copying hideIndexBar() // Hide overlay after copying
} }
return nil return nil
@@ -464,9 +502,7 @@ func init() {
logger.Error("failed to upsert chat", "error", err, "chat", currentChat) logger.Error("failed to upsert chat", "error", err, "chat", currentChat)
} }
notification := fmt.Sprintf("renamed chat to '%s'", activeChatName) notification := fmt.Sprintf("renamed chat to '%s'", activeChatName)
if err := notifyUser("renamed", notification); err != nil { showToast("renamed", notification)
logger.Error("failed to send notification", "error", err)
}
} }
return event return event
}) })
@@ -576,9 +612,7 @@ func init() {
if scrollToEndEnabled { if scrollToEndEnabled {
status = "enabled" status = "enabled"
} }
if err := notifyUser("autoscroll", "Auto-scrolling "+status); err != nil { showToast("autoscroll", "Auto-scrolling "+status)
logger.Error("failed to send notification", "error", err)
}
updateStatusLine() updateStatusLine()
} }
// Handle Alt+7 to toggle injectRole // Handle Alt+7 to toggle injectRole
@@ -595,9 +629,7 @@ func init() {
if thinkingCollapsed { if thinkingCollapsed {
status = "collapsed" status = "collapsed"
} }
if err := notifyUser("thinking", "Thinking blocks "+status); err != nil { showToast("thinking", "Thinking blocks "+status)
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
// Handle Ctrl+T to toggle tool call/response visibility // Handle Ctrl+T to toggle tool call/response visibility
@@ -609,9 +641,7 @@ func init() {
if toolCollapsed { if toolCollapsed {
status = "collapsed" status = "collapsed"
} }
if err := notifyUser("tools", "Tool calls/responses "+status); err != nil { showToast("tools", "Tool calls/responses "+status)
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
if event.Key() == tcell.KeyRune && event.Rune() == 'i' && event.Modifiers()&tcell.ModAlt != 0 { if event.Key() == tcell.KeyRune && event.Rune() == 'i' && event.Modifiers()&tcell.ModAlt != 0 {
@@ -631,9 +661,7 @@ func init() {
// Check if there are no chats for this agent // Check if there are no chats for this agent
if len(chatList) == 0 { if len(chatList) == 0 {
notification := "no chats found for agent: " + cfg.AssistantRole notification := "no chats found for agent: " + cfg.AssistantRole
if err := notifyUser("info", notification); err != nil { showToast("info", notification)
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
chatMap := make(map[string]models.Chat) chatMap := make(map[string]models.Chat)
@@ -651,9 +679,7 @@ func init() {
if event.Key() == tcell.KeyF2 && !botRespMode { if event.Key() == tcell.KeyF2 && !botRespMode {
// regen last msg // regen last msg
if len(chatBody.Messages) == 0 { if len(chatBody.Messages) == 0 {
if err := notifyUser("info", "no messages to regenerate"); err != nil { showToast("info", "no messages to regenerate")
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1] chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1]
@@ -679,9 +705,7 @@ func init() {
return nil return nil
} }
if len(chatBody.Messages) == 0 { if len(chatBody.Messages) == 0 {
if err := notifyUser("info", "no messages to delete"); err != nil { showToast("info", "no messages to delete")
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1] chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1]
@@ -727,6 +751,7 @@ func init() {
if event.Key() == tcell.KeyF6 { if event.Key() == tcell.KeyF6 {
interruptResp = true interruptResp = true
botRespMode = false botRespMode = false
toolRunningMode = false
return nil return nil
} }
if event.Key() == tcell.KeyF7 { if event.Key() == tcell.KeyF7 {
@@ -739,9 +764,7 @@ func init() {
} }
previewLen := min(30, len(msgText)) previewLen := min(30, len(msgText))
notification := fmt.Sprintf("msg '%s' was copied to the clipboard", msgText[:previewLen]) notification := fmt.Sprintf("msg '%s' was copied to the clipboard", msgText[:previewLen])
if err := notifyUser("copied", notification); err != nil { showToast("copied", notification)
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
if event.Key() == tcell.KeyF8 { if event.Key() == tcell.KeyF8 {
@@ -755,9 +778,7 @@ func init() {
text := textView.GetText(false) text := textView.GetText(false)
cb := codeBlockRE.FindAllString(text, -1) cb := codeBlockRE.FindAllString(text, -1)
if len(cb) == 0 { if len(cb) == 0 {
if err := notifyUser("notify", "no code blocks in chat"); err != nil { showToast("notify", "no code blocks in chat")
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
table := makeCodeBlockTable(cb) table := makeCodeBlockTable(cb)
@@ -772,9 +793,7 @@ func init() {
// read files in chat_exports // read files in chat_exports
filelist, err := os.ReadDir(exportDir) filelist, err := os.ReadDir(exportDir)
if err != nil { if err != nil {
if err := notifyUser("failed to load exports", err.Error()); err != nil { showToast("failed to load exports", err.Error())
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
fli := []string{} fli := []string{}
@@ -804,9 +823,7 @@ func init() {
logger.Error("failed to export chat;", "error", err, "chat_name", activeChatName) logger.Error("failed to export chat;", "error", err, "chat_name", activeChatName)
return nil return nil
} }
if err := notifyUser("exported chat", "chat: "+activeChatName+" was exported"); err != nil { showToast("exported chat", "chat: "+activeChatName+" was exported")
logger.Error("failed to send notification", "error", err)
}
return nil return nil
} }
if event.Key() == tcell.KeyCtrlP { if event.Key() == tcell.KeyCtrlP {
@@ -845,9 +862,7 @@ func init() {
labels, err := initSysCards() labels, err := initSysCards()
if err != nil { if err != nil {
logger.Error("failed to read sys dir", "error", err) logger.Error("failed to read sys dir", "error", err)
if err := notifyUser("error", "failed to read: "+cfg.SysDir); err != nil { showToast("error", "failed to read: "+cfg.SysDir)
logger.Debug("failed to notify user", "error", err)
}
return nil return nil
} }
at := makeAgentTable(labels) at := makeAgentTable(labels)
@@ -860,6 +875,7 @@ func init() {
if event.Key() == tcell.KeyCtrlK { if event.Key() == tcell.KeyCtrlK {
// add message from tools // add message from tools
cfg.ToolUse = !cfg.ToolUse cfg.ToolUse = !cfg.ToolUse
updateToolCapabilities()
updateStatusLine() updateStatusLine()
return nil return nil
} }
@@ -871,21 +887,27 @@ func init() {
if err != nil { if err != nil {
logger.Error("failed to open attached image", "path", lastImg, "error", err) logger.Error("failed to open attached image", "path", lastImg, "error", err)
// Fall back to showing agent image // Fall back to showing agent image
loadImage() if err := loadImage(); err != nil {
logger.Warn("failed to load agent image", "error", err)
}
} else { } else {
defer file.Close() defer file.Close()
img, _, err := image.Decode(file) img, _, err := image.Decode(file)
if err != nil { if err != nil {
logger.Error("failed to decode attached image", "path", lastImg, "error", err) logger.Error("failed to decode attached image", "path", lastImg, "error", err)
// Fall back to showing agent image // Fall back to showing agent image
loadImage() if err := loadImage(); err != nil {
logger.Warn("failed to load agent image", "error", err)
}
} else { } else {
imgView.SetImage(img) imgView.SetImage(img)
} }
} }
} else { } else {
// No attached image, show agent image as before // No attached image, show agent image as before
loadImage() if err := loadImage(); err != nil {
logger.Warn("failed to load agent image", "error", err)
}
} }
pages.AddPage(imgPage, imgView, true, true) pages.AddPage(imgPage, imgView, true, true)
return nil return nil
@@ -897,9 +919,7 @@ func init() {
if err != nil { if err != nil {
msg := "failed to inference user speech; error:" + err.Error() msg := "failed to inference user speech; error:" + err.Error()
logger.Error(msg) logger.Error(msg)
if err := notifyUser("stt error", msg); err != nil { showToast("stt error", msg)
logger.Error("failed to notify user", "error", err)
}
return nil return nil
} }
if userSpeech != "" { if userSpeech != "" {
@@ -957,6 +977,17 @@ func init() {
showBotRoleSelectionPopup() showBotRoleSelectionPopup()
return nil return nil
} }
// INFO: shutdown
if event.Key() == tcell.KeyCtrlC {
logger.Info("caught Ctrl+C via tcell event")
go func() {
if err := pwShutDown(); err != nil {
logger.Error("shutdown failed", "err", err)
}
app.Stop()
}()
return nil // swallow the event
}
if event.Key() == tcell.KeyCtrlG { if event.Key() == tcell.KeyCtrlG {
// cfg.RAGDir is the directory with files to use with RAG // cfg.RAGDir is the directory with files to use with RAG
// rag load // rag load
@@ -968,26 +999,20 @@ func init() {
// Create the RAG directory if it doesn't exist // Create the RAG directory if it doesn't exist
if mkdirErr := os.MkdirAll(cfg.RAGDir, 0755); mkdirErr != nil { if mkdirErr := os.MkdirAll(cfg.RAGDir, 0755); mkdirErr != nil {
logger.Error("failed to create RAG directory", "dir", cfg.RAGDir, "error", mkdirErr) logger.Error("failed to create RAG directory", "dir", cfg.RAGDir, "error", mkdirErr)
if notifyerr := notifyUser("failed to create RAG directory", mkdirErr.Error()); notifyerr != nil { showToast("failed to create RAG directory", mkdirErr.Error())
logger.Error("failed to send notification", "error", notifyerr)
}
return nil return nil
} }
// Now try to read the directory again after creating it // Now try to read the directory again after creating it
files, err = os.ReadDir(cfg.RAGDir) files, err = os.ReadDir(cfg.RAGDir)
if err != nil { if err != nil {
logger.Error("failed to read dir after creating it", "dir", cfg.RAGDir, "error", err) logger.Error("failed to read dir after creating it", "dir", cfg.RAGDir, "error", err)
if notifyerr := notifyUser("failed to read RAG directory", err.Error()); notifyerr != nil { showToast("failed to read RAG directory", err.Error())
logger.Error("failed to send notification", "error", notifyerr)
}
return nil return nil
} }
} else { } else {
// Other error (permissions, etc.) // Other error (permissions, etc.)
logger.Error("failed to read dir", "dir", cfg.RAGDir, "error", err) logger.Error("failed to read dir", "dir", cfg.RAGDir, "error", err)
if notifyerr := notifyUser("failed to open RAG files dir", err.Error()); notifyerr != nil { showToast("failed to open RAG files dir", err.Error())
logger.Error("failed to send notification", "error", notifyerr)
}
return nil return nil
} }
} }
@@ -1017,9 +1042,7 @@ func init() {
if event.Key() == tcell.KeyRune && event.Modifiers() == tcell.ModAlt && event.Rune() == '9' { if event.Key() == tcell.KeyRune && event.Modifiers() == tcell.ModAlt && event.Rune() == '9' {
// Warm up (load) the currently selected model // Warm up (load) the currently selected model
go warmUpModel() go warmUpModel()
if err := notifyUser("model warmup", "loading model: "+chatBody.Model); err != nil { showToast("model warmup", "loading model: "+chatBody.Model)
logger.Debug("failed to notify user", "error", err)
}
return nil return nil
} }
// cannot send msg in editMode or botRespMode // cannot send msg in editMode or botRespMode
@@ -1072,6 +1095,18 @@ func init() {
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText} chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
return nil return nil
} }
if event.Key() == tcell.KeyTab {
currentF := app.GetFocus()
if currentF == textArea {
currentText := textArea.GetText()
atIndex := strings.LastIndex(currentText, "@")
if atIndex >= 0 {
filter := currentText[atIndex+1:]
showTextAreaFileCompletionPopup(filter)
}
}
return nil
}
if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn { if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn {
currentF := app.GetFocus() currentF := app.GetFocus()
app.SetFocus(focusSwitcher[currentF]) app.SetFocus(focusSwitcher[currentF])