31 Commits

Author SHA1 Message Date
Grail Finder
d144ee76d9 Chore: pw tools to be disabled as default 2026-03-04 11:45:54 +03:00
Grail Finder
abcaad6609 Enha: native notification implementation 2026-03-04 11:25:13 +03:00
Grail Finder
50ce0200af Fix: graceful shutdown in tui, to avoid other key block 2026-03-04 08:29:47 +03:00
Grail Finder
58ccd63f4a Fix: avoid raw terminal after ctrl+c exit 2026-03-04 08:25:53 +03:00
Grail Finder
3611d7eb59 Fix: missfire of no-vision notification 2026-03-03 16:55:09 +03:00
Grail Finder
8974d2f52c Fix: remove panics from code 2026-03-03 14:51:36 +03:00
Grail Finder
6b0d03f2d6 Fix: decompres before notify 2026-03-03 14:26:06 +03:00
Grail Finder
fb4deb1161 Fix: handle empty choices 2026-03-03 14:13:18 +03:00
Grail Finder
0e5d37666f Enha: id for card map 2026-03-03 11:46:03 +03:00
Grail Finder
093103bdd7 Feat (pw_tools): click_at 2026-03-03 10:53:04 +03:00
Grail Finder
6c9a1ba56b Chore: change 'when askes' to more proactive phrasing 2026-03-03 09:37:34 +03:00
Grail Finder
93ecfc8a34 Enha: palywright dom and elements fetching 2026-03-03 09:27:05 +03:00
Grail Finder
0c9c590d8f Enha (playwright): conditionaly install and use tools 2026-03-03 09:15:18 +03:00
Grail Finder
d130254e88 Chore (pw): restructure 2026-03-03 08:35:18 +03:00
Grail Finder
6e7a063300 Enha: remove window tools if no vision 2026-03-03 08:27:14 +03:00
Grail Finder
c05b93299c Chore: linter complaints 2026-03-03 07:38:57 +03:00
Grail Finder
cad1bd46c1 Feat: playwright tools 2026-03-02 19:20:54 +03:00
Grail Finder
4bddce3700 Enha: compute estimate of non llm text 2026-03-02 15:21:45 +03:00
Grail Finder
fcc71987bf Feat: token use estimation 2026-03-02 14:54:20 +03:00
Grail Finder
8458edf5a8 Enha: interrupt llm and tool both 2026-03-02 12:19:50 +03:00
Grail Finder
07b06bb0d3 Enha: tabcompletion is back in textarea 2026-03-02 12:09:27 +03:00
Grail Finder
3389b1d83b Fix: linter complaints 2026-03-02 11:39:55 +03:00
Grail Finder
4f6000a43a Enha: check if model has vision before giving it vision tools 2026-03-02 11:25:20 +03:00
Grail Finder
9ba46b40cc Feat: screencapture for completion 2026-03-02 11:12:04 +03:00
Grail Finder
5bb456272e Feat: capture window (screenshot) 2026-03-02 10:33:41 +03:00
Grail Finder
8999f48fb9 Fix (completion): handle multiple images in history 2026-03-02 09:23:22 +03:00
Grail Finder
b2f280a7f1 Feat: read img for completion 2026-03-02 07:46:08 +03:00
Grail Finder
65cbd5d6a6 Fix (ctrl+v): trim loaded mark from the model 2026-03-02 07:19:21 +03:00
Grail Finder
caac1d397a Feat: read img tool for chat endpoint 2026-03-02 07:12:28 +03:00
Grail Finder
742f1ca838 Enha: modal affirmation popup on sending empty msg 2026-03-01 16:21:18 +03:00
Grail Finder
e36bade353 Fix: escape with empty textarea not generating response 2026-03-01 13:33:25 +03:00
23 changed files with 2072 additions and 310 deletions

173
bot.go
View File

@@ -3,6 +3,7 @@ package main
import (
"bufio"
"bytes"
"compress/gzip"
"context"
"encoding/json"
"fmt"
@@ -63,7 +64,9 @@ var (
"google/gemma-3-27b-it:free",
"meta-llama/llama-3.3-70b-instruct:free",
}
LocalModels = []string{}
LocalModels = []string{}
localModelsData *models.LCPModels
orModelsData *models.ORModels
)
var thinkBlockRE = regexp.MustCompile(`(?s)<think>.*?</think>`)
@@ -265,9 +268,7 @@ func warmUpModel() {
// Continue with warmup attempt anyway
}
if loaded {
if err := notifyUser("model already loaded", "Model "+chatBody.Model+" is already loaded."); err != nil {
logger.Debug("failed to notify user", "error", err)
}
showToast("model already loaded", "Model "+chatBody.Model+" is already loaded.")
return
}
go func() {
@@ -355,6 +356,7 @@ func fetchORModels(free bool) ([]string, error) {
if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
return nil, err
}
orModelsData = data
freeModels := data.ListModels(free)
return freeModels, nil
}
@@ -416,6 +418,7 @@ func fetchLCPModelsWithStatus() (*models.LCPModels, error) {
if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
return nil, err
}
localModelsData = data
return data, nil
}
@@ -433,6 +436,33 @@ func isModelLoaded(modelID string) (bool, error) {
return false, nil
}
func ModelHasVision(api, modelID string) bool {
switch {
case strings.Contains(api, "deepseek"):
return false
case strings.Contains(api, "openrouter"):
resp, err := http.Get("https://openrouter.ai/api/v1/models")
if err != nil {
logger.Warn("failed to fetch OR models for vision check", "error", err)
return false
}
defer resp.Body.Close()
orm := &models.ORModels{}
if err := json.NewDecoder(resp.Body).Decode(orm); err != nil {
logger.Warn("failed to decode OR models for vision check", "error", err)
return false
}
return orm.HasVision(modelID)
default:
models, err := fetchLCPModelsWithStatus()
if err != nil {
logger.Warn("failed to fetch LCP models for vision check", "error", err)
return false
}
return models.HasVision(modelID)
}
}
// monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded.
func monitorModelLoad(modelID string) {
go func() {
@@ -451,9 +481,7 @@ func monitorModelLoad(modelID string) {
continue
}
if loaded {
if err := notifyUser("model loaded", "Model "+modelID+" is now loaded and ready."); err != nil {
logger.Debug("failed to notify user", "error", err)
}
showToast("model loaded", "Model "+modelID+" is now loaded and ready.")
refreshChatDisplay()
return
}
@@ -464,6 +492,17 @@ func monitorModelLoad(modelID string) {
// extractDetailedErrorFromBytes extracts detailed error information from response body bytes
func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
// Try to decompress gzip if the response is compressed
if len(body) >= 2 && body[0] == 0x1f && body[1] == 0x8b {
reader, err := gzip.NewReader(bytes.NewReader(body))
if err == nil {
decompressed, err := io.ReadAll(reader)
reader.Close()
if err == nil {
body = decompressed
}
}
}
// Try to parse as JSON to extract detailed error information
var errorResponse map[string]any
if err := json.Unmarshal(body, &errorResponse); err == nil {
@@ -529,9 +568,7 @@ func sendMsgToLLM(body io.Reader) {
req, err := http.NewRequest("POST", cfg.CurrentAPI, body)
if err != nil {
logger.Error("newreq error", "error", err)
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil {
logger.Error("failed to notify", "error", err)
}
showToast("error", "apicall failed:"+err.Error())
streamDone <- true
return
}
@@ -543,9 +580,7 @@ func sendMsgToLLM(body io.Reader) {
resp, err := httpClient.Do(req)
if err != nil {
logger.Error("llamacpp api", "error", err)
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil {
logger.Error("failed to notify", "error", err)
}
showToast("error", "apicall failed:"+err.Error())
streamDone <- true
return
}
@@ -556,9 +591,7 @@ func sendMsgToLLM(body io.Reader) {
if err != nil {
logger.Error("failed to read error response body", "error", err, "status_code", resp.StatusCode)
detailedError := fmt.Sprintf("HTTP Status: %d, Failed to read response body: %v", resp.StatusCode, err)
if err := notifyUser("API Error", detailedError); err != nil {
logger.Error("failed to notify", "error", err)
}
showToast("API Error", detailedError)
resp.Body.Close()
streamDone <- true
return
@@ -566,9 +599,7 @@ func sendMsgToLLM(body io.Reader) {
// Parse the error response for detailed information
detailedError := extractDetailedErrorFromBytes(bodyBytes, resp.StatusCode)
logger.Error("API returned error status", "status_code", resp.StatusCode, "detailed_error", detailedError)
if err := notifyUser("API Error", detailedError); err != nil {
logger.Error("failed to notify", "error", err)
}
showToast("API Error", detailedError)
resp.Body.Close()
streamDone <- true
return
@@ -605,16 +636,12 @@ func sendMsgToLLM(body io.Reader) {
detailedError := fmt.Sprintf("Streaming connection closed unexpectedly (Status: %d). This may indicate an API error. Check your API provider and model settings.", resp.StatusCode)
logger.Error("error reading response body", "error", err, "detailed_error", detailedError,
"status_code", resp.StatusCode, "user_role", cfg.UserRole, "parser", chunkParser, "link", cfg.CurrentAPI)
if err := notifyUser("API Error", detailedError); err != nil {
logger.Error("failed to notify", "error", err)
}
showToast("API Error", detailedError)
} else {
logger.Error("error reading response body", "error", err, "line", string(line),
"user_role", cfg.UserRole, "parser", chunkParser, "link", cfg.CurrentAPI)
// if err.Error() != "EOF" {
if err := notifyUser("API error", err.Error()); err != nil {
logger.Error("failed to notify", "error", err)
}
showToast("API error", err.Error())
}
streamDone <- true
break
@@ -641,9 +668,7 @@ func sendMsgToLLM(body io.Reader) {
if err != nil {
logger.Error("error parsing response body", "error", err,
"line", string(line), "url", cfg.CurrentAPI)
if err := notifyUser("LLM Response Error", "Failed to parse LLM response: "+err.Error()); err != nil {
logger.Error("failed to notify user", "error", err)
}
showToast("LLM Response Error", "Failed to parse LLM response: "+err.Error())
streamDone <- true
break
}
@@ -718,7 +743,7 @@ func sendMsgToLLM(body io.Reader) {
}
interrupt:
if interruptResp { // read bytes, so it would not get into beginning of the next req
interruptResp = false
// interruptResp = false
logger.Info("interrupted bot response", "chunk_counter", counter)
streamDone <- true
break
@@ -772,6 +797,7 @@ func showSpinner() {
}
func chatRound(r *models.ChatRoundReq) error {
interruptResp = false
botRespMode = true
go showSpinner()
updateStatusLine()
@@ -937,6 +963,9 @@ out:
}
// Strip think blocks before parsing for tool calls
respTextNoThink := thinkBlockRE.ReplaceAllString(respText.String(), "")
if interruptResp {
return nil
}
if findCall(respTextNoThink, toolResp.String()) {
return nil
}
@@ -1174,17 +1203,59 @@ func findCall(msg, toolCall string) bool {
toolRunningMode = false
toolMsg := string(resp)
logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolMsg)
// Create tool response message with the proper tool_call_id
// Mark shell commands as always visible
isShellCommand := fc.Name == "execute_command"
toolResponseMsg := models.RoleMsg{
Role: cfg.ToolRole,
Content: toolMsg,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
// Check if response is multimodal content (image)
var toolResponseMsg models.RoleMsg
if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
// Parse multimodal content response
multimodalResp := models.MultimodalToolResp{}
if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
// Create RoleMsg with ContentParts
var contentParts []any
for _, part := range multimodalResp.Parts {
partType := part["type"]
switch partType {
case "text":
contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
case "image_url":
contentParts = append(contentParts, models.ImageContentPart{
Type: "image_url",
ImageURL: struct {
URL string `json:"url"`
}{URL: part["url"]},
})
default:
continue
}
}
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole,
ContentParts: contentParts,
HasContentParts: true,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
}
} else {
// Fallback to regular content
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole,
Content: toolMsg,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
}
}
} else {
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole,
Content: toolMsg,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
}
}
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
// Clear the stored tool call ID after using it
@@ -1305,8 +1376,8 @@ func applyCharCard(cc *models.CharCard, loadHistory bool) {
}
func charToStart(agentName string, keepSysP bool) bool {
cc, ok := sysMap[agentName]
if !ok {
cc := GetCardByRole(agentName)
if cc == nil {
return false
}
applyCharCard(cc, keepSysP)
@@ -1339,6 +1410,7 @@ func updateModelLists() {
chatBody.Model = m
cachedModelColor = "green"
updateStatusLine()
updateToolCapabilities()
app.Draw()
return
}
@@ -1366,15 +1438,15 @@ func refreshLocalModelsIfEmpty() {
func summarizeAndStartNewChat() {
if len(chatBody.Messages) == 0 {
_ = notifyUser("info", "No chat history to summarize")
showToast("info", "No chat history to summarize")
return
}
_ = notifyUser("info", "Summarizing chat history...")
showToast("info", "Summarizing chat history...")
// Call the summarize_chat tool via agent
summaryBytes := callToolWithAgent("summarize_chat", map[string]string{})
summary := string(summaryBytes)
if summary == "" {
_ = notifyUser("error", "Failed to generate summary")
showToast("error", "Failed to generate summary")
return
}
// Start a new chat
@@ -1393,7 +1465,7 @@ func summarizeAndStartNewChat() {
if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
logger.Warn("failed to update storage after injecting summary", "error", err)
}
_ = notifyUser("info", "Chat summarized and new chat started with summary as tool response")
showToast("info", "Chat summarized and new chat started with summary as tool response")
}
func init() {
@@ -1452,6 +1524,23 @@ func init() {
if cfg.STT_ENABLED {
asr = NewSTT(logger, cfg)
}
if cfg.PlaywrightEnabled {
if err := checkPlaywright(); err != nil {
// slow, need a faster check if playwright install
if err := installPW(); err != nil {
logger.Error("failed to install playwright", "error", err)
cancel()
os.Exit(1)
return
}
if err := checkPlaywright(); err != nil {
logger.Error("failed to run playwright", "error", err)
cancel()
os.Exit(1)
return
}
}
}
// Initialize scrollToEndEnabled based on config
scrollToEndEnabled = cfg.AutoScrollEnabled
go updateModelLists()

View File

@@ -56,3 +56,6 @@ StripThinkingFromAPI = true # Strip <think> blocks from messages before sending
# Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled)
# Models that support reasoning will include thinking content wrapped in <think> tags
ReasoningEffort = "medium"
# playwright tools
PlaywrightEnabled = false
PlaywrightDebug = false

View File

@@ -70,6 +70,9 @@ type Config struct {
CharSpecificContextEnabled bool `toml:"CharSpecificContextEnabled"`
CharSpecificContextTag string `toml:"CharSpecificContextTag"`
AutoTurn bool `toml:"AutoTurn"`
// playwright browser
PlaywrightEnabled bool `toml:"PlaywrightEnabled"`
PlaywrightDebug bool `toml:"PlaywrightDebug"` // !headless
}
func LoadConfig(fn string) (*Config, error) {

View File

@@ -162,6 +162,15 @@ Those could be switched in program, but also bould be setup in config.
#### ToolUse
- Enable or disable explanation of tools to llm, so it could use them.
#### Playwright Browser Automation
These settings enable browser automation tools available to the LLM.
- **PlaywrightEnabled** (`false`)
- Enable or disable Playwright browser automation tools for the LLM. When enabled, the LLM can use tools like `pw_browser`, `pw_close`, and `pw_status` to automate browser interactions.
- **PlaywrightDebug** (`false`)
- Enable debug mode for Playwright browser. When set to `true`, the browser runs in visible (non-headless) mode, displaying the GUI for debugging purposes. When `false`, the browser runs in headless mode by default.
### StripThinkingFromAPI (`true`)
- Strip thinking blocks from messages before sending to LLM. Keeps them in chat history for local viewing but reduces token usage in API calls.

4
go.mod
View File

@@ -7,6 +7,7 @@ require (
github.com/GrailFinder/google-translate-tts v0.1.3
github.com/GrailFinder/searchagent v0.2.0
github.com/PuerkitoBio/goquery v1.11.0
github.com/deckarep/golang-set/v2 v2.8.0
github.com/gdamore/tcell/v2 v2.13.2
github.com/glebarez/go-sqlite v1.22.0
github.com/gopxl/beep/v2 v2.1.1
@@ -14,6 +15,7 @@ require (
github.com/jmoiron/sqlx v1.4.0
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728
github.com/neurosnap/sentences v1.1.2
github.com/playwright-community/playwright-go v0.5700.1
github.com/rivo/tview v0.42.0
github.com/yuin/goldmark v1.4.13
)
@@ -24,6 +26,8 @@ require (
github.com/ebitengine/oto/v3 v3.4.0 // indirect
github.com/ebitengine/purego v0.9.1 // indirect
github.com/gdamore/encoding v1.0.1 // indirect
github.com/go-jose/go-jose/v3 v3.0.4 // indirect
github.com/go-stack/stack v1.8.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hajimehoshi/go-mp3 v0.3.4 // indirect
github.com/hajimehoshi/oto/v2 v2.3.1 // indirect

14
go.sum
View File

@@ -10,8 +10,11 @@ github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43
github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set/v2 v2.8.0 h1:swm0rlPCmdWn9mESxKOjWk8hXSqoxOp+ZlfuyaAdFlQ=
github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/bQ=
@@ -24,8 +27,13 @@ github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx
github.com/gdamore/tcell/v2 v2.13.2/go.mod h1:+Wfe208WDdB7INEtCsNrAN6O2m+wsTPk1RAovjaILlo=
github.com/glebarez/go-sqlite v1.22.0 h1:uAcMJhaA6r3LHMTFgP0SifzgXg46yJkgxqyuyec+ruQ=
github.com/glebarez/go-sqlite v1.22.0/go.mod h1:PlBIdHe0+aUEFn+r2/uthrWq4FxbzugL0L8Li6yQJbc=
github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY=
github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw=
github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
@@ -59,6 +67,8 @@ github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7
github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U=
github.com/playwright-community/playwright-go v0.5700.1/go.mod h1:MlSn1dZrx8rszbCxY6x3qK89ZesJUYVx21B2JnkoNF0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
@@ -67,6 +77,8 @@ github.com/rivo/tview v0.42.0 h1:b/ftp+RxtDsHSaynXTbJb+/n/BxDEi+W3UfF5jILK6c=
github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
@@ -152,6 +164,8 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb
golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=

View File

@@ -11,6 +11,7 @@ import (
"path"
"path/filepath"
"slices"
"strconv"
"strings"
"time"
"unicode"
@@ -197,7 +198,11 @@ func initSysCards() ([]string, error) {
logger.Warn("empty role", "file", cc.FilePath)
continue
}
sysMap[cc.Role] = cc
if cc.ID == "" {
cc.ID = models.ComputeCardID(cc.Role, cc.FilePath)
}
sysMap[cc.ID] = cc
roleToID[cc.Role] = cc.ID
labels = append(labels, cc.Role)
}
return labels, nil
@@ -286,24 +291,25 @@ func listRolesWithUser() []string {
return result
}
func loadImage() {
func loadImage() error {
filepath := defaultImage
cc, ok := sysMap[cfg.AssistantRole]
if ok {
cc := GetCardByRole(cfg.AssistantRole)
if cc != nil {
if strings.HasSuffix(cc.FilePath, ".png") {
filepath = cc.FilePath
}
}
file, err := os.Open(filepath)
if err != nil {
panic(err)
return fmt.Errorf("failed to open image: %w", err)
}
defer file.Close()
img, _, err := image.Decode(file)
if err != nil {
panic(err)
return fmt.Errorf("failed to decode image: %w", err)
}
imgView.SetImage(img)
return nil
}
func strInSlice(s string, sl []string) bool {
@@ -376,9 +382,90 @@ func makeStatusLine() string {
roleInject := fmt.Sprintf(" | [%s:-:b]role injection[-:-:-] (alt+7)", boolColors[injectRole])
statusLine += roleInject
}
// context tokens
contextTokens := getContextTokens()
maxCtx := getMaxContextTokens()
if maxCtx == 0 {
maxCtx = 16384
}
if contextTokens > 0 {
contextInfo := fmt.Sprintf(" | context-estim: [orange:-:b]%d/%d[-:-:-]", contextTokens, maxCtx)
statusLine += contextInfo
}
return statusLine + imageInfo + shellModeInfo
}
func getContextTokens() int {
if chatBody == nil || chatBody.Messages == nil {
return 0
}
total := 0
messages := chatBody.Messages
for i := range messages {
msg := &messages[i]
if msg.Stats != nil && msg.Stats.Tokens > 0 {
total += msg.Stats.Tokens
} else if msg.GetText() != "" {
total += len(msg.GetText()) / 4
}
}
return total
}
const deepseekContext = 128000
func getMaxContextTokens() int {
if chatBody == nil || chatBody.Model == "" {
return 0
}
modelName := chatBody.Model
switch {
case strings.Contains(cfg.CurrentAPI, "openrouter"):
if orModelsData != nil {
for i := range orModelsData.Data {
m := &orModelsData.Data[i]
if m.ID == modelName {
return m.ContextLength
}
}
}
case strings.Contains(cfg.CurrentAPI, "deepseek"):
return deepseekContext
default:
if localModelsData != nil {
for i := range localModelsData.Data {
m := &localModelsData.Data[i]
if m.ID == modelName {
for _, arg := range m.Status.Args {
if strings.HasPrefix(arg, "--ctx-size") {
if strings.Contains(arg, "=") {
val := strings.Split(arg, "=")[1]
if n, err := strconv.Atoi(val); err == nil {
return n
}
} else {
idx := -1
for j, a := range m.Status.Args {
if a == "--ctx-size" && j+1 < len(m.Status.Args) {
idx = j + 1
break
}
}
if idx != -1 {
if n, err := strconv.Atoi(m.Status.Args[idx]); err == nil {
return n
}
}
}
}
}
}
}
}
}
return 0
}
// set of roles within card definition and mention in chat history
func listChatRoles() []string {
currentChat, ok := chatMap[activeChatName]
@@ -386,13 +473,9 @@ func listChatRoles() []string {
if !ok {
return cbc
}
currentCard, ok := sysMap[currentChat.Agent]
if !ok {
// case which won't let to switch roles:
// started new chat (basic_sys or any other), at the start it yet be saved or have chatbody
// if it does not have a card or chars, it'll return an empty slice
// log error
logger.Warn("failed to find current card in sysMap", "agent", currentChat.Agent, "sysMap", sysMap)
currentCard := GetCardByRole(currentChat.Agent)
if currentCard == nil {
logger.Warn("failed to find current card", "agent", currentChat.Agent)
return cbc
}
charset := []string{}
@@ -408,10 +491,7 @@ func listChatRoles() []string {
func deepseekModelValidator() error {
if cfg.CurrentAPI == cfg.DeepSeekChatAPI || cfg.CurrentAPI == cfg.DeepSeekCompletionAPI {
if chatBody.Model != "deepseek-chat" && chatBody.Model != "deepseek-reasoner" {
if err := notifyUser("bad request", "wrong deepseek model name"); err != nil {
logger.Warn("failed ot notify user", "error", err)
return err
}
showToast("bad request", "wrong deepseek model name")
return nil
}
}
@@ -611,9 +691,7 @@ func performSearch(term string) {
searchResults = nil
searchResultLengths = nil
notification := "Pattern not found: " + term
if err := notifyUser("search", notification); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("search", notification)
return
}
// Store the formatted text positions and lengths for accurate highlighting
@@ -646,9 +724,7 @@ func highlightCurrentMatch() {
textView.Highlight(currentRegion).ScrollToHighlight()
// Send notification about which match we're at
notification := fmt.Sprintf("Match %d of %d", searchIndex+1, len(searchResults))
if err := notifyUser("search", notification); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("search", notification)
}
// showSearchBar shows the search input field as an overlay
@@ -738,9 +814,7 @@ func addRegionTags(text string, positions []int, lengths []int, currentIdx int,
// searchNext finds the next occurrence of the search term
func searchNext() {
if len(searchResults) == 0 {
if err := notifyUser("search", "No search results to navigate"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("search", "No search results to navigate")
return
}
searchIndex = (searchIndex + 1) % len(searchResults)
@@ -750,9 +824,7 @@ func searchNext() {
// searchPrev finds the previous occurrence of the search term
func searchPrev() {
if len(searchResults) == 0 {
if err := notifyUser("search", "No search results to navigate"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("search", "No search results to navigate")
return
}
if searchIndex == 0 {

98
llm.go
View File

@@ -3,7 +3,6 @@ package main
import (
"bytes"
"encoding/json"
"errors"
"gf-lt/models"
"io"
"strings"
@@ -119,25 +118,22 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
logger.Debug("formmsg lcpcompletion", "link", cfg.CurrentAPI)
localImageAttachmentPath := imageAttachmentPath
var multimodalData []string
if localImageAttachmentPath != "" {
imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
if err != nil {
logger.Error("failed to create image URL from path for completion",
"error", err, "path", localImageAttachmentPath)
return nil, err
}
// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
parts := strings.SplitN(imageURL, ",", 2)
if len(parts) == 2 {
multimodalData = append(multimodalData, parts[1])
} else {
logger.Error("invalid image data URL format", "url", imageURL)
return nil, errors.New("invalid image data URL format")
}
imageAttachmentPath = "" // Clear the attachment after use
}
if msg != "" { // otherwise let the bot to continue
newMsg := models.RoleMsg{Role: role, Content: msg}
var newMsg models.RoleMsg
if localImageAttachmentPath != "" {
newMsg = models.NewMultimodalMsg(role, []any{})
newMsg.AddTextPart(msg)
imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
if err != nil {
logger.Error("failed to create image URL from path for completion",
"error", err, "path", localImageAttachmentPath)
return nil, err
}
newMsg.AddImagePart(imageURL, localImageAttachmentPath)
imageAttachmentPath = "" // Clear the attachment after use
} else { // not a multimodal msg or image passed in tool call
newMsg = models.RoleMsg{Role: role, Content: msg}
}
newMsg = *processMessageTag(&newMsg)
chatBody.Messages = append(chatBody.Messages, newMsg)
}
@@ -146,22 +142,40 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
}
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages)
// Build prompt and extract images inline as we process each message
messages := make([]string, len(filteredMessages))
for i := range filteredMessages {
messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt()
m := stripThinkingFromMsg(&filteredMessages[i])
messages[i] = m.ToPrompt()
// Extract images from this message and add marker inline
if len(m.ContentParts) > 0 {
for _, part := range m.ContentParts {
var imgURL string
// Check for struct type
if imgPart, ok := part.(models.ImageContentPart); ok {
imgURL = imgPart.ImageURL.URL
} else if partMap, ok := part.(map[string]any); ok {
// Check for map type (from JSON unmarshaling)
if partType, exists := partMap["type"]; exists && partType == "image_url" {
if imgURLMap, ok := partMap["image_url"].(map[string]any); ok {
if url, ok := imgURLMap["url"].(string); ok {
imgURL = url
}
}
}
}
if imgURL != "" {
// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
parts := strings.SplitN(imgURL, ",", 2)
if len(parts) == 2 {
multimodalData = append(multimodalData, parts[1])
messages[i] += " <__media__>"
}
}
}
}
}
prompt := strings.Join(messages, "\n")
// Add multimodal media markers to the prompt text when multimodal data is present
// This is required by llama.cpp multimodal models so they know where to insert media
if len(multimodalData) > 0 {
// Add a media marker for each item in the multimodal data
var sb strings.Builder
sb.WriteString(prompt)
for range multimodalData {
sb.WriteString(" <__media__>") // llama.cpp default multimodal marker
}
prompt = sb.String()
}
// needs to be after <__media__> if there are images
if !resume {
botMsgStart := "\n" + botPersona + ":\n"
@@ -210,11 +224,9 @@ func (op LCPChat) ParseChunk(data []byte) (*models.TextChunk, error) {
logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err
}
// Handle multiple choices safely
if len(llmchunk.Choices) == 0 {
logger.Warn("LCPChat ParseChunk: no choices in response", "data", string(data))
return &models.TextChunk{Finished: true}, nil
logger.Warn("LCPChat empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return &models.TextChunk{}, nil
}
lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
resp := &models.TextChunk{
@@ -335,6 +347,10 @@ func (ds DeepSeekerCompletion) ParseChunk(data []byte) (*models.TextChunk, error
logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err
}
if len(llmchunk.Choices) == 0 {
logger.Warn("empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return &models.TextChunk{}, nil
}
resp := &models.TextChunk{
Chunk: llmchunk.Choices[0].Text,
}
@@ -400,6 +416,10 @@ func (ds DeepSeekerChat) ParseChunk(data []byte) (*models.TextChunk, error) {
return nil, err
}
resp := &models.TextChunk{}
if len(llmchunk.Choices) == 0 {
logger.Warn("empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return resp, nil
}
if llmchunk.Choices[0].FinishReason != "" {
if llmchunk.Choices[0].Delta.Content != "" {
logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
@@ -482,6 +502,10 @@ func (or OpenRouterCompletion) ParseChunk(data []byte) (*models.TextChunk, error
logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err
}
if len(llmchunk.Choices) == 0 {
logger.Warn("empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return &models.TextChunk{}, nil
}
resp := &models.TextChunk{
Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Text,
}
@@ -544,6 +568,10 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err
}
if len(llmchunk.Choices) == 0 {
logger.Warn("empty chunk choices", "raw_data", string(data), "chunk", llmchunk)
return &models.TextChunk{}, nil
}
lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
resp := &models.TextChunk{
Chunk: lastChoice.Delta.Content,

View File

@@ -17,8 +17,9 @@ var (
shellHistoryPos int = -1
thinkingCollapsed = false
toolCollapsed = true
statusLineTempl = "help (F12) | chat: [orange:-:b]%s[-:-:-] (F1) | [%s:-:b]tool use[-:-:-] (ctrl+k) | model: [%s:-:b]%s[-:-:-] (ctrl+l) | [%s:-:b]skip LLM resp[-:-:-] (F10)\nAPI: [orange:-:b]%s[-:-:-] (ctrl+v) | writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | bot will write as [orange:-:b]%s[-:-:-] (ctrl+x)"
statusLineTempl = "help (F12) | chat: [orange:-:b]%s[-:-:-] (F1) | [%s:-:b]tool use[-:-:-] (ctrl+k) | model: [%s:-:b]%s[-:-:-] (ctrl+l) | [%s:-:b]skip LLM resp[-:-:-] (F10) | API: [orange:-:b]%s[-:-:-] (ctrl+v)\nwriting as: [orange:-:b]%s[-:-:-] (ctrl+q) | bot will write as [orange:-:b]%s[-:-:-] (ctrl+x)"
focusSwitcher = map[tview.Primitive]tview.Primitive{}
app *tview.Application
)
func main() {

View File

@@ -1,6 +1,10 @@
package models
import "strings"
import (
"crypto/md5"
"fmt"
"strings"
)
// https://github.com/malfoyslastname/character-card-spec-v2/blob/main/spec_v2.md
// what a bloat; trim to Role->Msg pair and first msg
@@ -31,6 +35,7 @@ func (c *CharCardSpec) Simplify(userName, fpath string) *CharCard {
fm := strings.ReplaceAll(strings.ReplaceAll(c.FirstMes, "{{char}}", c.Name), "{{user}}", userName)
sysPr := strings.ReplaceAll(strings.ReplaceAll(c.Description, "{{char}}", c.Name), "{{user}}", userName)
return &CharCard{
ID: ComputeCardID(c.Name, fpath),
SysPrompt: sysPr,
FirstMsg: fm,
Role: c.Name,
@@ -39,7 +44,12 @@ func (c *CharCardSpec) Simplify(userName, fpath string) *CharCard {
}
}
func ComputeCardID(role, filePath string) string {
return fmt.Sprintf("%x", md5.Sum([]byte(role+filePath)))
}
type CharCard struct {
ID string `json:"id"`
SysPrompt string `json:"sys_prompt"`
FirstMsg string `json:"first_msg"`
Role string `json:"role"`

View File

@@ -1,7 +1,8 @@
package models
const (
LoadedMark = "(loaded) "
LoadedMark = "(loaded) "
ToolRespMultyType = "multimodel_content"
)
type APIType int

View File

@@ -391,7 +391,6 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
if err != nil {
return "", err
}
// Determine the image format based on file extension
var mimeType string
switch {
@@ -408,10 +407,8 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
default:
mimeType = "image/jpeg" // default
}
// Encode to base64
encoded := base64.StdEncoding.EncodeToString(data)
// Create data URL
return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
}
@@ -611,6 +608,20 @@ func (lcp *LCPModels) ListModels() []string {
return resp
}
func (lcp *LCPModels) HasVision(modelID string) bool {
for _, m := range lcp.Data {
if m.ID == modelID {
args := m.Status.Args
for i := 0; i < len(args)-1; i++ {
if args[i] == "--mmproj" {
return true
}
}
}
}
return false
}
type ResponseStats struct {
Tokens int
Duration float64
@@ -623,3 +634,8 @@ type ChatRoundReq struct {
Regen bool
Resume bool
}
type MultimodalToolResp struct {
Type string `json:"type"`
Parts []map[string]string `json:"parts"`
}

View File

@@ -172,3 +172,16 @@ func (orm *ORModels) ListModels(free bool) []string {
}
return resp
}
func (orm *ORModels) HasVision(modelID string) bool {
for i := range orm.Data {
if orm.Data[i].ID == modelID {
for _, mod := range orm.Data[i].Architecture.InputModalities {
if mod == "image" {
return true
}
}
}
}
return false
}

View File

@@ -109,6 +109,12 @@ func ReadCardJson(fname string) (*models.CharCard, error) {
if err := json.Unmarshal(data, &card); err != nil {
return nil, err
}
if card.FilePath == "" {
card.FilePath = fname
}
if card.ID == "" {
card.ID = models.ComputeCardID(card.Role, card.FilePath)
}
return &card, nil
}

View File

@@ -40,9 +40,7 @@ func showModelSelectionPopup() {
default:
message = "No llama.cpp models loaded. Ensure llama.cpp server is running with models."
}
if err := notifyUser("Empty list", message); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("Empty list", message)
return
}
// Create a list primitive
@@ -119,9 +117,7 @@ func showAPILinkSelectionPopup() {
if len(apiLinks) == 0 {
logger.Warn("no API links available for selection")
message := "No API links available. Please configure API links in your config file."
if err := notifyUser("Empty list", message); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("Empty list", message)
return
}
// Create a list primitive
@@ -143,6 +139,7 @@ func showAPILinkSelectionPopup() {
apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
// Update the API in config
cfg.CurrentAPI = mainText
// updateToolCapabilities()
// Update model list based on new API
// Helper function to get model list for a given API (same as in props_table.go)
getModelListForAPI := func(api string) []string {
@@ -160,8 +157,9 @@ func showAPILinkSelectionPopup() {
newModelList := getModelListForAPI(cfg.CurrentAPI)
// Ensure chatBody.Model is in the new list; if not, set to first available model
if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) {
chatBody.Model = newModelList[0]
chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark)
cfg.CurrentModel = chatBody.Model
updateToolCapabilities()
}
pages.RemovePage("apiLinkSelectionPopup")
app.SetFocus(textArea)
@@ -204,9 +202,7 @@ func showUserRoleSelectionPopup() {
if len(roles) == 0 {
logger.Warn("no roles available for selection")
message := "No roles available for selection."
if err := notifyUser("Empty list", message); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("Empty list", message)
return
}
// Create a list primitive
@@ -283,9 +279,7 @@ func showBotRoleSelectionPopup() {
if len(roles) == 0 {
logger.Warn("no roles available for selection")
message := "No roles available for selection."
if err := notifyUser("Empty list", message); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("Empty list", message)
return
}
// Create a list primitive
@@ -404,6 +398,66 @@ func showShellFileCompletionPopup(filter string) {
app.SetFocus(widget)
}
func showTextAreaFileCompletionPopup(filter string) {
baseDir := cfg.FilePickerDir
if baseDir == "" {
baseDir = "."
}
complMatches := scanFiles(baseDir, filter)
if len(complMatches) == 0 {
return
}
if len(complMatches) == 1 {
currentText := textArea.GetText()
atIdx := strings.LastIndex(currentText, "@")
if atIdx >= 0 {
before := currentText[:atIdx]
textArea.SetText(before+complMatches[0], true)
}
return
}
widget := tview.NewList().ShowSecondaryText(false).
SetSelectedBackgroundColor(tcell.ColorGray)
widget.SetTitle("file completion").SetBorder(true)
for _, m := range complMatches {
widget.AddItem(m, "", 0, nil)
}
widget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
currentText := textArea.GetText()
atIdx := strings.LastIndex(currentText, "@")
if atIdx >= 0 {
before := currentText[:atIdx]
textArea.SetText(before+mainText, true)
}
pages.RemovePage("textAreaFileCompletionPopup")
app.SetFocus(textArea)
})
widget.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
if event.Key() == tcell.KeyEscape {
pages.RemovePage("textAreaFileCompletionPopup")
app.SetFocus(textArea)
return nil
}
if event.Key() == tcell.KeyRune && event.Rune() == 'x' {
pages.RemovePage("textAreaFileCompletionPopup")
app.SetFocus(textArea)
return nil
}
return event
})
modal := func(p tview.Primitive, width, height int) tview.Primitive {
return tview.NewFlex().
AddItem(nil, 0, 1, false).
AddItem(tview.NewFlex().SetDirection(tview.FlexRow).
AddItem(nil, 0, 1, false).
AddItem(p, height, 1, true).
AddItem(nil, 0, 1, false), width, 1, true).
AddItem(nil, 0, 1, false)
}
pages.AddPage("textAreaFileCompletionPopup", modal(widget, 80, 20), true, true)
app.SetFocus(widget)
}
func updateWidgetColors(theme *tview.Theme) {
bgColor := theme.PrimitiveBackgroundColor
fgColor := theme.PrimaryTextColor
@@ -450,9 +504,7 @@ func showColorschemeSelectionPopup() {
if len(schemeNames) == 0 {
logger.Warn("no colorschemes available for selection")
message := "No colorschemes available."
if err := notifyUser("Empty list", message); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("Empty list", message)
return
}
// Create a list primitive

View File

@@ -259,9 +259,7 @@ func makePropsTable(props map[string]float32) *tview.Table {
// Handle nil options
if data.Options == nil {
logger.Error("options list is nil for", "label", label)
if err := notifyUser("Configuration error", "Options list is nil for "+label); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("Configuration error", "Options list is nil for "+label)
return
}
@@ -279,9 +277,7 @@ func makePropsTable(props map[string]float32) *tview.Table {
message = "No llama.cpp models loaded. Ensure llama.cpp server is running with models."
}
}
if err := notifyUser("Empty list", message); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("Empty list", message)
return
}
// Create a list primitive

View File

@@ -168,8 +168,3 @@ func copyToClipboard(text string) error {
cmd.Stdin = strings.NewReader(text)
return cmd.Run()
}
func notifyUser(topic, message string) error {
cmd := exec.Command("notify-send", topic, message)
return cmd.Run()
}

View File

@@ -10,16 +10,18 @@ import (
//go:embed migrations/*
var migrationsFS embed.FS
func (p *ProviderSQL) Migrate() {
func (p *ProviderSQL) Migrate() error {
// Get the embedded filesystem
migrationsDir, err := fs.Sub(migrationsFS, "migrations")
if err != nil {
p.logger.Error("Failed to get embedded migrations directory;", "error", err)
return fmt.Errorf("failed to get embedded migrations directory: %w", err)
}
// List all .up.sql files
files, err := migrationsFS.ReadDir("migrations")
if err != nil {
p.logger.Error("Failed to read migrations directory;", "error", err)
return fmt.Errorf("failed to read migrations directory: %w", err)
}
// Execute each .up.sql file
for _, file := range files {
@@ -27,11 +29,12 @@ func (p *ProviderSQL) Migrate() {
err := p.executeMigration(migrationsDir, file.Name())
if err != nil {
p.logger.Error("Failed to execute migration %s: %v", file.Name(), err)
panic(err)
return fmt.Errorf("failed to execute migration %s: %w", file.Name(), err)
}
}
}
p.logger.Debug("All migrations executed successfully!")
return nil
}
func (p *ProviderSQL) executeMigration(migrationsDir fs.FS, fileName string) error {

View File

@@ -103,7 +103,10 @@ func NewProviderSQL(dbPath string, logger *slog.Logger) FullRepo {
return nil
}
p := ProviderSQL{db: db, logger: logger}
p.Migrate()
if err := p.Migrate(); err != nil {
logger.Error("migration failed, app cannot start", "error", err)
return nil
}
return p
}

View File

@@ -147,9 +147,7 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
if err := store.RemoveChat(sc.ID); err != nil {
logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name)
}
if err := notifyUser("chat deleted", selectedChat+" was deleted"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("chat deleted", selectedChat+" was deleted")
// load last chat
chatBody.Messages = loadOldChatOrGetNew()
textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
@@ -159,27 +157,16 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
// save updated card
fi := strings.Index(selectedChat, "_")
agentName := selectedChat[fi+1:]
cc, ok := sysMap[agentName]
if !ok {
cc := GetCardByRole(agentName)
if cc == nil {
logger.Warn("no such card", "agent", agentName)
//no:lint
if err := notifyUser("error", "no such card: "+agentName); err != nil {
logger.Warn("failed ot notify", "error", err)
}
showToast("error", "no such card: "+agentName)
return
}
// if chatBody.Messages[0].Role != "system" || chatBody.Messages[1].Role != agentName {
// if err := notifyUser("error", "unexpected chat structure; card: "+agentName); err != nil {
// logger.Warn("failed ot notify", "error", err)
// }
// return
// }
// change sys_prompt + first msg
cc.SysPrompt = chatBody.Messages[0].Content
cc.FirstMsg = chatBody.Messages[1].Content
if err := pngmeta.WriteToPng(cc.ToSpec(cfg.UserRole), cc.FilePath, cc.FilePath); err != nil {
logger.Error("failed to write charcard",
"error", err)
logger.Error("failed to write charcard", "error", err)
}
return
case "move sysprompt onto 1st msg":
@@ -190,33 +177,29 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table {
pages.RemovePage(historyPage)
return
case "new_chat_from_card":
// Reread card from file and start fresh chat
fi := strings.Index(selectedChat, "_")
agentName := selectedChat[fi+1:]
cc, ok := sysMap[agentName]
if !ok {
cc := GetCardByRole(agentName)
if cc == nil {
logger.Warn("no such card", "agent", agentName)
if err := notifyUser("error", "no such card: "+agentName); err != nil {
logger.Warn("failed to notify", "error", err)
}
showToast("error", "no such card: "+agentName)
return
}
// Reload card from disk
newCard, err := pngmeta.ReadCard(cc.FilePath, cfg.UserRole)
if err != nil {
logger.Error("failed to reload charcard", "path", cc.FilePath, "error", err)
newCard, err = pngmeta.ReadCardJson(cc.FilePath)
if err != nil {
logger.Error("failed to reload charcard", "path", cc.FilePath, "error", err)
if err := notifyUser("error", "failed to reload card: "+cc.FilePath); err != nil {
logger.Warn("failed to notify", "error", err)
}
showToast("error", "failed to reload card: "+cc.FilePath)
return
}
}
// Update sysMap with fresh card data
sysMap[agentName] = newCard
// fetching sysprompt and first message anew from the card
if newCard.ID == "" {
newCard.ID = models.ComputeCardID(newCard.Role, newCard.FilePath)
}
sysMap[newCard.ID] = newCard
roleToID[newCard.Role] = newCard.ID
startNewChat(false)
pages.RemovePage(historyPage)
return
@@ -457,13 +440,13 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
go func() {
if err := ragger.LoadRAG(fpath); err != nil {
logger.Error("failed to embed file", "chat", fpath, "error", err)
_ = notifyUser("RAG", "failed to embed file; error: "+err.Error())
showToast("RAG", "failed to embed file; error: "+err.Error())
app.QueueUpdate(func() {
pages.RemovePage(RAGPage)
})
return
}
_ = notifyUser("RAG", "file loaded successfully")
showToast("RAG", "file loaded successfully")
app.QueueUpdate(func() {
pages.RemovePage(RAGPage)
})
@@ -474,13 +457,13 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
go func() {
if err := ragger.RemoveFile(f.name); err != nil {
logger.Error("failed to unload file from RAG", "filename", f.name, "error", err)
_ = notifyUser("RAG", "failed to unload file; error: "+err.Error())
showToast("RAG", "failed to unload file; error: "+err.Error())
app.QueueUpdate(func() {
pages.RemovePage(RAGPage)
})
return
}
_ = notifyUser("RAG", "file unloaded successfully")
showToast("RAG", "file unloaded successfully")
app.QueueUpdate(func() {
pages.RemovePage(RAGPage)
})
@@ -492,9 +475,7 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
logger.Error("failed to delete file", "filename", fpath, "error", err)
return
}
if err := notifyUser("chat deleted", fpath+" was deleted"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("chat deleted", fpath+" was deleted")
return
default:
pages.RemovePage(RAGPage)
@@ -529,8 +510,8 @@ func makeAgentTable(agentList []string) *tview.Table {
SetSelectable(false))
case 1:
if actions[c-1] == "filepath" {
cc, ok := sysMap[agentList[r]]
if !ok {
cc := GetCardByRole(agentList[r])
if cc == nil {
continue
}
chatActTable.SetCell(r, c,
@@ -603,9 +584,7 @@ func makeAgentTable(agentList []string) *tview.Table {
if err := store.RemoveChat(sc.ID); err != nil {
logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name)
}
if err := notifyUser("chat deleted", selected+" was deleted"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("chat deleted", selected+" was deleted")
pages.RemovePage(agentPage)
return
default:
@@ -676,13 +655,9 @@ func makeCodeBlockTable(codeBlocks []string) *tview.Table {
switch tc.Text {
case "copy":
if err := copyToClipboard(selected); err != nil {
if err := notifyUser("error", err.Error()); err != nil {
logger.Error("failed to send notification", "error", err)
}
}
if err := notifyUser("copied", selected); err != nil {
logger.Error("failed to send notification", "error", err)
showToast("error", err.Error())
}
showToast("copied", selected)
pages.RemovePage(codeBlockPage)
app.SetFocus(textArea)
return
@@ -775,9 +750,7 @@ func makeImportChatTable(filenames []string) *tview.Table {
if err := store.RemoveChat(sc.ID); err != nil {
logger.Error("failed to remove chat from db", "chat_id", sc.ID, "chat_name", sc.Name)
}
if err := notifyUser("chat deleted", selected+" was deleted"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("chat deleted", selected+" was deleted")
pages.RemovePage(historyPage)
return
default:

776
tools.go
View File

@@ -77,12 +77,17 @@ Your current tools:
{
"name":"file_create",
"args": ["path", "content"],
"when_to_use": "when asked to create a new file with optional content"
"when_to_use": "when there is a need to create a new file with optional content"
},
{
"name":"file_read",
"args": ["path"],
"when_to_use": "when asked to read the content of a file"
"when_to_use": "when you need to read the content of a file"
},
{
"name":"file_read_image",
"args": ["path"],
"when_to_use": "when you need to read or view an image file"
},
{
"name":"file_write",
@@ -92,7 +97,7 @@ Your current tools:
{
"name":"file_write_append",
"args": ["path", "content"],
"when_to_use": "when asked to append content to a file; use sed to edit content"
"when_to_use": "when you need append content to a file; use sed to edit content"
},
{
"name":"file_edit",
@@ -107,22 +112,22 @@ Your current tools:
{
"name":"file_move",
"args": ["src", "dst"],
"when_to_use": "when asked to move a file from source to destination"
"when_to_use": "when you need to move a file from source to destination"
},
{
"name":"file_copy",
"args": ["src", "dst"],
"when_to_use": "when asked to copy a file from source to destination"
"when_to_use": "copy a file from source to destination"
},
{
"name":"file_list",
"args": ["path"],
"when_to_use": "when asked to list files in a directory; path is optional (default: current directory)"
"when_to_use": "list files in a directory; path is optional (default: current directory)"
},
{
"name":"execute_command",
"args": ["command", "args"],
"when_to_use": "when asked to execute a system command; args is optional; allowed commands: grep, sed, awk, find, cat, head, tail, sort, uniq, wc, ls, echo, cut, tr, cp, mv, rm, mkdir, rmdir, pwd, df, free, ps, top, du, whoami, date, uname, go"
"when_to_use": "execute a system command; args is optional; allowed commands: grep, sed, awk, find, cat, head, tail, sort, uniq, wc, ls, echo, cut, tr, cp, mv, rm, mkdir, rmdir, pwd, df, free, ps, top, du, whoami, date, uname, go"
}
]
</tools>
@@ -157,40 +162,133 @@ After that you are free to respond to the user.
readURLSysPrompt = `Extract and summarize the content from the webpage. Provide key information, main points, and any relevant details.`
summarySysPrompt = `Please provide a concise summary of the following conversation. Focus on key points, decisions, and actions. Provide only the summary, no additional commentary.`
basicCard = &models.CharCard{
ID: models.ComputeCardID("assistant", "basic_sys"),
SysPrompt: basicSysMsg,
FirstMsg: defaultFirstMsg,
Role: "",
FilePath: "",
Role: "assistant",
FilePath: "basic_sys",
}
sysMap = map[string]*models.CharCard{"basic_sys": basicCard}
sysLabels = []string{"basic_sys"}
sysMap = map[string]*models.CharCard{}
roleToID = map[string]string{}
sysLabels = []string{"assistant"}
webAgentClient *agent.AgentClient
webAgentClientOnce sync.Once
webAgentsOnce sync.Once
)
var windowToolSysMsg = `
Additional window tools (available only if xdotool and maim are installed):
[
{
"name":"list_windows",
"args": [],
"when_to_use": "when asked to list visible windows; returns map of window ID to window name"
},
{
"name":"capture_window",
"args": ["window"],
"when_to_use": "when asked to take a screenshot of a specific window; saves to /tmp; window can be ID or name substring; returns file path"
},
{
"name":"capture_window_and_view",
"args": ["window"],
"when_to_use": "when asked to take a screenshot of a specific window and show it; saves to /tmp and returns image for viewing; window can be ID or name substring"
}
]
`
var WebSearcher searcher.WebSurfer
var (
windowToolsAvailable bool
xdotoolPath string
maimPath string
modelHasVision bool
)
func init() {
sysMap[basicCard.ID] = basicCard
roleToID["assistant"] = basicCard.ID
sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "")
if err != nil {
panic("failed to init seachagent; error: " + err.Error())
if logger != nil {
logger.Warn("search agent unavailable; web_search tool disabled", "error", err)
}
WebSearcher = nil
} else {
WebSearcher = sa
}
WebSearcher = sa
if err := rag.Init(cfg, logger, store); err != nil {
logger.Warn("failed to init rag; rag_search tool will not be available", "error", err)
}
checkWindowTools()
registerWindowTools()
}
func GetCardByRole(role string) *models.CharCard {
cardID, ok := roleToID[role]
if !ok {
return nil
}
return sysMap[cardID]
}
func checkWindowTools() {
xdotoolPath, _ = exec.LookPath("xdotool")
maimPath, _ = exec.LookPath("maim")
windowToolsAvailable = xdotoolPath != "" && maimPath != ""
if windowToolsAvailable {
logger.Info("window tools available: xdotool and maim found")
} else {
if xdotoolPath == "" {
logger.Warn("xdotool not found, window listing tools will not be available")
}
if maimPath == "" {
logger.Warn("maim not found, window capture tools will not be available")
}
}
}
func updateToolCapabilities() {
if !cfg.ToolUse {
return
}
modelHasVision = false
if cfg == nil || cfg.CurrentAPI == "" {
logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
registerWindowTools()
registerPlaywrightTools()
return
}
prevHasVision := modelHasVision
modelHasVision = ModelHasVision(cfg.CurrentAPI, cfg.CurrentModel)
if modelHasVision {
logger.Info("model has vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
} else {
logger.Info("model does not have vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
if windowToolsAvailable && !prevHasVision && !modelHasVision {
showToast("window tools", "Window capture-and-view unavailable: model lacks vision support")
}
}
registerWindowTools()
registerPlaywrightTools()
}
// getWebAgentClient returns a singleton AgentClient for web agents.
func getWebAgentClient() *agent.AgentClient {
webAgentClientOnce.Do(func() {
if cfg == nil {
panic("cfg not initialized")
if logger != nil {
logger.Warn("web agent client unavailable: config not initialized")
}
return
}
if logger == nil {
panic("logger not initialized")
if logger != nil {
logger.Warn("web agent client unavailable: logger not initialized")
}
return
}
getToken := func() string {
if chunkParser == nil {
@@ -469,6 +567,43 @@ func fileRead(args map[string]string) []byte {
return jsonResult
}
func fileReadImage(args map[string]string) []byte {
path, ok := args["path"]
if !ok || path == "" {
msg := "path not provided to file_read_image tool"
logger.Error(msg)
return []byte(msg)
}
path = resolvePath(path)
dataURL, err := models.CreateImageURLFromPath(path)
if err != nil {
msg := "failed to read image; error: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
// result := map[string]any{
// "type": "multimodal_content",
// "parts": []map[string]string{
// {"type": "text", "text": "Image at " + path},
// {"type": "image_url", "url": dataURL},
// },
// }
result := models.MultimodalToolResp{
Type: "multimodal_content",
Parts: []map[string]string{
{"type": "text", "text": "Image at " + path},
{"type": "image_url", "url": dataURL},
},
}
jsonResult, err := json.Marshal(result)
if err != nil {
msg := "failed to marshal result; error: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return jsonResult
}
func fileWrite(args map[string]string) []byte {
path, ok := args["path"]
if !ok || path == "" {
@@ -1088,6 +1223,142 @@ func summarizeChat(args map[string]string) []byte {
return []byte(chatText)
}
func windowIDToHex(decimalID string) string {
id, err := strconv.ParseInt(decimalID, 10, 64)
if err != nil {
return decimalID
}
return fmt.Sprintf("0x%x", id)
}
func listWindows(args map[string]string) []byte {
if !windowToolsAvailable {
return []byte("window tools not available: xdotool or maim not found")
}
cmd := exec.Command(xdotoolPath, "search", "--name", ".")
output, err := cmd.Output()
if err != nil {
msg := "failed to list windows: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
windowIDs := strings.Fields(string(output))
windows := make(map[string]string)
for _, id := range windowIDs {
id = strings.TrimSpace(id)
if id == "" {
continue
}
nameCmd := exec.Command(xdotoolPath, "getwindowname", id)
nameOutput, err := nameCmd.Output()
if err != nil {
continue
}
name := strings.TrimSpace(string(nameOutput))
windows[id] = name
}
data, err := json.Marshal(windows)
if err != nil {
msg := "failed to marshal window list: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return data
}
func captureWindow(args map[string]string) []byte {
if !windowToolsAvailable {
return []byte("window tools not available: xdotool or maim not found")
}
window, ok := args["window"]
if !ok || window == "" {
return []byte("window parameter required (window ID or name)")
}
var windowID string
if _, err := strconv.Atoi(window); err == nil {
windowID = window
} else {
cmd := exec.Command(xdotoolPath, "search", "--name", window)
output, err := cmd.Output()
if err != nil || len(strings.Fields(string(output))) == 0 {
return []byte("window not found: " + window)
}
windowID = strings.Fields(string(output))[0]
}
nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
nameOutput, _ := nameCmd.Output()
windowName := strings.TrimSpace(string(nameOutput))
windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
if windowName == "" {
windowName = "window"
}
timestamp := time.Now().Unix()
filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
cmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
if err := cmd.Run(); err != nil {
msg := "failed to capture window: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return []byte("screenshot saved: " + filename)
}
func captureWindowAndView(args map[string]string) []byte {
if !windowToolsAvailable {
return []byte("window tools not available: xdotool or maim not found")
}
window, ok := args["window"]
if !ok || window == "" {
return []byte("window parameter required (window ID or name)")
}
var windowID string
if _, err := strconv.Atoi(window); err == nil {
windowID = window
} else {
cmd := exec.Command(xdotoolPath, "search", "--name", window)
output, err := cmd.Output()
if err != nil || len(strings.Fields(string(output))) == 0 {
return []byte("window not found: " + window)
}
windowID = strings.Fields(string(output))[0]
}
nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
nameOutput, _ := nameCmd.Output()
windowName := strings.TrimSpace(string(nameOutput))
windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
if windowName == "" {
windowName = "window"
}
timestamp := time.Now().Unix()
filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
captureCmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
if err := captureCmd.Run(); err != nil {
msg := "failed to capture window: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
dataURL, err := models.CreateImageURLFromPath(filename)
if err != nil {
msg := "failed to create image URL: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
result := models.MultimodalToolResp{
Type: "multimodal_content",
Parts: []map[string]string{
{"type": "text", "text": "Screenshot saved: " + filename},
{"type": "image_url", "url": dataURL},
},
}
jsonResult, err := json.Marshal(result)
if err != nil {
msg := "failed to marshal result: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return jsonResult
}
type fnSig func(map[string]string) []byte
var fnMap = map[string]fnSig{
@@ -1101,6 +1372,7 @@ var fnMap = map[string]fnSig{
"read_url_raw": readURLRaw,
"file_create": fileCreate,
"file_read": fileRead,
"file_read_image": fileReadImage,
"file_write": fileWrite,
"file_write_append": fileWriteAppend,
"file_edit": fileEdit,
@@ -1116,6 +1388,409 @@ var fnMap = map[string]fnSig{
"summarize_chat": summarizeChat,
}
func removeWindowToolsFromBaseTools() {
windowToolNames := map[string]bool{
"list_windows": true,
"capture_window": true,
"capture_window_and_view": true,
}
var filtered []models.Tool
for _, tool := range baseTools {
if !windowToolNames[tool.Function.Name] {
filtered = append(filtered, tool)
}
}
baseTools = filtered
delete(fnMap, "list_windows")
delete(fnMap, "capture_window")
delete(fnMap, "capture_window_and_view")
}
func removePlaywrightToolsFromBaseTools() {
playwrightToolNames := map[string]bool{
"pw_start": true,
"pw_stop": true,
"pw_is_running": true,
"pw_navigate": true,
"pw_click": true,
"pw_click_at": true,
"pw_fill": true,
"pw_extract_text": true,
"pw_screenshot": true,
"pw_screenshot_and_view": true,
"pw_wait_for_selector": true,
"pw_drag": true,
}
var filtered []models.Tool
for _, tool := range baseTools {
if !playwrightToolNames[tool.Function.Name] {
filtered = append(filtered, tool)
}
}
baseTools = filtered
delete(fnMap, "pw_start")
delete(fnMap, "pw_stop")
delete(fnMap, "pw_is_running")
delete(fnMap, "pw_navigate")
delete(fnMap, "pw_click")
delete(fnMap, "pw_click_at")
delete(fnMap, "pw_fill")
delete(fnMap, "pw_extract_text")
delete(fnMap, "pw_screenshot")
delete(fnMap, "pw_screenshot_and_view")
delete(fnMap, "pw_wait_for_selector")
delete(fnMap, "pw_drag")
}
func registerWindowTools() {
removeWindowToolsFromBaseTools()
if windowToolsAvailable {
fnMap["list_windows"] = listWindows
fnMap["capture_window"] = captureWindow
windowTools := []models.Tool{
{
Type: "function",
Function: models.ToolFunc{
Name: "list_windows",
Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window",
Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"window"},
Properties: map[string]models.ToolArgProps{
"window": models.ToolArgProps{
Type: "string",
Description: "window ID or window name (partial match)",
},
},
},
},
},
}
if modelHasVision {
fnMap["capture_window_and_view"] = captureWindowAndView
windowTools = append(windowTools, models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window_and_view",
Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"window"},
Properties: map[string]models.ToolArgProps{
"window": models.ToolArgProps{
Type: "string",
Description: "window ID or window name (partial match)",
},
},
},
},
})
}
baseTools = append(baseTools, windowTools...)
toolSysMsg += windowToolSysMsg
}
}
func registerPlaywrightTools() {
removePlaywrightToolsFromBaseTools()
if cfg != nil && cfg.PlaywrightEnabled {
fnMap["pw_start"] = pwStart
fnMap["pw_stop"] = pwStop
fnMap["pw_is_running"] = pwIsRunning
fnMap["pw_navigate"] = pwNavigate
fnMap["pw_click"] = pwClick
fnMap["pw_click_at"] = pwClickAt
fnMap["pw_fill"] = pwFill
fnMap["pw_extract_text"] = pwExtractText
fnMap["pw_screenshot"] = pwScreenshot
fnMap["pw_screenshot_and_view"] = pwScreenshotAndView
fnMap["pw_wait_for_selector"] = pwWaitForSelector
fnMap["pw_drag"] = pwDrag
fnMap["pw_get_html"] = pwGetHTML
fnMap["pw_get_dom"] = pwGetDOM
fnMap["pw_search_elements"] = pwSearchElements
playwrightTools := []models.Tool{
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_start",
Description: "Start a Playwright browser instance. Call this first before using other pw_ tools. Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_stop",
Description: "Stop the Playwright browser instance. Call when done with browser automation.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_is_running",
Description: "Check if Playwright browser is currently running.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_navigate",
Description: "Navigate to a URL in the browser.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"url"},
Properties: map[string]models.ToolArgProps{
"url": models.ToolArgProps{
Type: "string",
Description: "URL to navigate to",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_click",
Description: "Click on an element using CSS selector. Use 'index' for multiple matches (default 0).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"selector"},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "CSS selector for the element to click",
},
"index": models.ToolArgProps{
Type: "string",
Description: "optional index for multiple matches (default 0)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_fill",
Description: "Fill an input field with text using CSS selector.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"selector", "text"},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "CSS selector for the input element",
},
"text": models.ToolArgProps{
Type: "string",
Description: "text to fill into the input",
},
"index": models.ToolArgProps{
Type: "string",
Description: "optional index for multiple matches (default 0)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_extract_text",
Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"selector"},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "CSS selector (use 'body' for all page text)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_screenshot",
Description: "Take a screenshot of the page or a specific element. Returns file path to saved image.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "optional CSS selector for element to screenshot",
},
"full_page": models.ToolArgProps{
Type: "string",
Description: "optional: 'true' to capture full page (default false)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_screenshot_and_view",
Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "optional CSS selector for element to screenshot",
},
"full_page": models.ToolArgProps{
Type: "string",
Description: "optional: 'true' to capture full page (default false)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_wait_for_selector",
Description: "Wait for an element to appear on the page.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"selector"},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "CSS selector to wait for",
},
"timeout": models.ToolArgProps{
Type: "string",
Description: "optional timeout in ms (default 30000)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_drag",
Description: "Drag the mouse from one point to another.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"x1", "y1", "x2", "y2"},
Properties: map[string]models.ToolArgProps{
"x1": models.ToolArgProps{
Type: "string",
Description: "starting X coordinate",
},
"y1": models.ToolArgProps{
Type: "string",
Description: "starting Y coordinate",
},
"x2": models.ToolArgProps{
Type: "string",
Description: "ending X coordinate",
},
"y2": models.ToolArgProps{
Type: "string",
Description: "ending Y coordinate",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_get_html",
Description: "Get the HTML content of the page or a specific element.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "optional CSS selector (default: body)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_get_dom",
Description: "Get a structured DOM representation of an element with tag, attributes, text, and children.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "optional CSS selector (default: body)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_search_elements",
Description: "Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{
"text": models.ToolArgProps{
Type: "string",
Description: "text to search for in elements",
},
"selector": models.ToolArgProps{
Type: "string",
Description: "CSS selector to search for",
},
},
},
},
},
}
baseTools = append(baseTools, playwrightTools...)
toolSysMsg += browserToolSysMsg
}
}
// callToolWithAgent calls the tool and applies any registered agent.
func callToolWithAgent(name string, args map[string]string) []byte {
registerWebAgents()
@@ -1327,6 +2002,24 @@ var baseTools = []models.Tool{
},
},
},
// file_read_image
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "file_read_image",
Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"path"},
Properties: map[string]models.ToolArgProps{
"path": models.ToolArgProps{
Type: "string",
Description: "path of the image file to read",
},
},
},
},
},
// file_write
models.Tool{
Type: "function",
@@ -1580,3 +2273,56 @@ var baseTools = []models.Tool{
},
},
}
func init() {
if windowToolsAvailable {
baseTools = append(baseTools,
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "list_windows",
Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{},
},
},
},
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window",
Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"window"},
Properties: map[string]models.ToolArgProps{
"window": models.ToolArgProps{
Type: "string",
Description: "window ID or window name (partial match)",
},
},
},
},
},
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window_and_view",
Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"window"},
Properties: map[string]models.ToolArgProps{
"window": models.ToolArgProps{
Type: "string",
Description: "window ID or window name (partial match)",
},
},
},
},
},
)
}
}

653
tools_playwright.go Normal file
View File

@@ -0,0 +1,653 @@
package main
import (
"encoding/json"
"fmt"
"gf-lt/models"
"os"
"strconv"
"strings"
"sync"
"github.com/playwright-community/playwright-go"
)
var browserToolSysMsg = `
Additional browser automation tools (Playwright):
[
{
"name": "pw_start",
"args": [],
"when_to_use": "start a browser instance before doing any browser automation. Must be called first."
},
{
"name": "pw_stop",
"args": [],
"when_to_use": "stop the browser instance when done with automation."
},
{
"name": "pw_is_running",
"args": [],
"when_to_use": "check if browser is currently running."
},
{
"name": "pw_navigate",
"args": ["url"],
"when_to_use": "open a specific URL in the web browser."
},
{
"name": "pw_click",
"args": ["selector", "index"],
"when_to_use": "click on an element on the current webpage. Use 'index' for multiple matches (default 0)."
},
{
"name": "pw_fill",
"args": ["selector", "text", "index"],
"when_to_use": "type text into an input field. Use 'index' for multiple matches (default 0)."
},
{
"name": "pw_extract_text",
"args": ["selector"],
"when_to_use": "extract text content from the page or specific elements. Use selector 'body' for all page text."
},
{
"name": "pw_screenshot",
"args": ["selector", "full_page"],
"when_to_use": "take a screenshot of the page or a specific element. Returns a file path to the image. Use to verify actions or inspect visual state."
},
{
"name": "pw_screenshot_and_view",
"args": ["selector", "full_page"],
"when_to_use": "take a screenshot and return the image for viewing. Use to visually verify page state."
},
{
"name": "pw_wait_for_selector",
"args": ["selector", "timeout"],
"when_to_use": "wait for an element to appear on the page before proceeding with further actions."
},
{
"name": "pw_drag",
"args": ["x1", "y1", "x2", "y2"],
"when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)."
},
{
"name": "pw_click_at",
"args": ["x", "y"],
"when_to_use": "click at specific X,Y coordinates on the page. Use when you know the exact position."
},
{
"name": "pw_get_html",
"args": ["selector"],
"when_to_use": "get the HTML content of the page or a specific element. Use to understand page structure or extract raw HTML."
},
{
"name": "pw_get_dom",
"args": ["selector"],
"when_to_use": "get a structured DOM representation with tag, attributes, text, and children. Use to inspect element hierarchy and properties."
},
{
"name": "pw_search_elements",
"args": ["text", "selector"],
"when_to_use": "search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML."
}
]
`
var (
pw *playwright.Playwright
browser playwright.Browser
browserStarted bool
browserStartMu sync.Mutex
page playwright.Page
)
func pwShutDown() error {
if pw == nil {
return nil
}
pwStop(nil)
return pw.Stop()
}
func installPW() error {
err := playwright.Install(&playwright.RunOptions{Verbose: false})
if err != nil {
logger.Warn("playwright not available", "error", err)
return err
}
return nil
}
func checkPlaywright() error {
var err error
pw, err = playwright.Run()
if err != nil {
logger.Warn("playwright not available", "error", err)
return err
}
return nil
}
func pwStart(args map[string]string) []byte {
browserStartMu.Lock()
defer browserStartMu.Unlock()
if browserStarted {
return []byte(`{"error": "Browser already started"}`)
}
var err error
browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
Headless: playwright.Bool(!cfg.PlaywrightDebug),
})
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to launch browser: %s"}`, err.Error()))
}
page, err = browser.NewPage()
if err != nil {
browser.Close()
return []byte(fmt.Sprintf(`{"error": "failed to create page: %s"}`, err.Error()))
}
browserStarted = true
return []byte(`{"success": true, "message": "Browser started"}`)
}
func pwStop(args map[string]string) []byte {
browserStartMu.Lock()
defer browserStartMu.Unlock()
if !browserStarted {
return []byte(`{"success": true, "message": "Browser was not running"}`)
}
if page != nil {
page.Close()
page = nil
}
if browser != nil {
browser.Close()
browser = nil
}
browserStarted = false
return []byte(`{"success": true, "message": "Browser stopped"}`)
}
func pwIsRunning(args map[string]string) []byte {
if browserStarted {
return []byte(`{"running": true, "message": "Browser is running"}`)
}
return []byte(`{"running": false, "message": "Browser is not running"}`)
}
func pwNavigate(args map[string]string) []byte {
url, ok := args["url"]
if !ok || url == "" {
return []byte(`{"error": "url not provided"}`)
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
_, err := page.Goto(url)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to navigate: %s"}`, err.Error()))
}
title, _ := page.Title()
pageURL := page.URL()
return []byte(fmt.Sprintf(`{"success": true, "title": "%s", "url": "%s"}`, title, pageURL))
}
func pwClick(args map[string]string) []byte {
selector, ok := args["selector"]
if !ok || selector == "" {
return []byte(`{"error": "selector not provided"}`)
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
index := 0
if args["index"] != "" {
if i, err := strconv.Atoi(args["index"]); err != nil {
logger.Warn("failed to parse index", "value", args["index"], "error", err)
} else {
index = i
}
}
locator := page.Locator(selector)
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
}
if index >= count {
return []byte(fmt.Sprintf(`{"error": "Element not found at index %d (found %d elements)"}`, index, count))
}
err = locator.Nth(index).Click()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to click: %s"}`, err.Error()))
}
return []byte(`{"success": true, "message": "Clicked element"}`)
}
func pwFill(args map[string]string) []byte {
selector, ok := args["selector"]
if !ok || selector == "" {
return []byte(`{"error": "selector not provided"}`)
}
text := args["text"]
if text == "" {
text = ""
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
index := 0
if args["index"] != "" {
if i, err := strconv.Atoi(args["index"]); err != nil {
logger.Warn("failed to parse index", "value", args["index"], "error", err)
} else {
index = i
}
}
locator := page.Locator(selector)
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
}
if index >= count {
return []byte(fmt.Sprintf(`{"error": "Element not found at index %d"}`, index))
}
err = locator.Nth(index).Fill(text)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to fill: %s"}`, err.Error()))
}
return []byte(`{"success": true, "message": "Filled input"}`)
}
func pwExtractText(args map[string]string) []byte {
selector := args["selector"]
if selector == "" {
selector = "body"
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
locator := page.Locator(selector)
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
}
if count == 0 {
return []byte(`{"error": "No elements found"}`)
}
if selector == "body" {
text, err := page.Locator("body").TextContent()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to get text: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"text": "%s"}`, text))
}
var texts []string
for i := 0; i < count; i++ {
text, err := locator.Nth(i).TextContent()
if err != nil {
continue
}
texts = append(texts, text)
}
return []byte(fmt.Sprintf(`{"text": "%s"}`, joinLines(texts)))
}
func joinLines(lines []string) string {
var sb strings.Builder
for i, line := range lines {
if i > 0 {
sb.WriteString("\n")
}
sb.WriteString(line)
}
return sb.String()
}
func pwScreenshot(args map[string]string) []byte {
selector := args["selector"]
fullPage := args["full_page"] == "true"
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
var err error
if selector != "" && selector != "body" {
locator := page.Locator(selector)
_, err = locator.Screenshot(playwright.LocatorScreenshotOptions{
Path: playwright.String(path),
})
} else {
_, err = page.Screenshot(playwright.PageScreenshotOptions{
Path: playwright.String(path),
FullPage: playwright.Bool(fullPage),
})
}
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to take screenshot: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"path": "%s"}`, path))
}
func pwScreenshotAndView(args map[string]string) []byte {
selector := args["selector"]
fullPage := args["full_page"] == "true"
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
var err error
if selector != "" && selector != "body" {
locator := page.Locator(selector)
_, err = locator.Screenshot(playwright.LocatorScreenshotOptions{
Path: playwright.String(path),
})
} else {
_, err = page.Screenshot(playwright.PageScreenshotOptions{
Path: playwright.String(path),
FullPage: playwright.Bool(fullPage),
})
}
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to take screenshot: %s"}`, err.Error()))
}
dataURL, err := models.CreateImageURLFromPath(path)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to create image URL: %s"}`, err.Error()))
}
resp := models.MultimodalToolResp{
Type: "multimodal_content",
Parts: []map[string]string{
{"type": "text", "text": "Screenshot saved: " + path},
{"type": "image_url", "url": dataURL},
},
}
jsonResult, err := json.Marshal(resp)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to marshal result: %s"}`, err.Error()))
}
return jsonResult
}
func pwWaitForSelector(args map[string]string) []byte {
selector, ok := args["selector"]
if !ok || selector == "" {
return []byte(`{"error": "selector not provided"}`)
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
timeout := 30000
if args["timeout"] != "" {
if t, err := strconv.Atoi(args["timeout"]); err != nil {
logger.Warn("failed to parse timeout", "value", args["timeout"], "error", err)
} else {
timeout = t
}
}
locator := page.Locator(selector)
err := locator.WaitFor(playwright.LocatorWaitForOptions{
Timeout: playwright.Float(float64(timeout)),
})
if err != nil {
return []byte(fmt.Sprintf(`{"error": "element not found: %s"}`, err.Error()))
}
return []byte(`{"success": true, "message": "Element found"}`)
}
func pwDrag(args map[string]string) []byte {
x1, ok := args["x1"]
if !ok {
return []byte(`{"error": "x1 not provided"}`)
}
y1, ok := args["y1"]
if !ok {
return []byte(`{"error": "y1 not provided"}`)
}
x2, ok := args["x2"]
if !ok {
return []byte(`{"error": "x2 not provided"}`)
}
y2, ok := args["y2"]
if !ok {
return []byte(`{"error": "y2 not provided"}`)
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
var fx1, fy1, fx2, fy2 float64
if parsedX1, err := strconv.ParseFloat(x1, 64); err != nil {
logger.Warn("failed to parse x1", "value", x1, "error", err)
} else {
fx1 = parsedX1
}
if parsedY1, err := strconv.ParseFloat(y1, 64); err != nil {
logger.Warn("failed to parse y1", "value", y1, "error", err)
} else {
fy1 = parsedY1
}
if parsedX2, err := strconv.ParseFloat(x2, 64); err != nil {
logger.Warn("failed to parse x2", "value", x2, "error", err)
} else {
fx2 = parsedX2
}
if parsedY2, err := strconv.ParseFloat(y2, 64); err != nil {
logger.Warn("failed to parse y2", "value", y2, "error", err)
} else {
fy2 = parsedY2
}
mouse := page.Mouse()
err := mouse.Move(fx1, fy1)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error()))
}
err = mouse.Down()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to mouse down: %s"}`, err.Error()))
}
err = mouse.Move(fx2, fy2)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error()))
}
err = mouse.Up()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to mouse up: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2))
}
func pwClickAt(args map[string]string) []byte {
x, ok := args["x"]
if !ok {
return []byte(`{"error": "x not provided"}`)
}
y, ok := args["y"]
if !ok {
return []byte(`{"error": "y not provided"}`)
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
fx, err := strconv.ParseFloat(x, 64)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to parse x: %s"}`, err.Error()))
}
fy, err := strconv.ParseFloat(y, 64)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to parse y: %s"}`, err.Error()))
}
mouse := page.Mouse()
err = mouse.Click(fx, fy)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to click: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"success": true, "message": "Clicked at (%s,%s)"}`, x, y))
}
func pwGetHTML(args map[string]string) []byte {
selector := args["selector"]
if selector == "" {
selector = "body"
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
locator := page.Locator(selector)
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
}
if count == 0 {
return []byte(`{"error": "No elements found"}`)
}
html, err := locator.First().InnerHTML()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to get HTML: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html)))
}
type DOMElement struct {
Tag string `json:"tag,omitempty"`
Attributes map[string]string `json:"attributes,omitempty"`
Text string `json:"text,omitempty"`
Children []DOMElement `json:"children,omitempty"`
Selector string `json:"selector,omitempty"`
InnerHTML string `json:"innerHTML,omitempty"`
}
func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) {
var results []DOMElement
count, err := locator.Count()
if err != nil {
return nil, err
}
for i := 0; i < count; i++ {
el := locator.Nth(i)
dom, err := elementToDOM(el)
if err != nil {
continue
}
results = append(results, dom)
}
return results, nil
}
func elementToDOM(el playwright.Locator) (DOMElement, error) {
dom := DOMElement{}
tag, err := el.Evaluate(`el => el.nodeName`, nil)
if err == nil {
dom.Tag = strings.ToLower(fmt.Sprintf("%v", tag))
}
attributes := make(map[string]string)
attrs, err := el.Evaluate(`el => {
let attrs = {};
for (let i = 0; i < el.attributes.length; i++) {
let attr = el.attributes[i];
attrs[attr.name] = attr.value;
}
return attrs;
}`, nil)
if err == nil {
if amap, ok := attrs.(map[string]any); ok {
for k, v := range amap {
if vs, ok := v.(string); ok {
attributes[k] = vs
}
}
}
}
if len(attributes) > 0 {
dom.Attributes = attributes
}
text, err := el.TextContent()
if err == nil && text != "" {
dom.Text = text
}
innerHTML, err := el.InnerHTML()
if err == nil && innerHTML != "" {
dom.InnerHTML = innerHTML
}
childCount, _ := el.Count()
if childCount > 0 {
childrenLocator := el.Locator("*")
children, err := buildDOMTree(childrenLocator)
if err == nil && len(children) > 0 {
dom.Children = children
}
}
return dom, nil
}
func pwGetDOM(args map[string]string) []byte {
selector := args["selector"]
if selector == "" {
selector = "body"
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
locator := page.Locator(selector)
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
}
if count == 0 {
return []byte(`{"error": "No elements found"}`)
}
dom, err := elementToDOM(locator.First())
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to get DOM: %s"}`, err.Error()))
}
data, err := json.Marshal(dom)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to marshal DOM: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"dom": %s}`, string(data)))
}
func pwSearchElements(args map[string]string) []byte {
text := args["text"]
selector := args["selector"]
if text == "" && selector == "" {
return []byte(`{"error": "text or selector not provided"}`)
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
var locator playwright.Locator
if text != "" {
locator = page.GetByText(text)
} else {
locator = page.Locator(selector)
}
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to search elements: %s"}`, err.Error()))
}
if count == 0 {
return []byte(`{"elements": []}`)
}
var results []map[string]string
for i := 0; i < count; i++ {
el := locator.Nth(i)
tag, _ := el.Evaluate(`el => el.nodeName`, nil)
text, _ := el.TextContent()
html, _ := el.InnerHTML()
results = append(results, map[string]string{
"index": strconv.Itoa(i),
"tag": strings.ToLower(fmt.Sprintf("%v", tag)),
"text": text,
"html": html,
})
}
data, err := json.Marshal(results)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to marshal results: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"elements": %s}`, string(data)))
}
func jsonString(s string) string {
b, _ := json.Marshal(s)
return string(b)
}

274
tui.go
View File

@@ -10,6 +10,7 @@ import (
"path"
"strconv"
"strings"
"time"
"github.com/gdamore/tcell/v2"
"github.com/rivo/tview"
@@ -21,7 +22,6 @@ func isFullScreenPageActive() bool {
}
var (
app *tview.Application
pages *tview.Pages
textArea *tview.TextArea
editArea *tview.TextArea
@@ -35,6 +35,8 @@ var (
renameWindow *tview.InputField
roleEditWindow *tview.InputField
shellInput *tview.InputField
confirmModal *tview.Modal
confirmPageName = "confirm"
fullscreenMode bool
positionVisible bool = true
scrollToEndEnabled bool = true
@@ -135,6 +137,55 @@ func setShellMode(enabled bool) {
}()
}
// showToast displays a temporary message in the topright corner.
// It autohides after 3 seconds and disappears when clicked.
func showToast(title, message string) {
sanitize := func(s string, maxLen int) string {
sanitized := strings.Map(func(r rune) rune {
if r < 32 && r != '\t' {
return -1
}
return r
}, s)
if len(sanitized) > maxLen {
sanitized = sanitized[:maxLen-3] + "..."
}
return sanitized
}
title = sanitize(title, 50)
message = sanitize(message, 197)
notification := tview.NewTextView().
SetTextAlign(tview.AlignCenter).
SetDynamicColors(true).
SetRegions(true).
SetText(fmt.Sprintf("[yellow]%s[-]\n", message)).
SetChangedFunc(func() {
app.Draw()
})
notification.SetTitleAlign(tview.AlignLeft).
SetBorder(true).
SetTitle(title)
// Wrap it in a fullscreen Flex to position it in the topright corner.
// Outer Flex (row) pushes content to the top; inner Flex (column) pushes to the right.
background := tview.NewFlex().SetDirection(tview.FlexRow).
AddItem(nil, 0, 1, false). // top spacer
AddItem(tview.NewFlex().SetDirection(tview.FlexColumn).
AddItem(nil, 0, 1, false). // left spacer
AddItem(notification, 40, 1, true), // notification width 40
5, 1, false) // notification height 5
// Generate a unique page name (e.g., using timestamp) to allow multiple toasts.
pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano())
pages.AddPage(pageName, background, true, true)
// Autodismiss after 3 seconds.
time.AfterFunc(3*time.Second, func() {
app.QueueUpdateDraw(func() {
if pages.HasPage(pageName) {
pages.RemovePage(pageName)
}
})
})
}
func init() {
// Start background goroutine to update model color cache
startModelColorUpdater()
@@ -195,6 +246,39 @@ func init() {
}
return event
})
confirmModal = tview.NewModal().
SetText("You are trying to send an empty message.\nIt makes sense if the last message in the chat is from you.\nAre you sure?").
AddButtons([]string{"Yes", "No"}).
SetButtonBackgroundColor(tcell.ColorBlack).
SetButtonTextColor(tcell.ColorWhite).
SetDoneFunc(func(buttonIndex int, buttonLabel string) {
if buttonLabel == "Yes" {
persona := cfg.UserRole
if cfg.WriteNextMsgAs != "" {
persona = cfg.WriteNextMsgAs
}
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: ""}
} // In both Yes and No, go back to the main page
pages.SwitchToPage("main") // or whatever your main page is named
})
confirmModal.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
if event.Key() == tcell.KeyRune {
switch event.Rune() {
case 'y', 'Y':
persona := cfg.UserRole
if cfg.WriteNextMsgAs != "" {
persona = cfg.WriteNextMsgAs
}
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: ""}
pages.SwitchToPage("main")
return nil
case 'n', 'N', 'x', 'X':
pages.SwitchToPage("main")
return nil
}
}
return event
})
textArea = tview.NewTextArea().
SetPlaceholder("input is multiline; press <Enter> to start the next line;\npress <Esc> to send the message.")
textArea.SetBorder(true).SetTitle("input")
@@ -292,9 +376,7 @@ func init() {
defer colorText()
editedMsg := editArea.GetText()
if editedMsg == "" {
if err := notifyUser("edit", "no edit provided"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("edit", "no edit provided")
pages.RemovePage(editMsgPage)
return nil
}
@@ -324,9 +406,7 @@ func init() {
case tcell.KeyEnter:
newRole := roleEditWindow.GetText()
if newRole == "" {
if err := notifyUser("edit", "no role provided"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("edit", "no role provided")
pages.RemovePage(roleEditPage)
return
}
@@ -353,9 +433,7 @@ func init() {
siInt, err := strconv.Atoi(si)
if err != nil {
logger.Error("failed to convert provided index", "error", err, "si", si)
if err := notifyUser("cancel", "no index provided, copying user input"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("cancel", "no index provided, copying user input")
if err := copyToClipboard(textArea.GetText()); err != nil {
logger.Error("failed to copy to clipboard", "error", err)
}
@@ -366,9 +444,7 @@ func init() {
if len(chatBody.Messages)-1 < selectedIndex || selectedIndex < 0 {
msg := "chosen index is out of bounds, will copy user input"
logger.Warn(msg, "index", selectedIndex)
if err := notifyUser("error", msg); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("error", msg)
if err := copyToClipboard(textArea.GetText()); err != nil {
logger.Error("failed to copy to clipboard", "error", err)
}
@@ -394,9 +470,7 @@ func init() {
}
previewLen := min(30, len(msgText))
notification := fmt.Sprintf("msg '%s' was copied to the clipboard", msgText[:previewLen])
if err := notifyUser("copied", notification); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("copied", notification)
hideIndexBar() // Hide overlay after copying
}
return nil
@@ -428,9 +502,7 @@ func init() {
logger.Error("failed to upsert chat", "error", err, "chat", currentChat)
}
notification := fmt.Sprintf("renamed chat to '%s'", activeChatName)
if err := notifyUser("renamed", notification); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("renamed", notification)
}
return event
})
@@ -540,9 +612,7 @@ func init() {
if scrollToEndEnabled {
status = "enabled"
}
if err := notifyUser("autoscroll", "Auto-scrolling "+status); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("autoscroll", "Auto-scrolling "+status)
updateStatusLine()
}
// Handle Alt+7 to toggle injectRole
@@ -559,9 +629,7 @@ func init() {
if thinkingCollapsed {
status = "collapsed"
}
if err := notifyUser("thinking", "Thinking blocks "+status); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("thinking", "Thinking blocks "+status)
return nil
}
// Handle Ctrl+T to toggle tool call/response visibility
@@ -573,9 +641,7 @@ func init() {
if toolCollapsed {
status = "collapsed"
}
if err := notifyUser("tools", "Tool calls/responses "+status); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("tools", "Tool calls/responses "+status)
return nil
}
if event.Key() == tcell.KeyRune && event.Rune() == 'i' && event.Modifiers()&tcell.ModAlt != 0 {
@@ -595,9 +661,7 @@ func init() {
// Check if there are no chats for this agent
if len(chatList) == 0 {
notification := "no chats found for agent: " + cfg.AssistantRole
if err := notifyUser("info", notification); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("info", notification)
return nil
}
chatMap := make(map[string]models.Chat)
@@ -615,9 +679,7 @@ func init() {
if event.Key() == tcell.KeyF2 && !botRespMode {
// regen last msg
if len(chatBody.Messages) == 0 {
if err := notifyUser("info", "no messages to regenerate"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("info", "no messages to regenerate")
return nil
}
chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1]
@@ -643,9 +705,7 @@ func init() {
return nil
}
if len(chatBody.Messages) == 0 {
if err := notifyUser("info", "no messages to delete"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("info", "no messages to delete")
return nil
}
chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1]
@@ -691,6 +751,7 @@ func init() {
if event.Key() == tcell.KeyF6 {
interruptResp = true
botRespMode = false
toolRunningMode = false
return nil
}
if event.Key() == tcell.KeyF7 {
@@ -703,9 +764,7 @@ func init() {
}
previewLen := min(30, len(msgText))
notification := fmt.Sprintf("msg '%s' was copied to the clipboard", msgText[:previewLen])
if err := notifyUser("copied", notification); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("copied", notification)
return nil
}
if event.Key() == tcell.KeyF8 {
@@ -719,9 +778,7 @@ func init() {
text := textView.GetText(false)
cb := codeBlockRE.FindAllString(text, -1)
if len(cb) == 0 {
if err := notifyUser("notify", "no code blocks in chat"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("notify", "no code blocks in chat")
return nil
}
table := makeCodeBlockTable(cb)
@@ -736,9 +793,7 @@ func init() {
// read files in chat_exports
filelist, err := os.ReadDir(exportDir)
if err != nil {
if err := notifyUser("failed to load exports", err.Error()); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("failed to load exports", err.Error())
return nil
}
fli := []string{}
@@ -768,9 +823,7 @@ func init() {
logger.Error("failed to export chat;", "error", err, "chat_name", activeChatName)
return nil
}
if err := notifyUser("exported chat", "chat: "+activeChatName+" was exported"); err != nil {
logger.Error("failed to send notification", "error", err)
}
showToast("exported chat", "chat: "+activeChatName+" was exported")
return nil
}
if event.Key() == tcell.KeyCtrlP {
@@ -809,9 +862,7 @@ func init() {
labels, err := initSysCards()
if err != nil {
logger.Error("failed to read sys dir", "error", err)
if err := notifyUser("error", "failed to read: "+cfg.SysDir); err != nil {
logger.Debug("failed to notify user", "error", err)
}
showToast("error", "failed to read: "+cfg.SysDir)
return nil
}
at := makeAgentTable(labels)
@@ -824,6 +875,7 @@ func init() {
if event.Key() == tcell.KeyCtrlK {
// add message from tools
cfg.ToolUse = !cfg.ToolUse
updateToolCapabilities()
updateStatusLine()
return nil
}
@@ -835,21 +887,27 @@ func init() {
if err != nil {
logger.Error("failed to open attached image", "path", lastImg, "error", err)
// Fall back to showing agent image
loadImage()
if err := loadImage(); err != nil {
logger.Warn("failed to load agent image", "error", err)
}
} else {
defer file.Close()
img, _, err := image.Decode(file)
if err != nil {
logger.Error("failed to decode attached image", "path", lastImg, "error", err)
// Fall back to showing agent image
loadImage()
if err := loadImage(); err != nil {
logger.Warn("failed to load agent image", "error", err)
}
} else {
imgView.SetImage(img)
}
}
} else {
// No attached image, show agent image as before
loadImage()
if err := loadImage(); err != nil {
logger.Warn("failed to load agent image", "error", err)
}
}
pages.AddPage(imgPage, imgView, true, true)
return nil
@@ -861,9 +919,7 @@ func init() {
if err != nil {
msg := "failed to inference user speech; error:" + err.Error()
logger.Error(msg)
if err := notifyUser("stt error", msg); err != nil {
logger.Error("failed to notify user", "error", err)
}
showToast("stt error", msg)
return nil
}
if userSpeech != "" {
@@ -921,6 +977,17 @@ func init() {
showBotRoleSelectionPopup()
return nil
}
// INFO: shutdown
if event.Key() == tcell.KeyCtrlC {
logger.Info("caught Ctrl+C via tcell event")
go func() {
if err := pwShutDown(); err != nil {
logger.Error("shutdown failed", "err", err)
}
app.Stop()
}()
return nil // swallow the event
}
if event.Key() == tcell.KeyCtrlG {
// cfg.RAGDir is the directory with files to use with RAG
// rag load
@@ -932,26 +999,20 @@ func init() {
// Create the RAG directory if it doesn't exist
if mkdirErr := os.MkdirAll(cfg.RAGDir, 0755); mkdirErr != nil {
logger.Error("failed to create RAG directory", "dir", cfg.RAGDir, "error", mkdirErr)
if notifyerr := notifyUser("failed to create RAG directory", mkdirErr.Error()); notifyerr != nil {
logger.Error("failed to send notification", "error", notifyerr)
}
showToast("failed to create RAG directory", mkdirErr.Error())
return nil
}
// Now try to read the directory again after creating it
files, err = os.ReadDir(cfg.RAGDir)
if err != nil {
logger.Error("failed to read dir after creating it", "dir", cfg.RAGDir, "error", err)
if notifyerr := notifyUser("failed to read RAG directory", err.Error()); notifyerr != nil {
logger.Error("failed to send notification", "error", notifyerr)
}
showToast("failed to read RAG directory", err.Error())
return nil
}
} else {
// Other error (permissions, etc.)
logger.Error("failed to read dir", "dir", cfg.RAGDir, "error", err)
if notifyerr := notifyUser("failed to open RAG files dir", err.Error()); notifyerr != nil {
logger.Error("failed to send notification", "error", notifyerr)
}
showToast("failed to open RAG files dir", err.Error())
return nil
}
}
@@ -981,9 +1042,7 @@ func init() {
if event.Key() == tcell.KeyRune && event.Modifiers() == tcell.ModAlt && event.Rune() == '9' {
// Warm up (load) the currently selected model
go warmUpModel()
if err := notifyUser("model warmup", "loading model: "+chatBody.Model); err != nil {
logger.Debug("failed to notify user", "error", err)
}
showToast("model warmup", "loading model: "+chatBody.Model)
return nil
}
// cannot send msg in editMode or botRespMode
@@ -997,41 +1056,54 @@ func init() {
return nil
}
msgText := textArea.GetText()
nl := "\n\n" // keep empty lines between messages
prevText := textView.GetText(true)
persona := cfg.UserRole
// strings.LastIndex()
// newline is not needed is prev msg ends with one
if strings.HasSuffix(prevText, nl) {
nl = ""
} else if strings.HasSuffix(prevText, "\n") {
nl = "\n" // only one newline, add another
}
if msgText != "" {
nl := "\n\n" // keep empty lines between messages
prevText := textView.GetText(true)
persona := cfg.UserRole
// strings.LastIndex()
// newline is not needed is prev msg ends with one
if strings.HasSuffix(prevText, nl) {
nl = ""
} else if strings.HasSuffix(prevText, "\n") {
nl = "\n" // only one newline, add another
// as what char user sends msg?
if cfg.WriteNextMsgAs != "" {
persona = cfg.WriteNextMsgAs
}
if msgText != "" {
// as what char user sends msg?
if cfg.WriteNextMsgAs != "" {
persona = cfg.WriteNextMsgAs
// check if plain text
if !injectRole {
matches := roleRE.FindStringSubmatch(msgText)
if len(matches) > 1 {
persona = matches[1]
msgText = strings.TrimLeft(msgText[len(matches[0]):], " ")
}
// check if plain text
if !injectRole {
matches := roleRE.FindStringSubmatch(msgText)
if len(matches) > 1 {
persona = matches[1]
msgText = strings.TrimLeft(msgText[len(matches[0]):], " ")
}
}
// add user icon before user msg
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
nl, len(chatBody.Messages), persona, msgText)
textArea.SetText("", true)
if scrollToEndEnabled {
textView.ScrollToEnd()
}
colorText()
}
// go chatRound(msgText, persona, textView, false, false)
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
// add user icon before user msg
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
nl, len(chatBody.Messages), persona, msgText)
textArea.SetText("", true)
if scrollToEndEnabled {
textView.ScrollToEnd()
}
colorText()
} else {
pages.AddPage(confirmPageName, confirmModal, true, true)
return nil
}
// go chatRound(msgText, persona, textView, false, false)
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
return nil
}
if event.Key() == tcell.KeyTab {
currentF := app.GetFocus()
if currentF == textArea {
currentText := textArea.GetText()
atIndex := strings.LastIndex(currentText, "@")
if atIndex >= 0 {
filter := currentText[atIndex+1:]
showTextAreaFileCompletionPopup(filter)
}
}
return nil
}