14 Commits

Author SHA1 Message Date
Grail Finder
3389b1d83b Fix: linter complaints 2026-03-02 11:39:55 +03:00
Grail Finder
4f6000a43a Enha: check if model has vision before giving it vision tools 2026-03-02 11:25:20 +03:00
Grail Finder
9ba46b40cc Feat: screencapture for completion 2026-03-02 11:12:04 +03:00
Grail Finder
5bb456272e Feat: capture window (screenshot) 2026-03-02 10:33:41 +03:00
Grail Finder
8999f48fb9 Fix (completion): handle multiple images in history 2026-03-02 09:23:22 +03:00
Grail Finder
b2f280a7f1 Feat: read img for completion 2026-03-02 07:46:08 +03:00
Grail Finder
65cbd5d6a6 Fix (ctrl+v): trim loaded mark from the model 2026-03-02 07:19:21 +03:00
Grail Finder
caac1d397a Feat: read img tool for chat endpoint 2026-03-02 07:12:28 +03:00
Grail Finder
742f1ca838 Enha: modal affirmation popup on sending empty msg 2026-03-01 16:21:18 +03:00
Grail Finder
e36bade353 Fix: escape with empty textarea not generating response 2026-03-01 13:33:25 +03:00
Grail Finder
01d8bcdbf5 Enha: avoid \n\n in tool collapse 2026-03-01 12:28:23 +03:00
Grail Finder
f6a395bce9 Fix: todo_update 2026-03-01 12:16:17 +03:00
Grail Finder
dc34c63256 Feat: handle llm's cd use 2026-03-01 11:44:43 +03:00
Grail Finder
cdfccf9a24 Enha (llama.cpp): show loaded model on startup 2026-03-01 08:22:02 +03:00
10 changed files with 728 additions and 161 deletions

108
bot.go
View File

@@ -379,22 +379,22 @@ func fetchLCPModels() ([]string, error) {
// fetchLCPModelsWithLoadStatus returns models with "(loaded)" indicator for loaded models
func fetchLCPModelsWithLoadStatus() ([]string, error) {
models, err := fetchLCPModelsWithStatus()
modelList, err := fetchLCPModelsWithStatus()
if err != nil {
return nil, err
}
result := make([]string, 0, len(models.Data))
result := make([]string, 0, len(modelList.Data))
li := 0 // loaded index
for i, m := range models.Data {
for i, m := range modelList.Data {
modelName := m.ID
if m.Status.Value == "loaded" {
modelName = "(loaded) " + modelName
modelName = models.LoadedMark + modelName
li = i
}
result = append(result, modelName)
}
if li == 0 {
return result, nil // no loaded models
return result, nil // no loaded modelList
}
loadedModel := result[li]
result = append(result[:li], result[li+1:]...)
@@ -433,6 +433,33 @@ func isModelLoaded(modelID string) (bool, error) {
return false, nil
}
func ModelHasVision(api, modelID string) bool {
switch {
case strings.Contains(api, "deepseek"):
return false
case strings.Contains(api, "openrouter"):
resp, err := http.Get("https://openrouter.ai/api/v1/models")
if err != nil {
logger.Warn("failed to fetch OR models for vision check", "error", err)
return false
}
defer resp.Body.Close()
orm := &models.ORModels{}
if err := json.NewDecoder(resp.Body).Decode(orm); err != nil {
logger.Warn("failed to decode OR models for vision check", "error", err)
return false
}
return orm.HasVision(modelID)
default:
models, err := fetchLCPModelsWithStatus()
if err != nil {
logger.Warn("failed to fetch LCP models for vision check", "error", err)
return false
}
return models.HasVision(modelID)
}
}
// monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded.
func monitorModelLoad(modelID string) {
go func() {
@@ -1174,17 +1201,59 @@ func findCall(msg, toolCall string) bool {
toolRunningMode = false
toolMsg := string(resp)
logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolMsg)
// Create tool response message with the proper tool_call_id
// Mark shell commands as always visible
isShellCommand := fc.Name == "execute_command"
toolResponseMsg := models.RoleMsg{
// Check if response is multimodal content (image)
var toolResponseMsg models.RoleMsg
if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
// Parse multimodal content response
multimodalResp := models.MultimodalToolResp{}
if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
// Create RoleMsg with ContentParts
var contentParts []any
for _, part := range multimodalResp.Parts {
partType := part["type"]
switch partType {
case "text":
contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
case "image_url":
contentParts = append(contentParts, models.ImageContentPart{
Type: "image_url",
ImageURL: struct {
URL string `json:"url"`
}{URL: part["url"]},
})
default:
continue
}
}
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole,
ContentParts: contentParts,
HasContentParts: true,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
}
} else {
// Fallback to regular content
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole,
Content: toolMsg,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
}
}
} else {
toolResponseMsg = models.RoleMsg{
Role: cfg.ToolRole,
Content: toolMsg,
ToolCallID: lastToolCall.ID,
IsShellCommand: isShellCommand,
}
}
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
// Clear the stored tool call ID after using it
@@ -1207,11 +1276,11 @@ func chatToTextSlice(messages []models.RoleMsg, showSys bool) []string {
// This is a tool call indicator - show collapsed
if toolCollapsed {
toolName := messages[i].ToolCall.Name
resp[i] = fmt.Sprintf("%s\n[yellow::i][tool call: %s (press Ctrl+T to expand)][-:-:-]\n", icon, toolName)
resp[i] = strings.ReplaceAll(fmt.Sprintf("%s\n%s\n[yellow::i][tool call: %s (press Ctrl+T to expand)][-:-:-]\n", icon, messages[i].GetText(), toolName), "\n\n", "\n")
} else {
// Show full tool call info
toolName := messages[i].ToolCall.Name
resp[i] = fmt.Sprintf("%s\n%s\n[yellow::i][tool call: %s][-:-:-]\nargs: %s\nid: %s\n", icon, messages[i].GetText(), toolName, messages[i].ToolCall.Args, messages[i].ToolCall.ID)
resp[i] = strings.ReplaceAll(fmt.Sprintf("%s\n%s\n[yellow::i][tool call: %s][-:-:-]\nargs: %s\nid: %s\n", icon, messages[i].GetText(), toolName, messages[i].ToolCall.Args, messages[i].ToolCall.ID), "\n\n", "\n")
}
continue
}
@@ -1323,11 +1392,28 @@ func updateModelLists() {
}
// if llama.cpp started after gf-lt?
localModelsMu.Lock()
LocalModels, err = fetchLCPModels()
LocalModels, err = fetchLCPModelsWithLoadStatus()
localModelsMu.Unlock()
if err != nil {
logger.Warn("failed to fetch llama.cpp models", "error", err)
}
// set already loaded model in llama.cpp
if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") {
localModelsMu.Lock()
defer localModelsMu.Unlock()
for i := range LocalModels {
if strings.Contains(LocalModels[i], models.LoadedMark) {
m := strings.TrimPrefix(LocalModels[i], models.LoadedMark)
cfg.CurrentModel = m
chatBody.Model = m
cachedModelColor = "green"
updateStatusLine()
UpdateToolCapabilities()
app.Draw()
return
}
}
}
}
func refreshLocalModelsIfEmpty() {

View File

@@ -27,7 +27,6 @@ func startModelColorUpdater() {
go func() {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
// Initial check
updateCachedModelColor()
for range ticker.C {
@@ -42,7 +41,6 @@ func updateCachedModelColor() {
cachedModelColor = "orange"
return
}
// Check if model is loaded
loaded, err := isModelLoaded(chatBody.Model)
if err != nil {

60
llm.go
View File

@@ -3,7 +3,6 @@ package main
import (
"bytes"
"encoding/json"
"errors"
"gf-lt/models"
"io"
"strings"
@@ -119,25 +118,22 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
logger.Debug("formmsg lcpcompletion", "link", cfg.CurrentAPI)
localImageAttachmentPath := imageAttachmentPath
var multimodalData []string
if msg != "" { // otherwise let the bot to continue
var newMsg models.RoleMsg
if localImageAttachmentPath != "" {
newMsg = models.NewMultimodalMsg(role, []any{})
newMsg.AddTextPart(msg)
imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
if err != nil {
logger.Error("failed to create image URL from path for completion",
"error", err, "path", localImageAttachmentPath)
return nil, err
}
// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
parts := strings.SplitN(imageURL, ",", 2)
if len(parts) == 2 {
multimodalData = append(multimodalData, parts[1])
} else {
logger.Error("invalid image data URL format", "url", imageURL)
return nil, errors.New("invalid image data URL format")
}
newMsg.AddImagePart(imageURL, localImageAttachmentPath)
imageAttachmentPath = "" // Clear the attachment after use
} else { // not a multimodal msg or image passed in tool call
newMsg = models.RoleMsg{Role: role, Content: msg}
}
if msg != "" { // otherwise let the bot to continue
newMsg := models.RoleMsg{Role: role, Content: msg}
newMsg = *processMessageTag(&newMsg)
chatBody.Messages = append(chatBody.Messages, newMsg)
}
@@ -146,22 +142,40 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
}
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages)
// Build prompt and extract images inline as we process each message
messages := make([]string, len(filteredMessages))
for i := range filteredMessages {
messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt()
m := stripThinkingFromMsg(&filteredMessages[i])
messages[i] = m.ToPrompt()
// Extract images from this message and add marker inline
if len(m.ContentParts) > 0 {
for _, part := range m.ContentParts {
var imgURL string
// Check for struct type
if imgPart, ok := part.(models.ImageContentPart); ok {
imgURL = imgPart.ImageURL.URL
} else if partMap, ok := part.(map[string]any); ok {
// Check for map type (from JSON unmarshaling)
if partType, exists := partMap["type"]; exists && partType == "image_url" {
if imgURLMap, ok := partMap["image_url"].(map[string]any); ok {
if url, ok := imgURLMap["url"].(string); ok {
imgURL = url
}
}
}
}
if imgURL != "" {
// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
parts := strings.SplitN(imgURL, ",", 2)
if len(parts) == 2 {
multimodalData = append(multimodalData, parts[1])
messages[i] += " <__media__>"
}
}
}
}
}
prompt := strings.Join(messages, "\n")
// Add multimodal media markers to the prompt text when multimodal data is present
// This is required by llama.cpp multimodal models so they know where to insert media
if len(multimodalData) > 0 {
// Add a media marker for each item in the multimodal data
var sb strings.Builder
sb.WriteString(prompt)
for range multimodalData {
sb.WriteString(" <__media__>") // llama.cpp default multimodal marker
}
prompt = sb.String()
}
// needs to be after <__media__> if there are images
if !resume {
botMsgStart := "\n" + botPersona + ":\n"

13
models/consts.go Normal file
View File

@@ -0,0 +1,13 @@
package models
const (
LoadedMark = "(loaded) "
ToolRespMultyType = "multimodel_content"
)
type APIType int
const (
APITypeChat APIType = iota
APITypeCompletion
)

View File

@@ -391,7 +391,6 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
if err != nil {
return "", err
}
// Determine the image format based on file extension
var mimeType string
switch {
@@ -408,10 +407,8 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
default:
mimeType = "image/jpeg" // default
}
// Encode to base64
encoded := base64.StdEncoding.EncodeToString(data)
// Create data URL
return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
}
@@ -519,24 +516,6 @@ type OpenAIReq struct {
// ===
// type LLMModels struct {
// Object string `json:"object"`
// Data []struct {
// ID string `json:"id"`
// Object string `json:"object"`
// Created int `json:"created"`
// OwnedBy string `json:"owned_by"`
// Meta struct {
// VocabType int `json:"vocab_type"`
// NVocab int `json:"n_vocab"`
// NCtxTrain int `json:"n_ctx_train"`
// NEmbd int `json:"n_embd"`
// NParams int64 `json:"n_params"`
// Size int64 `json:"size"`
// } `json:"meta"`
// } `json:"data"`
// }
type LlamaCPPReq struct {
Model string `json:"model"`
Stream bool `json:"stream"`
@@ -629,6 +608,20 @@ func (lcp *LCPModels) ListModels() []string {
return resp
}
func (lcp *LCPModels) HasVision(modelID string) bool {
for _, m := range lcp.Data {
if m.ID == modelID {
args := m.Status.Args
for i := 0; i < len(args)-1; i++ {
if args[i] == "--mmproj" {
return true
}
}
}
}
return false
}
type ResponseStats struct {
Tokens int
Duration float64
@@ -642,9 +635,7 @@ type ChatRoundReq struct {
Resume bool
}
type APIType int
const (
APITypeChat APIType = iota
APITypeCompletion
)
type MultimodalToolResp struct {
Type string `json:"type"`
Parts []map[string]string `json:"parts"`
}

View File

@@ -172,3 +172,16 @@ func (orm *ORModels) ListModels(free bool) []string {
}
return resp
}
func (orm *ORModels) HasVision(modelID string) bool {
for i := range orm.Data {
if orm.Data[i].ID == modelID {
for _, mod := range orm.Data[i].Architecture.InputModalities {
if mod == "image" {
return true
}
}
}
}
return false
}

View File

@@ -1,6 +1,7 @@
package main
import (
"gf-lt/models"
"slices"
"strings"
@@ -51,7 +52,7 @@ func showModelSelectionPopup() {
// Find the current model index to set as selected
currentModelIndex := -1
for i, model := range modelList {
if strings.TrimPrefix(model, "(loaded) ") == chatBody.Model {
if strings.TrimPrefix(model, models.LoadedMark) == chatBody.Model {
currentModelIndex = i
}
modelListWidget.AddItem(model, "", 0, nil)
@@ -61,7 +62,7 @@ func showModelSelectionPopup() {
modelListWidget.SetCurrentItem(currentModelIndex)
}
modelListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
modelName := strings.TrimPrefix(mainText, "(loaded) ")
modelName := strings.TrimPrefix(mainText, models.LoadedMark)
chatBody.Model = modelName
cfg.CurrentModel = chatBody.Model
pages.RemovePage("modelSelectionPopup")
@@ -142,6 +143,7 @@ func showAPILinkSelectionPopup() {
apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
// Update the API in config
cfg.CurrentAPI = mainText
UpdateToolCapabilities()
// Update model list based on new API
// Helper function to get model list for a given API (same as in props_table.go)
getModelListForAPI := func(api string) []string {
@@ -159,8 +161,9 @@ func showAPILinkSelectionPopup() {
newModelList := getModelListForAPI(cfg.CurrentAPI)
// Ensure chatBody.Model is in the new list; if not, set to first available model
if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) {
chatBody.Model = newModelList[0]
chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark)
cfg.CurrentModel = chatBody.Model
UpdateToolCapabilities()
}
pages.RemovePage("apiLinkSelectionPopup")
app.SetFocus(textArea)

View File

@@ -1,6 +1,6 @@
{
"sys_prompt": "You are an expert software engineering assistant. Your goal is to help users with coding tasks, debugging, refactoring, and software development.\n\n## Core Principles\n1. **Security First**: Never expose secrets, keys, or credentials. Never commit sensitive data.\n2. **No Git Actions**: You can READ git info (status, log, diff) for context, but NEVER perform git actions (commit, add, push, checkout, reset, rm, etc.). Let the user handle all git operations.\n3. **Explore Before Execute**: Always understand the codebase structure before making changes.\n4. **Follow Conventions**: Match existing code style, patterns, and frameworks used in the project.\n5. **Be Concise**: Minimize output tokens while maintaining quality. Avoid unnecessary explanations.\n\n## Workflow for Complex Tasks\nFor multi-step tasks, ALWAYS use the todo system to track progress:\n\n1. **Create Todo List**: At the start of complex tasks, use `todo_create` to break down work into actionable items.\n2. **Update Progress**: Mark items as `in_progress` when working on them, and `completed` when done.\n3. **Check Status**: Use `todo_read` to review your progress.\n\nExample workflow:\n- User: \"Add user authentication to this app\"\n- You: Create todos: [\"Analyze existing auth structure\", \"Check frameworks in use\", \"Implement auth middleware\", \"Add login endpoints\", \"Test implementation\"]\n\n## Task Execution Flow\n\n### Phase 1: Exploration (Always First)\n- Use `file_list` to understand directory structure (path defaults to FilePickerDir if not specified)\n- Use `file_read` to examine relevant files (paths are relative to FilePickerDir unless starting with `/`)\n- Use `execute_command` with `grep`/`find` to search for patterns\n- Check `README` or documentation files\n- Identify: frameworks, conventions, testing approach\n- **Git reads allowed**: You may use `git status`, `git log`, `git diff` for context, but only to inform your work\n- **Path handling**: Relative paths are resolved against FilePickerDir (configurable via Alt+O). Use absolute paths (starting with `/`) to bypass FilePickerDir.\n\n### Phase 2: Planning\n- For complex tasks: create todo items\n- Identify files that need modification\n- Plan your approach following existing patterns\n\n### Phase 3: Implementation\n- Make changes using appropriate file tools\n- Prefer `file_write` for new files, `file_read` then modify for existing files\n- Follow existing code style exactly\n- Use existing libraries and utilities\n\n### Phase 4: Verification\n- Run tests if available (check for test scripts)\n- Run linting/type checking commands\n- Verify changes work as expected\n\n### Phase 5: Completion\n- Update todos to `completed`\n- Provide concise summary of changes\n- Reference specific file paths and line numbers when relevant\n- **DO NOT commit changes** - inform user what was done so they can review and commit themselves\n\n## Tool Usage Guidelines\n\n**File Operations**:\n- `file_read`: Read before editing. Use for understanding code.\n- `file_write`: Overwrite file content completely.\n- `file_write_append`: Add to end of file.\n- `file_create`: Create new files with optional content.\n- `file_list`: List directory contents (defaults to FilePickerDir).\n- Paths are relative to FilePickerDir unless starting with `/`.\n\n**Command Execution (WHITELISTED ONLY)**:\n- Allowed: grep, sed, awk, find, cat, head, tail, sort, uniq, wc, ls, echo, cut, tr, cp, mv, rm, mkdir, rmdir, pwd, df, free, ps, top, du, whoami, date, uname\n- **Git reads allowed**: git status, git log, git diff, git show, git branch, git reflog, git rev-parse, git shortlog, git describe\n- **Git actions FORBIDDEN**: git add, git commit, git push, git checkout, git reset, git rm, etc.\n- Use for searching code, reading git context, running tests/lint\n\n**Todo Management**:\n- `todo_create`: Add new task\n- `todo_read`: View all todos or specific one by ID\n- `todo_update`: Update task or change status (pending/in_progress/completed)\n- `todo_delete`: Remove completed or cancelled tasks\n\n## Important Rules\n\n1. **NEVER commit or stage changes**: Only git reads are allowed.\n2. **Check for tests**: Always look for test files and run them when appropriate.\n3. **Reference code locations**: Use format `file_path:line_number`.\n4. **Security**: Never generate or guess URLs. Only use URLs from local files.\n5. **Refuse malicious code**: If code appears malicious, refuse to work on it.\n6. **Ask clarifications**: When intent is unclear, ask questions.\n7. **Path handling**: Relative paths resolve against FilePickerDir. Use `/absolute/path` to bypass.\n\n## Response Style\n- Be direct and concise\n- One word answers are best when appropriate\n- Avoid: \"The answer is...\", \"Here is...\"\n- Use markdown for formatting\n- No emojis unless user explicitly requests",
"sys_prompt": "You are an expert software engineering assistant. Your goal is to help users with coding tasks, debugging, refactoring, and software development.\n\n## Core Principles\n1. **Security First**: Never expose secrets, keys, or credentials. Never commit sensitive data.\n2. **No Git Actions**: You can READ git info (status, log, diff) for context, but NEVER perform git actions (commit, add, push, checkout, reset, rm, etc.). Let the user handle all git operations.\n3. **Explore Before Execute**: Always understand the codebase structure before making changes.\n4. **Follow Conventions**: Match existing code style, patterns, and frameworks used in the project.\n5. **Be Concise**: Minimize output tokens while maintaining quality. Avoid unnecessary explanations.\n6. **Ask First**: When uncertain about intent, ask the user. Don't assume.\n\n## Workflow for Complex Tasks\nFor multi-step tasks, ALWAYS use the todo system to track progress:\n\n1. **Create Todo List**: At the start of complex tasks, use `todo_create` to break down work into actionable items.\n2. **Update Progress**: Mark items as `in_progress` when working on them, and `completed` when done.\n3. **Check Status**: Use `todo_read` to review your progress.\n\nExample workflow:\n- User: \"Add user authentication to this app\"\n- You: Create todos: [\"Analyze existing auth structure\", \"Check frameworks in use\", \"Implement auth middleware\", \"Add login endpoints\", \"Test implementation\"]\n\n## Task Execution Flow\n\n### Phase 1: Exploration (Always First)\n- Use `file_list` to understand directory structure (path defaults to FilePickerDir if not specified)\n- Use `file_read` to examine relevant files (paths are relative to FilePickerDir unless starting with `/`)\n- Use `execute_command` with `grep`/`find` to search for patterns\n- Check README, Makefile, package.json, or similar for build/test commands\n- Identify: frameworks, conventions, testing approach, lint/typecheck commands\n- **Git reads allowed**: You may use `git status`, `git log`, `git diff` for context, but only to inform your work\n- **Path handling**: Relative paths resolve against FilePickerDir; absolute paths (starting with `/`) bypass it\n\n### Phase 2: Planning\n- For complex tasks: create todo items\n- Identify files that need modification\n- Plan your approach following existing patterns\n\n### Phase 3: Implementation\n- Make changes using appropriate file tools\n- Prefer `file_write` for new files, `file_read` then edit for existing files\n- Follow existing code style exactly\n- Use existing libraries and utilities\n\n### Phase 4: Verification\n- Run tests if available (check for test scripts in README/Makefile)\n- Run linting/type checking commands\n- Verify changes work as expected\n\n### Phase 5: Completion\n- Update todos to `completed`\n- Provide concise summary of changes\n- Reference specific file paths and line numbers when relevant\n- **DO NOT commit changes** - inform user what was done so they can review and commit themselves\n\n## Command Execution\n- Use `execute_command` with a single string containing command and arguments (e.g., `go run main.go`, `ls -la`, `cd /tmp`)\n- Use `cd /path` to change the working directory for file operations",
"role": "CodingAssistant",
"filepath": "sysprompts/coding_assistant.json",
"first_msg": "Hello! I'm your coding assistant. I can help you with software engineering tasks like writing code, debugging, refactoring, and exploring codebases. I work best when you give me specific tasks, and for complex work, I'll create a todo list to track my progress. What would you like to work on?"
"first_msg": "Hello! I'm your coding assistant. Give me a specific task and I'll get started. For complex work, I'll track progress with todos."
}

503
tools.go
View File

@@ -85,6 +85,11 @@ Your current tools:
"when_to_use": "when asked to read the content of a file"
},
{
"name":"file_read_image",
"args": ["path"],
"when_to_use": "when asked to read or view an image file"
},
{
"name":"file_write",
"args": ["path", "content"],
"when_to_use": "when needed to overwrite content to a file"
@@ -170,8 +175,36 @@ After that you are free to respond to the user.
webAgentsOnce sync.Once
)
var windowToolSysMsg = `
Additional window tools (available only if xdotool and maim are installed):
[
{
"name":"list_windows",
"args": [],
"when_to_use": "when asked to list visible windows; returns map of window ID to window name"
},
{
"name":"capture_window",
"args": ["window"],
"when_to_use": "when asked to take a screenshot of a specific window; saves to /tmp; window can be ID or name substring; returns file path"
},
{
"name":"capture_window_and_view",
"args": ["window"],
"when_to_use": "when asked to take a screenshot of a specific window and show it; saves to /tmp and returns image for viewing; window can be ID or name substring"
}
]
`
var WebSearcher searcher.WebSurfer
var (
windowToolsAvailable bool
xdotoolPath string
maimPath string
modelHasVision bool
)
func init() {
sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "")
if err != nil {
@@ -181,6 +214,47 @@ func init() {
if err := rag.Init(cfg, logger, store); err != nil {
logger.Warn("failed to init rag; rag_search tool will not be available", "error", err)
}
checkWindowTools()
registerWindowTools()
}
func checkWindowTools() {
xdotoolPath, _ = exec.LookPath("xdotool")
maimPath, _ = exec.LookPath("maim")
windowToolsAvailable = xdotoolPath != "" && maimPath != ""
if windowToolsAvailable {
logger.Info("window tools available: xdotool and maim found")
} else {
if xdotoolPath == "" {
logger.Warn("xdotool not found, window listing tools will not be available")
}
if maimPath == "" {
logger.Warn("maim not found, window capture tools will not be available")
}
}
}
func UpdateToolCapabilities() {
if !cfg.ToolUse {
return
}
modelHasVision = false
if cfg == nil || cfg.CurrentAPI == "" {
logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
registerWindowTools()
return
}
prevHasVision := modelHasVision
modelHasVision = ModelHasVision(cfg.CurrentAPI, cfg.CurrentModel)
if modelHasVision {
logger.Info("model has vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
} else {
logger.Info("model does not have vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
if windowToolsAvailable && !prevHasVision && !modelHasVision {
_ = notifyUser("window tools", "Window capture-and-view unavailable: model lacks vision support")
}
}
registerWindowTools()
}
// getWebAgentClient returns a singleton AgentClient for web agents.
@@ -469,6 +543,43 @@ func fileRead(args map[string]string) []byte {
return jsonResult
}
func fileReadImage(args map[string]string) []byte {
path, ok := args["path"]
if !ok || path == "" {
msg := "path not provided to file_read_image tool"
logger.Error(msg)
return []byte(msg)
}
path = resolvePath(path)
dataURL, err := models.CreateImageURLFromPath(path)
if err != nil {
msg := "failed to read image; error: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
// result := map[string]any{
// "type": "multimodal_content",
// "parts": []map[string]string{
// {"type": "text", "text": "Image at " + path},
// {"type": "image_url", "url": dataURL},
// },
// }
result := models.MultimodalToolResp{
Type: "multimodal_content",
Parts: []map[string]string{
{"type": "text", "text": "Image at " + path},
{"type": "image_url", "url": dataURL},
},
}
jsonResult, err := json.Marshal(result)
if err != nil {
msg := "failed to marshal result; error: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return jsonResult
}
func fileWrite(args map[string]string) []byte {
path, ok := args["path"]
if !ok || path == "" {
@@ -519,21 +630,17 @@ func fileEdit(args map[string]string) []byte {
return []byte(msg)
}
path = resolvePath(path)
oldString, ok := args["oldString"]
if !ok || oldString == "" {
msg := "oldString not provided to file_edit tool"
logger.Error(msg)
return []byte(msg)
}
newString, ok := args["newString"]
if !ok {
newString = ""
}
lineNumberStr, hasLineNumber := args["lineNumber"]
// Read file content
content, err := os.ReadFile(path)
if err != nil {
@@ -541,10 +648,8 @@ func fileEdit(args map[string]string) []byte {
logger.Error(msg)
return []byte(msg)
}
fileContent := string(content)
var replacementCount int
if hasLineNumber && lineNumberStr != "" {
// Line-number based edit
lineNum, err := strconv.Atoi(lineNumberStr)
@@ -579,13 +684,11 @@ func fileEdit(args map[string]string) []byte {
fileContent = strings.ReplaceAll(fileContent, oldString, newString)
replacementCount = strings.Count(fileContent, newString)
}
if err := os.WriteFile(path, []byte(fileContent), 0644); err != nil {
msg := "failed to write file: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
msg := fmt.Sprintf("file edited successfully at %s (%d replacement(s))", path, replacementCount)
return []byte(msg)
}
@@ -765,45 +868,31 @@ func listDirectory(path string) ([]string, error) {
// Command Execution Tool
func executeCommand(args map[string]string) []byte {
command, ok := args["command"]
if !ok || command == "" {
commandStr := args["command"]
if commandStr == "" {
msg := "command not provided to execute_command tool"
logger.Error(msg)
return []byte(msg)
}
// Get arguments - handle both single arg and multiple args
var cmdArgs []string
if args["args"] != "" {
// If args is provided as a single string, split by spaces
cmdArgs = strings.Fields(args["args"])
} else {
// If individual args are provided, collect them
argNum := 1
for {
argKey := fmt.Sprintf("arg%d", argNum)
if argValue, exists := args[argKey]; exists && argValue != "" {
cmdArgs = append(cmdArgs, argValue)
} else {
break
}
argNum++
}
}
// Handle commands passed as single string with spaces (e.g., "go run main.go")
// Handle commands passed as single string with spaces (e.g., "go run main.go" or "cd /tmp")
// Split into base command and arguments
if strings.Contains(command, " ") {
parts := strings.Fields(command)
baseCmd := parts[0]
extraArgs := parts[1:]
// Prepend extra args to cmdArgs
cmdArgs = append(extraArgs, cmdArgs...)
command = baseCmd
parts := strings.Fields(commandStr)
if len(parts) == 0 {
msg := "command not provided to execute_command tool"
logger.Error(msg)
return []byte(msg)
}
command := parts[0]
cmdArgs := parts[1:]
if !isCommandAllowed(command, cmdArgs...) {
msg := fmt.Sprintf("command '%s' is not allowed", command)
logger.Error(msg)
return []byte(msg)
}
// Special handling for cd command - update FilePickerDir
if command == "cd" {
return handleCdCommand(cmdArgs)
}
// Execute with timeout for safety
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
@@ -817,12 +906,58 @@ func executeCommand(args map[string]string) []byte {
}
// Check if output is empty and return success message
if len(output) == 0 {
successMsg := fmt.Sprintf("command '%s %s' executed successfully and exited with code 0", command, strings.Join(cmdArgs, " "))
successMsg := fmt.Sprintf("command '%s' executed successfully and exited with code 0", commandStr)
return []byte(successMsg)
}
return output
}
// handleCdCommand handles the cd command to update FilePickerDir
func handleCdCommand(args []string) []byte {
var targetDir string
if len(args) == 0 {
// cd with no args goes to home directory
homeDir, err := os.UserHomeDir()
if err != nil {
msg := "cd: cannot determine home directory: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
targetDir = homeDir
} else {
targetDir = args[0]
}
// Resolve relative paths against current FilePickerDir
if !filepath.IsAbs(targetDir) {
targetDir = filepath.Join(cfg.FilePickerDir, targetDir)
}
// Verify the directory exists
info, err := os.Stat(targetDir)
if err != nil {
msg := "cd: " + targetDir + ": " + err.Error()
logger.Error(msg)
return []byte(msg)
}
if !info.IsDir() {
msg := "cd: " + targetDir + ": not a directory"
logger.Error(msg)
return []byte(msg)
}
// Update FilePickerDir
absDir, err := filepath.Abs(targetDir)
if err != nil {
msg := "cd: failed to resolve path: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
cfg.FilePickerDir = absDir
msg := "FilePickerDir changed to: " + absDir
return []byte(msg)
}
// Helper functions for command execution
// Todo structure
type TodoItem struct {
@@ -1010,6 +1145,7 @@ var gitReadSubcommands = map[string]bool{
func isCommandAllowed(command string, args ...string) bool {
allowedCommands := map[string]bool{
"cd": true,
"grep": true,
"sed": true,
"awk": true,
@@ -1063,6 +1199,142 @@ func summarizeChat(args map[string]string) []byte {
return []byte(chatText)
}
func windowIDToHex(decimalID string) string {
id, err := strconv.ParseInt(decimalID, 10, 64)
if err != nil {
return decimalID
}
return fmt.Sprintf("0x%x", id)
}
func listWindows(args map[string]string) []byte {
if !windowToolsAvailable {
return []byte("window tools not available: xdotool or maim not found")
}
cmd := exec.Command(xdotoolPath, "search", "--name", ".")
output, err := cmd.Output()
if err != nil {
msg := "failed to list windows: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
windowIDs := strings.Fields(string(output))
windows := make(map[string]string)
for _, id := range windowIDs {
id = strings.TrimSpace(id)
if id == "" {
continue
}
nameCmd := exec.Command(xdotoolPath, "getwindowname", id)
nameOutput, err := nameCmd.Output()
if err != nil {
continue
}
name := strings.TrimSpace(string(nameOutput))
windows[id] = name
}
data, err := json.Marshal(windows)
if err != nil {
msg := "failed to marshal window list: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return data
}
func captureWindow(args map[string]string) []byte {
if !windowToolsAvailable {
return []byte("window tools not available: xdotool or maim not found")
}
window, ok := args["window"]
if !ok || window == "" {
return []byte("window parameter required (window ID or name)")
}
var windowID string
if _, err := strconv.Atoi(window); err == nil {
windowID = window
} else {
cmd := exec.Command(xdotoolPath, "search", "--name", window)
output, err := cmd.Output()
if err != nil || len(strings.Fields(string(output))) == 0 {
return []byte("window not found: " + window)
}
windowID = strings.Fields(string(output))[0]
}
nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
nameOutput, _ := nameCmd.Output()
windowName := strings.TrimSpace(string(nameOutput))
windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
if windowName == "" {
windowName = "window"
}
timestamp := time.Now().Unix()
filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
cmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
if err := cmd.Run(); err != nil {
msg := "failed to capture window: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return []byte("screenshot saved: " + filename)
}
func captureWindowAndView(args map[string]string) []byte {
if !windowToolsAvailable {
return []byte("window tools not available: xdotool or maim not found")
}
window, ok := args["window"]
if !ok || window == "" {
return []byte("window parameter required (window ID or name)")
}
var windowID string
if _, err := strconv.Atoi(window); err == nil {
windowID = window
} else {
cmd := exec.Command(xdotoolPath, "search", "--name", window)
output, err := cmd.Output()
if err != nil || len(strings.Fields(string(output))) == 0 {
return []byte("window not found: " + window)
}
windowID = strings.Fields(string(output))[0]
}
nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
nameOutput, _ := nameCmd.Output()
windowName := strings.TrimSpace(string(nameOutput))
windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
if windowName == "" {
windowName = "window"
}
timestamp := time.Now().Unix()
filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
captureCmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
if err := captureCmd.Run(); err != nil {
msg := "failed to capture window: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
dataURL, err := models.CreateImageURLFromPath(filename)
if err != nil {
msg := "failed to create image URL: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
result := models.MultimodalToolResp{
Type: "multimodal_content",
Parts: []map[string]string{
{"type": "text", "text": "Screenshot saved: " + filename},
{"type": "image_url", "url": dataURL},
},
}
jsonResult, err := json.Marshal(result)
if err != nil {
msg := "failed to marshal result: " + err.Error()
logger.Error(msg)
return []byte(msg)
}
return jsonResult
}
type fnSig func(map[string]string) []byte
var fnMap = map[string]fnSig{
@@ -1076,6 +1348,7 @@ var fnMap = map[string]fnSig{
"read_url_raw": readURLRaw,
"file_create": fileCreate,
"file_read": fileRead,
"file_read_image": fileReadImage,
"file_write": fileWrite,
"file_write_append": fileWriteAppend,
"file_edit": fileEdit,
@@ -1091,6 +1364,66 @@ var fnMap = map[string]fnSig{
"summarize_chat": summarizeChat,
}
func registerWindowTools() {
if windowToolsAvailable {
fnMap["list_windows"] = listWindows
fnMap["capture_window"] = captureWindow
windowTools := []models.Tool{
{
Type: "function",
Function: models.ToolFunc{
Name: "list_windows",
Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window",
Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"window"},
Properties: map[string]models.ToolArgProps{
"window": models.ToolArgProps{
Type: "string",
Description: "window ID or window name (partial match)",
},
},
},
},
},
}
if modelHasVision {
fnMap["capture_window_and_view"] = captureWindowAndView
windowTools = append(windowTools, models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window_and_view",
Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"window"},
Properties: map[string]models.ToolArgProps{
"window": models.ToolArgProps{
Type: "string",
Description: "window ID or window name (partial match)",
},
},
},
},
})
}
baseTools = append(baseTools, windowTools...)
toolSysMsg += windowToolSysMsg
}
}
// callToolWithAgent calls the tool and applies any registered agent.
func callToolWithAgent(name string, args map[string]string) []byte {
registerWebAgents()
@@ -1302,6 +1635,24 @@ var baseTools = []models.Tool{
},
},
},
// file_read_image
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "file_read_image",
Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"path"},
Properties: map[string]models.ToolArgProps{
"path": models.ToolArgProps{
Type: "string",
Description: "path of the image file to read",
},
},
},
},
},
// file_write
models.Tool{
Type: "function",
@@ -1461,18 +1812,14 @@ var baseTools = []models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "execute_command",
Description: "Execute a shell command safely. Use when you need to run system commands like grep sed awk find cat head tail sort uniq wc ls echo cut tr cp mv rm mkdir rmdir pwd df free ps top du whoami date uname go. Git is allowed for read-only operations: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe.",
Description: "Execute a shell command safely. Use when you need to run system commands like cd grep sed awk find cat head tail sort uniq wc ls echo cut tr cp mv rm mkdir rmdir pwd df free ps top du whoami date uname go git. Git is allowed for read-only operations: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe. Use 'cd /path' to change working directory.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"command"},
Properties: map[string]models.ToolArgProps{
"command": models.ToolArgProps{
Type: "string",
Description: "command to execute (only commands from whitelist are allowed: grep sed awk find cat head tail sort uniq wc ls echo cut tr cp mv rm mkdir rmdir pwd df free ps top du whoami date uname go; git allowed for reads: status log diff show branch reflog rev-parse shortlog describe)",
},
"args": models.ToolArgProps{
Type: "string",
Description: "command arguments as a single string (e.g., '-la {path}')",
Description: "command to execute with arguments (e.g., 'go run main.go', 'ls -la /tmp', 'cd /home/user'). Use a single string; arguments should be space-separated after the command.",
},
},
},
@@ -1522,8 +1869,21 @@ var baseTools = []models.Tool{
Description: "Update a todo item by ID with new task or status. Status must be one of: pending, in_progress, completed.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{},
Required: []string{"id"},
Properties: map[string]models.ToolArgProps{
"id": models.ToolArgProps{
Type: "string",
Description: "id of the todo item to update",
},
"task": models.ToolArgProps{
Type: "string",
Description: "new task description (optional)",
},
"status": models.ToolArgProps{
Type: "string",
Description: "new status: pending, in_progress, or completed (optional)",
},
},
},
},
},
@@ -1546,3 +1906,56 @@ var baseTools = []models.Tool{
},
},
}
func init() {
if windowToolsAvailable {
baseTools = append(baseTools,
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "list_windows",
Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{},
},
},
},
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window",
Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"window"},
Properties: map[string]models.ToolArgProps{
"window": models.ToolArgProps{
Type: "string",
Description: "window ID or window name (partial match)",
},
},
},
},
},
models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window_and_view",
Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"window"},
Properties: map[string]models.ToolArgProps{
"window": models.ToolArgProps{
Type: "string",
Description: "window ID or window name (partial match)",
},
},
},
},
},
)
}
}

40
tui.go
View File

@@ -35,6 +35,8 @@ var (
renameWindow *tview.InputField
roleEditWindow *tview.InputField
shellInput *tview.InputField
confirmModal *tview.Modal
confirmPageName = "confirm"
fullscreenMode bool
positionVisible bool = true
scrollToEndEnabled bool = true
@@ -195,6 +197,39 @@ func init() {
}
return event
})
confirmModal = tview.NewModal().
SetText("You are trying to send an empty message.\nIt makes sense if the last message in the chat is from you.\nAre you sure?").
AddButtons([]string{"Yes", "No"}).
SetButtonBackgroundColor(tcell.ColorBlack).
SetButtonTextColor(tcell.ColorWhite).
SetDoneFunc(func(buttonIndex int, buttonLabel string) {
if buttonLabel == "Yes" {
persona := cfg.UserRole
if cfg.WriteNextMsgAs != "" {
persona = cfg.WriteNextMsgAs
}
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: ""}
} // In both Yes and No, go back to the main page
pages.SwitchToPage("main") // or whatever your main page is named
})
confirmModal.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
if event.Key() == tcell.KeyRune {
switch event.Rune() {
case 'y', 'Y':
persona := cfg.UserRole
if cfg.WriteNextMsgAs != "" {
persona = cfg.WriteNextMsgAs
}
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: ""}
pages.SwitchToPage("main")
return nil
case 'n', 'N', 'x', 'X':
pages.SwitchToPage("main")
return nil
}
}
return event
})
textArea = tview.NewTextArea().
SetPlaceholder("input is multiline; press <Enter> to start the next line;\npress <Esc> to send the message.")
textArea.SetBorder(true).SetTitle("input")
@@ -997,7 +1032,6 @@ func init() {
return nil
}
msgText := textArea.GetText()
if msgText != "" {
nl := "\n\n" // keep empty lines between messages
prevText := textView.GetText(true)
persona := cfg.UserRole
@@ -1029,10 +1063,12 @@ func init() {
textView.ScrollToEnd()
}
colorText()
} else {
pages.AddPage(confirmPageName, confirmModal, true, true)
return nil
}
// go chatRound(msgText, persona, textView, false, false)
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
}
return nil
}
if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn {