Fix: linter complaints

Enha: check if model has vision before giving it vision tools
Feat: screencapture for completion
2026-03-02 11:39:55 +03:00 · 2026-03-02 11:25:20 +03:00 · 2026-03-02 11:12:04 +03:00 · 2026-03-02 10:33:41 +03:00 · 2026-03-02 09:23:22 +03:00 · 2026-03-02 07:46:08 +03:00
10 changed files with 728 additions and 161 deletions
--- a/bot.go
+++ b/bot.go
@@ -379,22 +379,22 @@ func fetchLCPModels() ([]string, error) {

 // fetchLCPModelsWithLoadStatus returns models with "(loaded)" indicator for loaded models
 func fetchLCPModelsWithLoadStatus() ([]string, error) {
-	models, err := fetchLCPModelsWithStatus()
+	modelList, err := fetchLCPModelsWithStatus()
 	if err != nil {
 		return nil, err
 	}
-	result := make([]string, 0, len(models.Data))
+	result := make([]string, 0, len(modelList.Data))
 	li := 0 // loaded index
-	for i, m := range models.Data {
+	for i, m := range modelList.Data {
 		modelName := m.ID
 		if m.Status.Value == "loaded" {
-			modelName = "(loaded) " + modelName
+			modelName = models.LoadedMark + modelName
 			li = i
 		}
 		result = append(result, modelName)
 	}
 	if li == 0 {
-		return result, nil // no loaded models
+		return result, nil // nothing loaded, or the loaded model is already at index 0
 	}
 	loadedModel := result[li]
 	result = append(result[:li], result[li+1:]...)
@@ -433,6 +433,33 @@ func isModelLoaded(modelID string) (bool, error) {
 	return false, nil
 }

+func ModelHasVision(api, modelID string) bool {
+	switch {
+	case strings.Contains(api, "deepseek"):
+		return false
+	case strings.Contains(api, "openrouter"):
+		resp, err := http.Get("https://openrouter.ai/api/v1/models")
+		if err != nil {
+			logger.Warn("failed to fetch OR models for vision check", "error", err)
+			return false
+		}
+		defer resp.Body.Close()
+		orm := &models.ORModels{}
+		if err := json.NewDecoder(resp.Body).Decode(orm); err != nil {
+			logger.Warn("failed to decode OR models for vision check", "error", err)
+			return false
+		}
+		return orm.HasVision(modelID)
+	default:
+		models, err := fetchLCPModelsWithStatus()
+		if err != nil {
+			logger.Warn("failed to fetch LCP models for vision check", "error", err)
+			return false
+		}
+		return models.HasVision(modelID)
+	}
+}
+
 // monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded.
 func monitorModelLoad(modelID string) {
 	go func() {
@@ -1174,17 +1201,59 @@ func findCall(msg, toolCall string) bool {
 	toolRunningMode = false
 	toolMsg := string(resp)
 	logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
-	fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
-		"\n\n", len(chatBody.Messages), cfg.ToolRole, toolMsg)
 	// Create tool response message with the proper tool_call_id
 	// Mark shell commands as always visible
 	isShellCommand := fc.Name == "execute_command"
-	toolResponseMsg := models.RoleMsg{
+	// Check if response is multimodal content (image)
+	var toolResponseMsg models.RoleMsg
+	if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
+		// Parse multimodal content response
+		multimodalResp := models.MultimodalToolResp{}
+		if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
+			// Create RoleMsg with ContentParts
+			var contentParts []any
+			for _, part := range multimodalResp.Parts {
+				partType := part["type"]
+				switch partType {
+				case "text":
+					contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
+				case "image_url":
+					contentParts = append(contentParts, models.ImageContentPart{
+						Type: "image_url",
+						ImageURL: struct {
+							URL string `json:"url"`
+						}{URL: part["url"]},
+					})
+				default:
+					continue
+				}
+			}
+			toolResponseMsg = models.RoleMsg{
+				Role:            cfg.ToolRole,
+				ContentParts:    contentParts,
+				HasContentParts: true,
+				ToolCallID:      lastToolCall.ID,
+				IsShellCommand:  isShellCommand,
+			}
+		} else {
+			// Fallback to regular content
+			toolResponseMsg = models.RoleMsg{
 				Role:           cfg.ToolRole,
 				Content:        toolMsg,
 				ToolCallID:     lastToolCall.ID,
 				IsShellCommand: isShellCommand,
 			}
+		}
+	} else {
+		toolResponseMsg = models.RoleMsg{
+			Role:           cfg.ToolRole,
+			Content:        toolMsg,
+			ToolCallID:     lastToolCall.ID,
+			IsShellCommand: isShellCommand,
+		}
+	}
+	fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
+		"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
 	chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
 	logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
 	// Clear the stored tool call ID after using it
@@ -1207,11 +1276,11 @@ func chatToTextSlice(messages []models.RoleMsg, showSys bool) []string {
 			// This is a tool call indicator - show collapsed
 			if toolCollapsed {
 				toolName := messages[i].ToolCall.Name
-				resp[i] = fmt.Sprintf("%s\n[yellow::i][tool call: %s (press Ctrl+T to expand)][-:-:-]\n", icon, toolName)
+				resp[i] = strings.ReplaceAll(fmt.Sprintf("%s\n%s\n[yellow::i][tool call: %s (press Ctrl+T to expand)][-:-:-]\n", icon, messages[i].GetText(), toolName), "\n\n", "\n")
 			} else {
 				// Show full tool call info
 				toolName := messages[i].ToolCall.Name
-				resp[i] = fmt.Sprintf("%s\n%s\n[yellow::i][tool call: %s][-:-:-]\nargs: %s\nid: %s\n", icon, messages[i].GetText(), toolName, messages[i].ToolCall.Args, messages[i].ToolCall.ID)
+				resp[i] = strings.ReplaceAll(fmt.Sprintf("%s\n%s\n[yellow::i][tool call: %s][-:-:-]\nargs: %s\nid: %s\n", icon, messages[i].GetText(), toolName, messages[i].ToolCall.Args, messages[i].ToolCall.ID), "\n\n", "\n")
 			}
 			continue
 		}
@@ -1323,11 +1392,28 @@ func updateModelLists() {
 	}
 	// if llama.cpp started after gf-lt?
 	localModelsMu.Lock()
-	LocalModels, err = fetchLCPModels()
+	LocalModels, err = fetchLCPModelsWithLoadStatus()
 	localModelsMu.Unlock()
 	if err != nil {
 		logger.Warn("failed to fetch llama.cpp models", "error", err)
 	}
+	// when talking to a local llama.cpp, switch to the model it already has loaded
+	if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") {
+		localModelsMu.Lock()
+		defer localModelsMu.Unlock()
+		for i := range LocalModels {
+			if strings.Contains(LocalModels[i], models.LoadedMark) {
+				m := strings.TrimPrefix(LocalModels[i], models.LoadedMark)
+				cfg.CurrentModel = m
+				chatBody.Model = m
+				cachedModelColor = "green"
+				updateStatusLine()
+				UpdateToolCapabilities()
+				app.Draw()
+				return
+			}
+		}
+	}
 }

 func refreshLocalModelsIfEmpty() {
--- a/helpfuncs.go
+++ b/helpfuncs.go
@@ -27,7 +27,6 @@ func startModelColorUpdater() {
 	go func() {
 		ticker := time.NewTicker(5 * time.Second)
 		defer ticker.Stop()
-
 		// Initial check
 		updateCachedModelColor()
 		for range ticker.C {
@@ -42,7 +41,6 @@ func updateCachedModelColor() {
 		cachedModelColor = "orange"
 		return
 	}
-
 	// Check if model is loaded
 	loaded, err := isModelLoaded(chatBody.Model)
 	if err != nil {
--- a/llm.go
+++ b/llm.go
@@ -3,7 +3,6 @@ package main
 import (
 	"bytes"
 	"encoding/json"
-	"errors"
 	"gf-lt/models"
 	"io"
 	"strings"
@@ -119,25 +118,22 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
 	logger.Debug("formmsg lcpcompletion", "link", cfg.CurrentAPI)
 	localImageAttachmentPath := imageAttachmentPath
 	var multimodalData []string
+	if msg != "" { // otherwise let the bot continue
+		var newMsg models.RoleMsg
 		if localImageAttachmentPath != "" {
+			newMsg = models.NewMultimodalMsg(role, []any{})
+			newMsg.AddTextPart(msg)
 			imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
 			if err != nil {
 				logger.Error("failed to create image URL from path for completion",
 					"error", err, "path", localImageAttachmentPath)
 				return nil, err
 			}
-		// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
-		parts := strings.SplitN(imageURL, ",", 2)
-		if len(parts) == 2 {
-			multimodalData = append(multimodalData, parts[1])
-		} else {
-			logger.Error("invalid image data URL format", "url", imageURL)
-			return nil, errors.New("invalid image data URL format")
-		}
+			newMsg.AddImagePart(imageURL, localImageAttachmentPath)
 			imageAttachmentPath = "" // Clear the attachment after use
+		} else { // not a multimodal msg or image passed in tool call
+			newMsg = models.RoleMsg{Role: role, Content: msg}
 		}
-	if msg != "" { // otherwise let the bot to continue
-		newMsg := models.RoleMsg{Role: role, Content: msg}
 		newMsg = *processMessageTag(&newMsg)
 		chatBody.Messages = append(chatBody.Messages, newMsg)
 	}
@@ -146,22 +142,40 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
 		chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
 	}
 	filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages)
+	// Build prompt and extract images inline as we process each message
 	messages := make([]string, len(filteredMessages))
 	for i := range filteredMessages {
-		messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt()
+		m := stripThinkingFromMsg(&filteredMessages[i])
+		messages[i] = m.ToPrompt()
+		// Extract images from this message and add marker inline
+		if len(m.ContentParts) > 0 {
+			for _, part := range m.ContentParts {
+				var imgURL string
+				// Check for struct type
+				if imgPart, ok := part.(models.ImageContentPart); ok {
+					imgURL = imgPart.ImageURL.URL
+				} else if partMap, ok := part.(map[string]any); ok {
+					// Check for map type (from JSON unmarshaling)
+					if partType, exists := partMap["type"]; exists && partType == "image_url" {
+						if imgURLMap, ok := partMap["image_url"].(map[string]any); ok {
+							if url, ok := imgURLMap["url"].(string); ok {
+								imgURL = url
+							}
+						}
+					}
+				}
+				if imgURL != "" {
+					// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
+					parts := strings.SplitN(imgURL, ",", 2)
+					if len(parts) == 2 {
+						multimodalData = append(multimodalData, parts[1])
+						messages[i] += " <__media__>"
+					}
+				}
+			}
+		}
 	}
 	prompt := strings.Join(messages, "\n")
-	// Add multimodal media markers to the prompt text when multimodal data is present
-	// This is required by llama.cpp multimodal models so they know where to insert media
-	if len(multimodalData) > 0 {
-		// Add a media marker for each item in the multimodal data
-		var sb strings.Builder
-		sb.WriteString(prompt)
-		for range multimodalData {
-			sb.WriteString(" <__media__>") // llama.cpp default multimodal marker
-		}
-		prompt = sb.String()
-	}
 	// needs to be after <__media__> if there are images
 	if !resume {
 		botMsgStart := "\n" + botPersona + ":\n"
--- a/models/consts.go
+++ b/models/consts.go
@@ -0,0 +1,13 @@
+package models
+
+const (
+	LoadedMark        = "(loaded) "
+	ToolRespMultyType = "multimodel_content"
+)
+
+type APIType int
+
+const (
+	APITypeChat APIType = iota
+	APITypeCompletion
+)
--- a/models/models.go
+++ b/models/models.go
@@ -391,7 +391,6 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
 	if err != nil {
 		return "", err
 	}
-
 	// Determine the image format based on file extension
 	var mimeType string
 	switch {
@@ -408,10 +407,8 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
 	default:
 		mimeType = "image/jpeg" // default
 	}
-
 	// Encode to base64
 	encoded := base64.StdEncoding.EncodeToString(data)
-
 	// Create data URL
 	return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
 }
@@ -519,24 +516,6 @@ type OpenAIReq struct {

 // ===

-// type LLMModels struct {
-// 	Object string `json:"object"`
-// 	Data   []struct {
-// 		ID      string `json:"id"`
-// 		Object  string `json:"object"`
-// 		Created int    `json:"created"`
-// 		OwnedBy string `json:"owned_by"`
-// 		Meta    struct {
-// 			VocabType int   `json:"vocab_type"`
-// 			NVocab    int   `json:"n_vocab"`
-// 			NCtxTrain int   `json:"n_ctx_train"`
-// 			NEmbd     int   `json:"n_embd"`
-// 			NParams   int64 `json:"n_params"`
-// 			Size      int64 `json:"size"`
-// 		} `json:"meta"`
-// 	} `json:"data"`
-// }
-
 type LlamaCPPReq struct {
 	Model  string `json:"model"`
 	Stream bool   `json:"stream"`
@@ -629,6 +608,20 @@ func (lcp *LCPModels) ListModels() []string {
 	return resp
 }

+func (lcp *LCPModels) HasVision(modelID string) bool {
+	for _, m := range lcp.Data {
+		if m.ID == modelID {
+			args := m.Status.Args
+			for i := 0; i < len(args)-1; i++ {
+				if args[i] == "--mmproj" {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
+
 type ResponseStats struct {
 	Tokens       int
 	Duration     float64
@@ -642,9 +635,7 @@ type ChatRoundReq struct {
 	Resume  bool
 }

-type APIType int
-
-const (
-	APITypeChat APIType = iota
-	APITypeCompletion
-)
+type MultimodalToolResp struct {
+	Type  string              `json:"type"`
+	Parts []map[string]string `json:"parts"`
+}
--- a/models/openrouter.go
+++ b/models/openrouter.go
@@ -172,3 +172,16 @@ func (orm *ORModels) ListModels(free bool) []string {
 	}
 	return resp
 }
+
+func (orm *ORModels) HasVision(modelID string) bool {
+	for i := range orm.Data {
+		if orm.Data[i].ID == modelID {
+			for _, mod := range orm.Data[i].Architecture.InputModalities {
+				if mod == "image" {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
--- a/popups.go
+++ b/popups.go
@@ -1,6 +1,7 @@
 package main

 import (
+	"gf-lt/models"
 	"slices"
 	"strings"

@@ -51,7 +52,7 @@ func showModelSelectionPopup() {
 	// Find the current model index to set as selected
 	currentModelIndex := -1
 	for i, model := range modelList {
-		if strings.TrimPrefix(model, "(loaded) ") == chatBody.Model {
+		if strings.TrimPrefix(model, models.LoadedMark) == chatBody.Model {
 			currentModelIndex = i
 		}
 		modelListWidget.AddItem(model, "", 0, nil)
@@ -61,7 +62,7 @@ func showModelSelectionPopup() {
 		modelListWidget.SetCurrentItem(currentModelIndex)
 	}
 	modelListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
-		modelName := strings.TrimPrefix(mainText, "(loaded) ")
+		modelName := strings.TrimPrefix(mainText, models.LoadedMark)
 		chatBody.Model = modelName
 		cfg.CurrentModel = chatBody.Model
 		pages.RemovePage("modelSelectionPopup")
@@ -142,6 +143,7 @@ func showAPILinkSelectionPopup() {
 	apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
 		// Update the API in config
 		cfg.CurrentAPI = mainText
+		UpdateToolCapabilities()
 		// Update model list based on new API
 		// Helper function to get model list for a given API (same as in props_table.go)
 		getModelListForAPI := func(api string) []string {
@@ -159,8 +161,9 @@ func showAPILinkSelectionPopup() {
 		newModelList := getModelListForAPI(cfg.CurrentAPI)
 		// Ensure chatBody.Model is in the new list; if not, set to first available model
 		if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) {
-			chatBody.Model = newModelList[0]
+			chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark)
 			cfg.CurrentModel = chatBody.Model
+			UpdateToolCapabilities()
 		}
 		pages.RemovePage("apiLinkSelectionPopup")
 		app.SetFocus(textArea)
--- a/sysprompts/coding_assistant.json
+++ b/sysprompts/coding_assistant.json
@@ -1,6 +1,6 @@
 {
-  "sys_prompt": "You are an expert software engineering assistant. Your goal is to help users with coding tasks, debugging, refactoring, and software development.\n\n## Core Principles\n1. **Security First**: Never expose secrets, keys, or credentials. Never commit sensitive data.\n2. **No Git Actions**: You can READ git info (status, log, diff) for context, but NEVER perform git actions (commit, add, push, checkout, reset, rm, etc.). Let the user handle all git operations.\n3. **Explore Before Execute**: Always understand the codebase structure before making changes.\n4. **Follow Conventions**: Match existing code style, patterns, and frameworks used in the project.\n5. **Be Concise**: Minimize output tokens while maintaining quality. Avoid unnecessary explanations.\n\n## Workflow for Complex Tasks\nFor multi-step tasks, ALWAYS use the todo system to track progress:\n\n1. **Create Todo List**: At the start of complex tasks, use `todo_create` to break down work into actionable items.\n2. **Update Progress**: Mark items as `in_progress` when working on them, and `completed` when done.\n3. 
**Check Status**: Use `todo_read` to review your progress.\n\nExample workflow:\n- User: \"Add user authentication to this app\"\n- You: Create todos: [\"Analyze existing auth structure\", \"Check frameworks in use\", \"Implement auth middleware\", \"Add login endpoints\", \"Test implementation\"]\n\n## Task Execution Flow\n\n### Phase 1: Exploration (Always First)\n- Use `file_list` to understand directory structure (path defaults to FilePickerDir if not specified)\n- Use `file_read` to examine relevant files (paths are relative to FilePickerDir unless starting with `/`)\n- Use `execute_command` with `grep`/`find` to search for patterns\n- Check `README` or documentation files\n- Identify: frameworks, conventions, testing approach\n- **Git reads allowed**: You may use `git status`, `git log`, `git diff` for context, but only to inform your work\n- **Path handling**: Relative paths are resolved against FilePickerDir (configurable via Alt+O). Use absolute paths (starting with `/`) to bypass FilePickerDir.\n\n### Phase 2: Planning\n- For complex tasks: create todo items\n- Identify files that need modification\n- Plan your approach following existing patterns\n\n### Phase 3: Implementation\n- Make changes using appropriate file tools\n- Prefer `file_write` for new files, `file_read` then modify for existing files\n- Follow existing code style exactly\n- Use existing libraries and utilities\n\n### Phase 4: Verification\n- Run tests if available (check for test scripts)\n- Run linting/type checking commands\n- Verify changes work as expected\n\n### Phase 5: Completion\n- Update todos to `completed`\n- Provide concise summary of changes\n- Reference specific file paths and line numbers when relevant\n- **DO NOT commit changes** - inform user what was done so they can review and commit themselves\n\n## Tool Usage Guidelines\n\n**File Operations**:\n- `file_read`: Read before editing. 
Use for understanding code.\n- `file_write`: Overwrite file content completely.\n- `file_write_append`: Add to end of file.\n- `file_create`: Create new files with optional content.\n- `file_list`: List directory contents (defaults to FilePickerDir).\n- Paths are relative to FilePickerDir unless starting with `/`.\n\n**Command Execution (WHITELISTED ONLY)**:\n- Allowed: grep, sed, awk, find, cat, head, tail, sort, uniq, wc, ls, echo, cut, tr, cp, mv, rm, mkdir, rmdir, pwd, df, free, ps, top, du, whoami, date, uname\n- **Git reads allowed**: git status, git log, git diff, git show, git branch, git reflog, git rev-parse, git shortlog, git describe\n- **Git actions FORBIDDEN**: git add, git commit, git push, git checkout, git reset, git rm, etc.\n- Use for searching code, reading git context, running tests/lint\n\n**Todo Management**:\n- `todo_create`: Add new task\n- `todo_read`: View all todos or specific one by ID\n- `todo_update`: Update task or change status (pending/in_progress/completed)\n- `todo_delete`: Remove completed or cancelled tasks\n\n## Important Rules\n\n1. **NEVER commit or stage changes**: Only git reads are allowed.\n2. **Check for tests**: Always look for test files and run them when appropriate.\n3. **Reference code locations**: Use format `file_path:line_number`.\n4. **Security**: Never generate or guess URLs. Only use URLs from local files.\n5. **Refuse malicious code**: If code appears malicious, refuse to work on it.\n6. **Ask clarifications**: When intent is unclear, ask questions.\n7. **Path handling**: Relative paths resolve against FilePickerDir. Use `/absolute/path` to bypass.\n\n## Response Style\n- Be direct and concise\n- One word answers are best when appropriate\n- Avoid: \"The answer is...\", \"Here is...\"\n- Use markdown for formatting\n- No emojis unless user explicitly requests",
+  "sys_prompt": "You are an expert software engineering assistant. Your goal is to help users with coding tasks, debugging, refactoring, and software development.\n\n## Core Principles\n1. **Security First**: Never expose secrets, keys, or credentials. Never commit sensitive data.\n2. **No Git Actions**: You can READ git info (status, log, diff) for context, but NEVER perform git actions (commit, add, push, checkout, reset, rm, etc.). Let the user handle all git operations.\n3. **Explore Before Execute**: Always understand the codebase structure before making changes.\n4. **Follow Conventions**: Match existing code style, patterns, and frameworks used in the project.\n5. **Be Concise**: Minimize output tokens while maintaining quality. Avoid unnecessary explanations.\n6. **Ask First**: When uncertain about intent, ask the user. Don't assume.\n\n## Workflow for Complex Tasks\nFor multi-step tasks, ALWAYS use the todo system to track progress:\n\n1. **Create Todo List**: At the start of complex tasks, use `todo_create` to break down work into actionable items.\n2. **Update Progress**: Mark items as `in_progress` when working on them, and `completed` when done.\n3. 
**Check Status**: Use `todo_read` to review your progress.\n\nExample workflow:\n- User: \"Add user authentication to this app\"\n- You: Create todos: [\"Analyze existing auth structure\", \"Check frameworks in use\", \"Implement auth middleware\", \"Add login endpoints\", \"Test implementation\"]\n\n## Task Execution Flow\n\n### Phase 1: Exploration (Always First)\n- Use `file_list` to understand directory structure (path defaults to FilePickerDir if not specified)\n- Use `file_read` to examine relevant files (paths are relative to FilePickerDir unless starting with `/`)\n- Use `execute_command` with `grep`/`find` to search for patterns\n- Check README, Makefile, package.json, or similar for build/test commands\n- Identify: frameworks, conventions, testing approach, lint/typecheck commands\n- **Git reads allowed**: You may use `git status`, `git log`, `git diff` for context, but only to inform your work\n- **Path handling**: Relative paths resolve against FilePickerDir; absolute paths (starting with `/`) bypass it\n\n### Phase 2: Planning\n- For complex tasks: create todo items\n- Identify files that need modification\n- Plan your approach following existing patterns\n\n### Phase 3: Implementation\n- Make changes using appropriate file tools\n- Prefer `file_write` for new files, `file_read` then edit for existing files\n- Follow existing code style exactly\n- Use existing libraries and utilities\n\n### Phase 4: Verification\n- Run tests if available (check for test scripts in README/Makefile)\n- Run linting/type checking commands\n- Verify changes work as expected\n\n### Phase 5: Completion\n- Update todos to `completed`\n- Provide concise summary of changes\n- Reference specific file paths and line numbers when relevant\n- **DO NOT commit changes** - inform user what was done so they can review and commit themselves\n\n## Command Execution\n- Use `execute_command` with a single string containing command and arguments (e.g., `go run main.go`, `ls -la`, `cd 
/tmp`)\n- Use `cd /path` to change the working directory for file operations",
  "role": "CodingAssistant",
  "filepath": "sysprompts/coding_assistant.json",
-  "first_msg": "Hello! I'm your coding assistant. I can help you with software engineering tasks like writing code, debugging, refactoring, and exploring codebases. I work best when you give me specific tasks, and for complex work, I'll create a todo list to track my progress. What would you like to work on?"
+  "first_msg": "Hello! I'm your coding assistant. Give me a specific task and I'll get started. For complex work, I'll track progress with todos."
 }
--- a/tools.go
+++ b/tools.go
@@ -85,6 +85,11 @@ Your current tools:
 "when_to_use": "when asked to read the content of a file"
 },
 {
+"name":"file_read_image",
+"args": ["path"],
+"when_to_use": "when asked to read or view an image file"
+},
+{
 "name":"file_write",
 "args": ["path", "content"],
 "when_to_use": "when needed to overwrite content to a file"
@@ -170,8 +175,36 @@ After that you are free to respond to the user.
 	webAgentsOnce      sync.Once
 )

+var windowToolSysMsg = `
+Additional window tools (available only if xdotool and maim are installed):
+[
+{
+"name":"list_windows",
+"args": [],
+"when_to_use": "when asked to list visible windows; returns map of window ID to window name"
+},
+{
+"name":"capture_window",
+"args": ["window"],
+"when_to_use": "when asked to take a screenshot of a specific window; saves to /tmp; window can be ID or name substring; returns file path"
+},
+{
+"name":"capture_window_and_view",
+"args": ["window"],
+"when_to_use": "when asked to take a screenshot of a specific window and show it; saves to /tmp and returns image for viewing; window can be ID or name substring"
+}
+]
+`
+
 var WebSearcher searcher.WebSurfer

+var (
+	windowToolsAvailable bool
+	xdotoolPath          string
+	maimPath             string
+	modelHasVision       bool
+)
+
 func init() {
 	sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "")
 	if err != nil {
@@ -181,6 +214,47 @@ func init() {
 	if err := rag.Init(cfg, logger, store); err != nil {
 		logger.Warn("failed to init rag; rag_search tool will not be available", "error", err)
 	}
+	checkWindowTools()
+	registerWindowTools()
+}
+
+func checkWindowTools() {
+	xdotoolPath, _ = exec.LookPath("xdotool")
+	maimPath, _ = exec.LookPath("maim")
+	windowToolsAvailable = xdotoolPath != "" && maimPath != ""
+	if windowToolsAvailable {
+		logger.Info("window tools available: xdotool and maim found")
+	} else {
+		if xdotoolPath == "" {
+			logger.Warn("xdotool not found, window listing tools will not be available")
+		}
+		if maimPath == "" {
+			logger.Warn("maim not found, window capture tools will not be available")
+		}
+	}
+}
+
+func UpdateToolCapabilities() {
+	if !cfg.ToolUse {
+		return
+	}
+	modelHasVision = false
+	if cfg == nil || cfg.CurrentAPI == "" {
+		logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
+		registerWindowTools()
+		return
+	}
+	prevHasVision := modelHasVision
+	modelHasVision = ModelHasVision(cfg.CurrentAPI, cfg.CurrentModel)
+	if modelHasVision {
+		logger.Info("model has vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
+	} else {
+		logger.Info("model does not have vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
+		if windowToolsAvailable && !prevHasVision && !modelHasVision {
+			_ = notifyUser("window tools", "Window capture-and-view unavailable: model lacks vision support")
+		}
+	}
+	registerWindowTools()
 }

 // getWebAgentClient returns a singleton AgentClient for web agents.
@@ -469,6 +543,43 @@ func fileRead(args map[string]string) []byte {
 	return jsonResult
 }

+func fileReadImage(args map[string]string) []byte {
+	path, ok := args["path"]
+	if !ok || path == "" {
+		msg := "path not provided to file_read_image tool"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	path = resolvePath(path)
+	dataURL, err := models.CreateImageURLFromPath(path)
+	if err != nil {
+		msg := "failed to read image; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// result := map[string]any{
+	// 	"type": "multimodal_content",
+	// 	"parts": []map[string]string{
+	// 		{"type": "text", "text": "Image at " + path},
+	// 		{"type": "image_url", "url": dataURL},
+	// 	},
+	// }
+	result := models.MultimodalToolResp{
+		Type: "multimodal_content",
+		Parts: []map[string]string{
+			{"type": "text", "text": "Image at " + path},
+			{"type": "image_url", "url": dataURL},
+		},
+	}
+	jsonResult, err := json.Marshal(result)
+	if err != nil {
+		msg := "failed to marshal result; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	return jsonResult
+}
+
 func fileWrite(args map[string]string) []byte {
 	path, ok := args["path"]
 	if !ok || path == "" {
@@ -519,21 +630,17 @@ func fileEdit(args map[string]string) []byte {
 		return []byte(msg)
 	}
 	path = resolvePath(path)
-
 	oldString, ok := args["oldString"]
 	if !ok || oldString == "" {
 		msg := "oldString not provided to file_edit tool"
 		logger.Error(msg)
 		return []byte(msg)
 	}
-
 	newString, ok := args["newString"]
 	if !ok {
 		newString = ""
 	}
-
 	lineNumberStr, hasLineNumber := args["lineNumber"]
-
 	// Read file content
 	content, err := os.ReadFile(path)
 	if err != nil {
@@ -541,10 +648,8 @@ func fileEdit(args map[string]string) []byte {
 		logger.Error(msg)
 		return []byte(msg)
 	}
-
 	fileContent := string(content)
 	var replacementCount int
-
 	if hasLineNumber && lineNumberStr != "" {
 		// Line-number based edit
 		lineNum, err := strconv.Atoi(lineNumberStr)
@@ -579,13 +684,11 @@ func fileEdit(args map[string]string) []byte {
 		fileContent = strings.ReplaceAll(fileContent, oldString, newString)
 		replacementCount = strings.Count(fileContent, newString)
 	}
-
 	if err := os.WriteFile(path, []byte(fileContent), 0644); err != nil {
 		msg := "failed to write file: " + err.Error()
 		logger.Error(msg)
 		return []byte(msg)
 	}
-
 	msg := fmt.Sprintf("file edited successfully at %s (%d replacement(s))", path, replacementCount)
 	return []byte(msg)
 }
@@ -765,45 +868,31 @@ func listDirectory(path string) ([]string, error) {

 // Command Execution Tool
 func executeCommand(args map[string]string) []byte {
-	command, ok := args["command"]
-	if !ok || command == "" {
+	commandStr := args["command"]
+	if commandStr == "" {
 		msg := "command not provided to execute_command tool"
 		logger.Error(msg)
 		return []byte(msg)
 	}
-	// Get arguments - handle both single arg and multiple args
-	var cmdArgs []string
-	if args["args"] != "" {
-		// If args is provided as a single string, split by spaces
-		cmdArgs = strings.Fields(args["args"])
-	} else {
-		// If individual args are provided, collect them
-		argNum := 1
-		for {
-			argKey := fmt.Sprintf("arg%d", argNum)
-			if argValue, exists := args[argKey]; exists && argValue != "" {
-				cmdArgs = append(cmdArgs, argValue)
-			} else {
-				break
-			}
-			argNum++
-		}
-	}
-	// Handle commands passed as single string with spaces (e.g., "go run main.go")
+	// Handle commands passed as single string with spaces (e.g., "go run main.go" or "cd /tmp")
 	// Split into base command and arguments
-	if strings.Contains(command, " ") {
-		parts := strings.Fields(command)
-		baseCmd := parts[0]
-		extraArgs := parts[1:]
-		// Prepend extra args to cmdArgs
-		cmdArgs = append(extraArgs, cmdArgs...)
-		command = baseCmd
+	parts := strings.Fields(commandStr)
+	if len(parts) == 0 {
+		msg := "command not provided to execute_command tool"
+		logger.Error(msg)
+		return []byte(msg)
 	}
+	command := parts[0]
+	cmdArgs := parts[1:]
 	if !isCommandAllowed(command, cmdArgs...) {
 		msg := fmt.Sprintf("command '%s' is not allowed", command)
 		logger.Error(msg)
 		return []byte(msg)
 	}
+	// Special handling for cd command - update FilePickerDir
+	if command == "cd" {
+		return handleCdCommand(cmdArgs)
+	}
 	// Execute with timeout for safety
 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer cancel()
@@ -817,12 +906,58 @@ func executeCommand(args map[string]string) []byte {
 	}
 	// Check if output is empty and return success message
 	if len(output) == 0 {
-		successMsg := fmt.Sprintf("command '%s %s' executed successfully and exited with code 0", command, strings.Join(cmdArgs, " "))
+		successMsg := fmt.Sprintf("command '%s' executed successfully and exited with code 0", commandStr)
 		return []byte(successMsg)
 	}
 	return output
 }

+// handleCdCommand emulates `cd` for the execute_command tool. executeCommand
+// hands "cd" over to this function instead of running a process, so the
+// "directory change" is recorded by repointing cfg.FilePickerDir.
+//
+// args holds the tokens that followed "cd"; only the first one is used:
+//   - no argument or "~": the user's home directory
+//   - "~/sub": a path below the user's home directory
+//   - a relative path: resolved against the current cfg.FilePickerDir
+//   - an absolute path: used as is
+//
+// The returned bytes are the message for the caller: a confirmation holding
+// the new absolute directory, or a "cd: ..." error text. cfg.FilePickerDir is
+// left untouched on every failure path.
+func handleCdCommand(args []string) []byte {
+	// fail logs the message and returns it as the tool response.
+	fail := func(msg string) []byte {
+		logger.Error(msg)
+		return []byte(msg)
+	}
+
+	// cd with no args goes to home directory
+	targetDir := "~"
+	if len(args) > 0 {
+		targetDir = args[0]
+	}
+
+	// The argument never passes through a shell here, so "~" has to be
+	// expanded by hand; otherwise "cd ~" would look for a directory
+	// literally named "~" below FilePickerDir.
+	if targetDir == "~" || strings.HasPrefix(targetDir, "~/") {
+		homeDir, err := os.UserHomeDir()
+		if err != nil {
+			return fail("cd: cannot determine home directory: " + err.Error())
+		}
+		targetDir = filepath.Join(homeDir, targetDir[1:])
+	}
+
+	// Resolve relative paths against current FilePickerDir
+	if !filepath.IsAbs(targetDir) {
+		targetDir = filepath.Join(cfg.FilePickerDir, targetDir)
+	}
+
+	// Verify the directory exists
+	info, err := os.Stat(targetDir)
+	if err != nil {
+		return fail("cd: " + targetDir + ": " + err.Error())
+	}
+	if !info.IsDir() {
+		return fail("cd: " + targetDir + ": not a directory")
+	}
+
+	// Update FilePickerDir
+	absDir, err := filepath.Abs(targetDir)
+	if err != nil {
+		return fail("cd: failed to resolve path: " + err.Error())
+	}
+	cfg.FilePickerDir = absDir
+	return []byte("FilePickerDir changed to: " + absDir)
+}
+
 // Helper functions for command execution
 // Todo structure
 type TodoItem struct {
@@ -1010,6 +1145,7 @@ var gitReadSubcommands = map[string]bool{

 func isCommandAllowed(command string, args ...string) bool {
 	allowedCommands := map[string]bool{
+		"cd":     true,
 		"grep":   true,
 		"sed":    true,
 		"awk":    true,
@@ -1063,6 +1199,142 @@ func summarizeChat(args map[string]string) []byte {
 	return []byte(chatText)
 }

+// windowIDToHex rewrites a base-10 window ID as lowercase hexadecimal with a
+// "0x" prefix. Input that does not parse as a base-10 64-bit integer is
+// handed back unchanged.
+func windowIDToHex(decimalID string) string {
+	if parsed, parseErr := strconv.ParseInt(decimalID, 10, 64); parseErr == nil {
+		return fmt.Sprintf("0x%x", parsed)
+	}
+	return decimalID
+}
+
+// listWindows implements the list_windows tool. It runs
+// `xdotool search --name .` to collect window IDs and then
+// `xdotool getwindowname <id>` for each of them.
+//
+// On success the result is a JSON object mapping window ID to window name;
+// windows whose name lookup fails are left out. When the window tools are
+// unavailable, the search fails, or encoding fails, a plain-text message is
+// returned instead. args is not read.
+func listWindows(args map[string]string) []byte {
+	if !windowToolsAvailable {
+		return []byte("window tools not available: xdotool or maim not found")
+	}
+	rawIDs, err := exec.Command(xdotoolPath, "search", "--name", ".").Output()
+	if err != nil {
+		msg := "failed to list windows: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	namesByID := make(map[string]string)
+	// strings.Fields never yields empty or whitespace-padded tokens, so the
+	// IDs can be used as they come.
+	for _, winID := range strings.Fields(string(rawIDs)) {
+		rawName, lookupErr := exec.Command(xdotoolPath, "getwindowname", winID).Output()
+		if lookupErr != nil {
+			continue
+		}
+		namesByID[winID] = strings.TrimSpace(string(rawName))
+	}
+	encoded, err := json.Marshal(namesByID)
+	if err != nil {
+		msg := "failed to marshal window list: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	return encoded
+}
+
+// windowNameSanitizer matches every run of characters other than ASCII
+// letters. It is compiled once here rather than on every capture.
+var windowNameSanitizer = regexp.MustCompile(`[^a-zA-Z]+`)
+
+// resolveWindowID turns the "window" tool argument into a window ID.
+// A value that parses as a decimal integer is taken to be an ID already;
+// anything else is passed to `xdotool search --name` and the first ID in the
+// output is used. ok is false when the search fails or prints no IDs.
+func resolveWindowID(window string) (id string, ok bool) {
+	if _, err := strconv.Atoi(window); err == nil {
+		return window, true
+	}
+	output, err := exec.Command(xdotoolPath, "search", "--name", window).Output()
+	if err != nil {
+		return "", false
+	}
+	ids := strings.Fields(string(output))
+	if len(ids) == 0 {
+		return "", false
+	}
+	return ids[0], true
+}
+
+// screenshotWindow performs the steps shared by captureWindow and
+// captureWindowAndView: it validates args["window"], resolves the window ID,
+// derives a file name from the window name and the current Unix time, and
+// runs maim to write the screenshot under /tmp.
+//
+// It returns the written file path and a nil failure on success. Otherwise
+// failure holds the message the tool should return and filename is empty.
+func screenshotWindow(args map[string]string) (filename string, failure []byte) {
+	if !windowToolsAvailable {
+		return "", []byte("window tools not available: xdotool or maim not found")
+	}
+	window := args["window"]
+	if window == "" {
+		return "", []byte("window parameter required (window ID or name)")
+	}
+	windowID, ok := resolveWindowID(window)
+	if !ok {
+		return "", []byte("window not found: " + window)
+	}
+	// The window name only decorates the file name, so a failed lookup is
+	// not fatal: it is logged and the generic "window" prefix is used.
+	nameOutput, err := exec.Command(xdotoolPath, "getwindowname", windowID).Output()
+	if err != nil {
+		logger.Warn("failed to get window name for screenshot", "error", err)
+	}
+	windowName := windowNameSanitizer.ReplaceAllString(strings.TrimSpace(string(nameOutput)), "")
+	if windowName == "" {
+		windowName = "window"
+	}
+	filename = fmt.Sprintf("/tmp/%s_%d.jpg", windowName, time.Now().Unix())
+	// maim is given the window ID in hexadecimal form.
+	if err := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename).Run(); err != nil {
+		msg := "failed to capture window: " + err.Error()
+		logger.Error(msg)
+		return "", []byte(msg)
+	}
+	return filename, nil
+}
+
+// captureWindow implements the capture_window tool. args["window"] is a
+// window ID or a window name to search for. It returns
+// "screenshot saved: <path>" on success, or a plain-text error message.
+func captureWindow(args map[string]string) []byte {
+	filename, failure := screenshotWindow(args)
+	if failure != nil {
+		return failure
+	}
+	return []byte("screenshot saved: " + filename)
+}
+
+// captureWindowAndView implements the capture_window_and_view tool. It takes
+// the same screenshot as captureWindow and additionally converts the saved
+// file with models.CreateImageURLFromPath. On success it returns a
+// JSON-encoded models.MultimodalToolResp with a text part naming the file and
+// an image_url part; on failure it returns a plain-text error message.
+func captureWindowAndView(args map[string]string) []byte {
+	filename, failure := screenshotWindow(args)
+	if failure != nil {
+		return failure
+	}
+	dataURL, err := models.CreateImageURLFromPath(filename)
+	if err != nil {
+		msg := "failed to create image URL: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	result := models.MultimodalToolResp{
+		Type: "multimodal_content",
+		Parts: []map[string]string{
+			{"type": "text", "text": "Screenshot saved: " + filename},
+			{"type": "image_url", "url": dataURL},
+		},
+	}
+	jsonResult, err := json.Marshal(result)
+	if err != nil {
+		msg := "failed to marshal result: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	return jsonResult
+}
+
 type fnSig func(map[string]string) []byte

 var fnMap = map[string]fnSig{
@@ -1076,6 +1348,7 @@ var fnMap = map[string]fnSig{
 	"read_url_raw":      readURLRaw,
 	"file_create":       fileCreate,
 	"file_read":         fileRead,
+	"file_read_image":   fileReadImage,
 	"file_write":        fileWrite,
 	"file_write_append": fileWriteAppend,
 	"file_edit":         fileEdit,
@@ -1091,6 +1364,66 @@ var fnMap = map[string]fnSig{
 	"summarize_chat":    summarizeChat,
 }

+// registerWindowTools wires the window tools into fnMap, baseTools and
+// toolSysMsg. It does nothing unless windowToolsAvailable is set.
+//
+// list_windows and capture_window are always registered;
+// capture_window_and_view is registered only when modelHasVision is set.
+//
+// The function is safe to call more than once. A tool definition whose
+// function name is already present in baseTools (for example because this
+// file's init added it, or because of an earlier call) is not appended again,
+// and windowToolSysMsg is appended to toolSysMsg only if it is not already
+// contained in it.
+func registerWindowTools() {
+	if !windowToolsAvailable {
+		return
+	}
+	// windowParams builds a fresh parameter schema for tools that take a
+	// single required "window" argument, so no two tools share a map.
+	windowParams := func() models.ToolFuncParams {
+		return models.ToolFuncParams{
+			Type:     "object",
+			Required: []string{"window"},
+			Properties: map[string]models.ToolArgProps{
+				"window": {
+					Type:        "string",
+					Description: "window ID or window name (partial match)",
+				},
+			},
+		}
+	}
+	fnMap["list_windows"] = listWindows
+	fnMap["capture_window"] = captureWindow
+	windowTools := []models.Tool{
+		{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "list_windows",
+				Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
+				Parameters: models.ToolFuncParams{
+					Type:       "object",
+					Required:   []string{},
+					Properties: map[string]models.ToolArgProps{},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "capture_window",
+				Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
+				Parameters:  windowParams(),
+			},
+		},
+	}
+	if modelHasVision {
+		fnMap["capture_window_and_view"] = captureWindowAndView
+		windowTools = append(windowTools, models.Tool{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "capture_window_and_view",
+				Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
+				Parameters:  windowParams(),
+			},
+		})
+	}
+	// Append only the definitions baseTools does not hold yet, so the model
+	// is never offered the same tool twice.
+	known := make(map[string]struct{}, len(baseTools))
+	for i := range baseTools {
+		known[baseTools[i].Function.Name] = struct{}{}
+	}
+	for _, tool := range windowTools {
+		if _, dup := known[tool.Function.Name]; dup {
+			continue
+		}
+		baseTools = append(baseTools, tool)
+	}
+	if !strings.Contains(toolSysMsg, windowToolSysMsg) {
+		toolSysMsg += windowToolSysMsg
+	}
+}
+
 // callToolWithAgent calls the tool and applies any registered agent.
 func callToolWithAgent(name string, args map[string]string) []byte {
 	registerWebAgents()
@@ -1302,6 +1635,24 @@ var baseTools = []models.Tool{
 			},
 		},
 	},
+	// file_read_image
+	models.Tool{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "file_read_image",
+			Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"path"},
+				Properties: map[string]models.ToolArgProps{
+					"path": models.ToolArgProps{
+						Type:        "string",
+						Description: "path of the image file to read",
+					},
+				},
+			},
+		},
+	},
 	// file_write
 	models.Tool{
 		Type: "function",
@@ -1461,18 +1812,14 @@ var baseTools = []models.Tool{
 		Type: "function",
 		Function: models.ToolFunc{
 			Name:        "execute_command",
-			Description: "Execute a shell command safely. Use when you need to run system commands like grep sed awk find cat head tail sort uniq wc ls echo cut tr cp mv rm mkdir rmdir pwd df free ps top du whoami date uname go. Git is allowed for read-only operations: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe.",
+			Description: "Execute a shell command safely. Use when you need to run system commands like cd grep sed awk find cat head tail sort uniq wc ls echo cut tr cp mv rm mkdir rmdir pwd df free ps top du whoami date uname go git. Git is allowed for read-only operations: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe. Use 'cd /path' to change working directory.",
 			Parameters: models.ToolFuncParams{
 				Type:     "object",
 				Required: []string{"command"},
 				Properties: map[string]models.ToolArgProps{
 					"command": models.ToolArgProps{
 						Type:        "string",
-						Description: "command to execute (only commands from whitelist are allowed: grep sed awk find cat head tail sort uniq wc ls echo cut tr cp mv rm mkdir rmdir pwd df free ps top du whoami date uname go; git allowed for reads: status log diff show branch reflog rev-parse shortlog describe)",
-					},
-					"args": models.ToolArgProps{
-						Type:        "string",
-						Description: "command arguments as a single string (e.g., '-la {path}')",
+						Description: "command to execute with arguments (e.g., 'go run main.go', 'ls -la /tmp', 'cd /home/user'). Use a single string; arguments should be space-separated after the command.",
 					},
 				},
 			},
@@ -1522,8 +1869,21 @@ var baseTools = []models.Tool{
 			Description: "Update a todo item by ID with new task or status. Status must be one of: pending, in_progress, completed.",
 			Parameters: models.ToolFuncParams{
 				Type:     "object",
-				Required:   []string{},
-				Properties: map[string]models.ToolArgProps{},
+				Required: []string{"id"},
+				Properties: map[string]models.ToolArgProps{
+					"id": models.ToolArgProps{
+						Type:        "string",
+						Description: "id of the todo item to update",
+					},
+					"task": models.ToolArgProps{
+						Type:        "string",
+						Description: "new task description (optional)",
+					},
+					"status": models.ToolArgProps{
+						Type:        "string",
+						Description: "new status: pending, in_progress, or completed (optional)",
+					},
+				},
 			},
 		},
 	},
@@ -1546,3 +1906,56 @@ var baseTools = []models.Tool{
 		},
 	},
 }
+
+// init appends the list_windows, capture_window and capture_window_and_view
+// tool definitions to baseTools when windowToolsAvailable is set.
+//
+// NOTE(review): this runs during package initialisation, so it only has an
+// effect if windowToolsAvailable is already true at that point - confirm.
+// NOTE(review): registerWindowTools registers tools under the same names and
+// gates capture_window_and_view on modelHasVision, which this function does
+// not - confirm both paths are meant to exist.
+func init() {
+	if !windowToolsAvailable {
+		return
+	}
+	// windowArg builds a fresh schema with the single required "window"
+	// argument for each tool that needs it.
+	windowArg := func() models.ToolFuncParams {
+		return models.ToolFuncParams{
+			Type:     "object",
+			Required: []string{"window"},
+			Properties: map[string]models.ToolArgProps{
+				"window": {
+					Type:        "string",
+					Description: "window ID or window name (partial match)",
+				},
+			},
+		}
+	}
+	screenTools := []models.Tool{
+		{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "list_windows",
+				Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
+				Parameters: models.ToolFuncParams{
+					Type:       "object",
+					Required:   []string{},
+					Properties: map[string]models.ToolArgProps{},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "capture_window",
+				Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
+				Parameters:  windowArg(),
+			},
+		},
+		{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "capture_window_and_view",
+				Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
+				Parameters:  windowArg(),
+			},
+		},
+	}
+	baseTools = append(baseTools, screenTools...)
+}
--- a/tui.go
+++ b/tui.go
@@ -35,6 +35,8 @@ var (
 	renameWindow       *tview.InputField
 	roleEditWindow     *tview.InputField
 	shellInput         *tview.InputField
+	confirmModal       *tview.Modal
+	confirmPageName    = "confirm"
 	fullscreenMode     bool
 	positionVisible    bool = true
 	scrollToEndEnabled bool = true
@@ -195,6 +197,39 @@ func init() {
 		}
 		return event
 	})
+	confirmModal = tview.NewModal().
+		SetText("You are trying to send an empty message.\nIt makes sense if the last message in the chat is from you.\nAre you sure?").
+		AddButtons([]string{"Yes", "No"}).
+		SetButtonBackgroundColor(tcell.ColorBlack).
+		SetButtonTextColor(tcell.ColorWhite).
+		SetDoneFunc(func(buttonIndex int, buttonLabel string) {
+			if buttonLabel == "Yes" {
+				persona := cfg.UserRole
+				if cfg.WriteNextMsgAs != "" {
+					persona = cfg.WriteNextMsgAs
+				}
+				chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: ""}
+			} // In both Yes and No, go back to the main page
+			pages.SwitchToPage("main") // or whatever your main page is named
+		})
+	confirmModal.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
+		if event.Key() == tcell.KeyRune {
+			switch event.Rune() {
+			case 'y', 'Y':
+				persona := cfg.UserRole
+				if cfg.WriteNextMsgAs != "" {
+					persona = cfg.WriteNextMsgAs
+				}
+				chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: ""}
+				pages.SwitchToPage("main")
+				return nil
+			case 'n', 'N', 'x', 'X':
+				pages.SwitchToPage("main")
+				return nil
+			}
+		}
+		return event
+	})
 	textArea = tview.NewTextArea().
 		SetPlaceholder("input is multiline; press <Enter> to start the next line;\npress <Esc> to send the message.")
 	textArea.SetBorder(true).SetTitle("input")
@@ -997,7 +1032,6 @@ func init() {
 				return nil
 			}
 			msgText := textArea.GetText()
-			if msgText != "" {
 			nl := "\n\n" // keep empty lines between messages
 			prevText := textView.GetText(true)
 			persona := cfg.UserRole
@@ -1029,10 +1063,12 @@ func init() {
 					textView.ScrollToEnd()
 				}
 				colorText()
+			} else {
+				pages.AddPage(confirmPageName, confirmModal, true, true)
+				return nil
 			}
 			// go chatRound(msgText, persona, textView, false, false)
 			chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
-			}
 			return nil
 		}
 		if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn {
Author	SHA1	Message	Date
Grail Finder	3389b1d83b	Fix: linter complaints	2026-03-02 11:39:55 +03:00
Grail Finder	4f6000a43a	Enha: check if model has vision before giving it vision tools	2026-03-02 11:25:20 +03:00
Grail Finder	9ba46b40cc	Feat: screencapture for completion	2026-03-02 11:12:04 +03:00
Grail Finder	5bb456272e	Feat: capture window (screenshot)	2026-03-02 10:33:41 +03:00
Grail Finder	8999f48fb9	Fix (completion): handle multiple images in history	2026-03-02 09:23:22 +03:00
Grail Finder	b2f280a7f1	Feat: read img for completion	2026-03-02 07:46:08 +03:00
Grail Finder	65cbd5d6a6	Fix (ctrl+v): trim loaded mark from the model	2026-03-02 07:19:21 +03:00
Grail Finder	caac1d397a	Feat: read img tool for chat endpoint	2026-03-02 07:12:28 +03:00
Grail Finder	742f1ca838	Enha: modal affirmation popup on sending empty msg	2026-03-01 16:21:18 +03:00
Grail Finder	e36bade353	Fix: escape with empty textarea not generating response	2026-03-01 13:33:25 +03:00
Grail Finder	01d8bcdbf5	Enha: avoid \n\n in tool collapse	2026-03-01 12:28:23 +03:00
Grail Finder	f6a395bce9	Fix: todo_update	2026-03-01 12:16:17 +03:00
Grail Finder	dc34c63256	Feat: handle llm's cd use	2026-03-01 11:44:43 +03:00
Grail Finder	cdfccf9a24	Enha (llama.cpp): show loaded model on startup	2026-03-01 08:22:02 +03:00