Compare commits
14 Commits
enha/codin
...
4bddce3700
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4bddce3700 | ||
|
|
fcc71987bf | ||
|
|
8458edf5a8 | ||
|
|
07b06bb0d3 | ||
|
|
3389b1d83b | ||
|
|
4f6000a43a | ||
|
|
9ba46b40cc | ||
|
|
5bb456272e | ||
|
|
8999f48fb9 | ||
|
|
b2f280a7f1 | ||
|
|
65cbd5d6a6 | ||
|
|
caac1d397a | ||
|
|
742f1ca838 | ||
|
|
e36bade353 |
96
bot.go
96
bot.go
@@ -63,7 +63,9 @@ var (
|
||||
"google/gemma-3-27b-it:free",
|
||||
"meta-llama/llama-3.3-70b-instruct:free",
|
||||
}
|
||||
LocalModels = []string{}
|
||||
LocalModels = []string{}
|
||||
localModelsData *models.LCPModels
|
||||
orModelsData *models.ORModels
|
||||
)
|
||||
|
||||
var thinkBlockRE = regexp.MustCompile(`(?s)<think>.*?</think>`)
|
||||
@@ -355,6 +357,7 @@ func fetchORModels(free bool) ([]string, error) {
|
||||
if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
orModelsData = data
|
||||
freeModels := data.ListModels(free)
|
||||
return freeModels, nil
|
||||
}
|
||||
@@ -416,6 +419,7 @@ func fetchLCPModelsWithStatus() (*models.LCPModels, error) {
|
||||
if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
localModelsData = data
|
||||
return data, nil
|
||||
}
|
||||
|
||||
@@ -433,6 +437,33 @@ func isModelLoaded(modelID string) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func ModelHasVision(api, modelID string) bool {
|
||||
switch {
|
||||
case strings.Contains(api, "deepseek"):
|
||||
return false
|
||||
case strings.Contains(api, "openrouter"):
|
||||
resp, err := http.Get("https://openrouter.ai/api/v1/models")
|
||||
if err != nil {
|
||||
logger.Warn("failed to fetch OR models for vision check", "error", err)
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
orm := &models.ORModels{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(orm); err != nil {
|
||||
logger.Warn("failed to decode OR models for vision check", "error", err)
|
||||
return false
|
||||
}
|
||||
return orm.HasVision(modelID)
|
||||
default:
|
||||
models, err := fetchLCPModelsWithStatus()
|
||||
if err != nil {
|
||||
logger.Warn("failed to fetch LCP models for vision check", "error", err)
|
||||
return false
|
||||
}
|
||||
return models.HasVision(modelID)
|
||||
}
|
||||
}
|
||||
|
||||
// monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded.
|
||||
func monitorModelLoad(modelID string) {
|
||||
go func() {
|
||||
@@ -718,7 +749,7 @@ func sendMsgToLLM(body io.Reader) {
|
||||
}
|
||||
interrupt:
|
||||
if interruptResp { // read bytes, so it would not get into beginning of the next req
|
||||
interruptResp = false
|
||||
// interruptResp = false
|
||||
logger.Info("interrupted bot response", "chunk_counter", counter)
|
||||
streamDone <- true
|
||||
break
|
||||
@@ -772,6 +803,7 @@ func showSpinner() {
|
||||
}
|
||||
|
||||
func chatRound(r *models.ChatRoundReq) error {
|
||||
interruptResp = false
|
||||
botRespMode = true
|
||||
go showSpinner()
|
||||
updateStatusLine()
|
||||
@@ -937,6 +969,9 @@ out:
|
||||
}
|
||||
// Strip think blocks before parsing for tool calls
|
||||
respTextNoThink := thinkBlockRE.ReplaceAllString(respText.String(), "")
|
||||
if interruptResp {
|
||||
return nil
|
||||
}
|
||||
if findCall(respTextNoThink, toolResp.String()) {
|
||||
return nil
|
||||
}
|
||||
@@ -1174,17 +1209,59 @@ func findCall(msg, toolCall string) bool {
|
||||
toolRunningMode = false
|
||||
toolMsg := string(resp)
|
||||
logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
|
||||
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
||||
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolMsg)
|
||||
// Create tool response message with the proper tool_call_id
|
||||
// Mark shell commands as always visible
|
||||
isShellCommand := fc.Name == "execute_command"
|
||||
toolResponseMsg := models.RoleMsg{
|
||||
Role: cfg.ToolRole,
|
||||
Content: toolMsg,
|
||||
ToolCallID: lastToolCall.ID,
|
||||
IsShellCommand: isShellCommand,
|
||||
// Check if response is multimodal content (image)
|
||||
var toolResponseMsg models.RoleMsg
|
||||
if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
|
||||
// Parse multimodal content response
|
||||
multimodalResp := models.MultimodalToolResp{}
|
||||
if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
|
||||
// Create RoleMsg with ContentParts
|
||||
var contentParts []any
|
||||
for _, part := range multimodalResp.Parts {
|
||||
partType := part["type"]
|
||||
switch partType {
|
||||
case "text":
|
||||
contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
|
||||
case "image_url":
|
||||
contentParts = append(contentParts, models.ImageContentPart{
|
||||
Type: "image_url",
|
||||
ImageURL: struct {
|
||||
URL string `json:"url"`
|
||||
}{URL: part["url"]},
|
||||
})
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
toolResponseMsg = models.RoleMsg{
|
||||
Role: cfg.ToolRole,
|
||||
ContentParts: contentParts,
|
||||
HasContentParts: true,
|
||||
ToolCallID: lastToolCall.ID,
|
||||
IsShellCommand: isShellCommand,
|
||||
}
|
||||
} else {
|
||||
// Fallback to regular content
|
||||
toolResponseMsg = models.RoleMsg{
|
||||
Role: cfg.ToolRole,
|
||||
Content: toolMsg,
|
||||
ToolCallID: lastToolCall.ID,
|
||||
IsShellCommand: isShellCommand,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
toolResponseMsg = models.RoleMsg{
|
||||
Role: cfg.ToolRole,
|
||||
Content: toolMsg,
|
||||
ToolCallID: lastToolCall.ID,
|
||||
IsShellCommand: isShellCommand,
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
||||
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
|
||||
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
|
||||
logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
|
||||
// Clear the stored tool call ID after using it
|
||||
@@ -1339,6 +1416,7 @@ func updateModelLists() {
|
||||
chatBody.Model = m
|
||||
cachedModelColor = "green"
|
||||
updateStatusLine()
|
||||
UpdateToolCapabilities()
|
||||
app.Draw()
|
||||
return
|
||||
}
|
||||
|
||||
82
helpfuncs.go
82
helpfuncs.go
@@ -11,6 +11,7 @@ import (
|
||||
"path"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
@@ -376,9 +377,90 @@ func makeStatusLine() string {
|
||||
roleInject := fmt.Sprintf(" | [%s:-:b]role injection[-:-:-] (alt+7)", boolColors[injectRole])
|
||||
statusLine += roleInject
|
||||
}
|
||||
// context tokens
|
||||
contextTokens := getContextTokens()
|
||||
maxCtx := getMaxContextTokens()
|
||||
if maxCtx == 0 {
|
||||
maxCtx = 16384
|
||||
}
|
||||
if contextTokens > 0 {
|
||||
contextInfo := fmt.Sprintf(" | context-estim: [orange:-:b]%d/%d[-:-:-]", contextTokens, maxCtx)
|
||||
statusLine += contextInfo
|
||||
}
|
||||
return statusLine + imageInfo + shellModeInfo
|
||||
}
|
||||
|
||||
func getContextTokens() int {
|
||||
if chatBody == nil || chatBody.Messages == nil {
|
||||
return 0
|
||||
}
|
||||
total := 0
|
||||
messages := chatBody.Messages
|
||||
for i := range messages {
|
||||
msg := &messages[i]
|
||||
if msg.Stats != nil && msg.Stats.Tokens > 0 {
|
||||
total += msg.Stats.Tokens
|
||||
} else if msg.GetText() != "" {
|
||||
total += len(msg.GetText()) / 4
|
||||
}
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
const deepseekContext = 128000
|
||||
|
||||
func getMaxContextTokens() int {
|
||||
if chatBody == nil || chatBody.Model == "" {
|
||||
return 0
|
||||
}
|
||||
modelName := chatBody.Model
|
||||
switch {
|
||||
case strings.Contains(cfg.CurrentAPI, "openrouter"):
|
||||
if orModelsData != nil {
|
||||
for i := range orModelsData.Data {
|
||||
m := &orModelsData.Data[i]
|
||||
if m.ID == modelName {
|
||||
return m.ContextLength
|
||||
}
|
||||
}
|
||||
}
|
||||
case strings.Contains(cfg.CurrentAPI, "deepseek"):
|
||||
return deepseekContext
|
||||
default:
|
||||
if localModelsData != nil {
|
||||
for i := range localModelsData.Data {
|
||||
m := &localModelsData.Data[i]
|
||||
if m.ID == modelName {
|
||||
for _, arg := range m.Status.Args {
|
||||
if strings.HasPrefix(arg, "--ctx-size") {
|
||||
if strings.Contains(arg, "=") {
|
||||
val := strings.Split(arg, "=")[1]
|
||||
if n, err := strconv.Atoi(val); err == nil {
|
||||
return n
|
||||
}
|
||||
} else {
|
||||
idx := -1
|
||||
for j, a := range m.Status.Args {
|
||||
if a == "--ctx-size" && j+1 < len(m.Status.Args) {
|
||||
idx = j + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
if idx != -1 {
|
||||
if n, err := strconv.Atoi(m.Status.Args[idx]); err == nil {
|
||||
return n
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// set of roles within card definition and mention in chat history
|
||||
func listChatRoles() []string {
|
||||
currentChat, ok := chatMap[activeChatName]
|
||||
|
||||
76
llm.go
76
llm.go
@@ -3,7 +3,6 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"gf-lt/models"
|
||||
"io"
|
||||
"strings"
|
||||
@@ -119,25 +118,22 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
|
||||
logger.Debug("formmsg lcpcompletion", "link", cfg.CurrentAPI)
|
||||
localImageAttachmentPath := imageAttachmentPath
|
||||
var multimodalData []string
|
||||
if localImageAttachmentPath != "" {
|
||||
imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
|
||||
if err != nil {
|
||||
logger.Error("failed to create image URL from path for completion",
|
||||
"error", err, "path", localImageAttachmentPath)
|
||||
return nil, err
|
||||
}
|
||||
// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
|
||||
parts := strings.SplitN(imageURL, ",", 2)
|
||||
if len(parts) == 2 {
|
||||
multimodalData = append(multimodalData, parts[1])
|
||||
} else {
|
||||
logger.Error("invalid image data URL format", "url", imageURL)
|
||||
return nil, errors.New("invalid image data URL format")
|
||||
}
|
||||
imageAttachmentPath = "" // Clear the attachment after use
|
||||
}
|
||||
if msg != "" { // otherwise let the bot to continue
|
||||
newMsg := models.RoleMsg{Role: role, Content: msg}
|
||||
var newMsg models.RoleMsg
|
||||
if localImageAttachmentPath != "" {
|
||||
newMsg = models.NewMultimodalMsg(role, []any{})
|
||||
newMsg.AddTextPart(msg)
|
||||
imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
|
||||
if err != nil {
|
||||
logger.Error("failed to create image URL from path for completion",
|
||||
"error", err, "path", localImageAttachmentPath)
|
||||
return nil, err
|
||||
}
|
||||
newMsg.AddImagePart(imageURL, localImageAttachmentPath)
|
||||
imageAttachmentPath = "" // Clear the attachment after use
|
||||
} else { // not a multimodal msg or image passed in tool call
|
||||
newMsg = models.RoleMsg{Role: role, Content: msg}
|
||||
}
|
||||
newMsg = *processMessageTag(&newMsg)
|
||||
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||
}
|
||||
@@ -146,22 +142,40 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
|
||||
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{Role: cfg.ToolRole, Content: toolSysMsg})
|
||||
}
|
||||
filteredMessages, botPersona := filterMessagesForCurrentCharacter(chatBody.Messages)
|
||||
// Build prompt and extract images inline as we process each message
|
||||
messages := make([]string, len(filteredMessages))
|
||||
for i := range filteredMessages {
|
||||
messages[i] = stripThinkingFromMsg(&filteredMessages[i]).ToPrompt()
|
||||
m := stripThinkingFromMsg(&filteredMessages[i])
|
||||
messages[i] = m.ToPrompt()
|
||||
// Extract images from this message and add marker inline
|
||||
if len(m.ContentParts) > 0 {
|
||||
for _, part := range m.ContentParts {
|
||||
var imgURL string
|
||||
// Check for struct type
|
||||
if imgPart, ok := part.(models.ImageContentPart); ok {
|
||||
imgURL = imgPart.ImageURL.URL
|
||||
} else if partMap, ok := part.(map[string]any); ok {
|
||||
// Check for map type (from JSON unmarshaling)
|
||||
if partType, exists := partMap["type"]; exists && partType == "image_url" {
|
||||
if imgURLMap, ok := partMap["image_url"].(map[string]any); ok {
|
||||
if url, ok := imgURLMap["url"].(string); ok {
|
||||
imgURL = url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if imgURL != "" {
|
||||
// Extract base64 part from data URL (e.g., "data:image/jpeg;base64,...")
|
||||
parts := strings.SplitN(imgURL, ",", 2)
|
||||
if len(parts) == 2 {
|
||||
multimodalData = append(multimodalData, parts[1])
|
||||
messages[i] += " <__media__>"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
prompt := strings.Join(messages, "\n")
|
||||
// Add multimodal media markers to the prompt text when multimodal data is present
|
||||
// This is required by llama.cpp multimodal models so they know where to insert media
|
||||
if len(multimodalData) > 0 {
|
||||
// Add a media marker for each item in the multimodal data
|
||||
var sb strings.Builder
|
||||
sb.WriteString(prompt)
|
||||
for range multimodalData {
|
||||
sb.WriteString(" <__media__>") // llama.cpp default multimodal marker
|
||||
}
|
||||
prompt = sb.String()
|
||||
}
|
||||
// needs to be after <__media__> if there are images
|
||||
if !resume {
|
||||
botMsgStart := "\n" + botPersona + ":\n"
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
package models
|
||||
|
||||
const (
	// LoadedMark is the prefix put in front of a model name in model lists;
	// callers strip it with strings.TrimPrefix to get the bare model ID.
	LoadedMark = "(loaded) "
	// ToolRespMultyType is the "type" discriminator of a multimodal tool
	// response. It must equal the value the tools emit and the parser checks
	// for ("multimodal_content"); the previous value was misspelled
	// "multimodel_content" and could never match.
	ToolRespMultyType = "multimodal_content"
)
|
||||
|
||||
type APIType int
|
||||
|
||||
@@ -391,7 +391,6 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Determine the image format based on file extension
|
||||
var mimeType string
|
||||
switch {
|
||||
@@ -408,10 +407,8 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
|
||||
default:
|
||||
mimeType = "image/jpeg" // default
|
||||
}
|
||||
|
||||
// Encode to base64
|
||||
encoded := base64.StdEncoding.EncodeToString(data)
|
||||
|
||||
// Create data URL
|
||||
return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
|
||||
}
|
||||
@@ -611,6 +608,20 @@ func (lcp *LCPModels) ListModels() []string {
|
||||
return resp
|
||||
}
|
||||
|
||||
func (lcp *LCPModels) HasVision(modelID string) bool {
|
||||
for _, m := range lcp.Data {
|
||||
if m.ID == modelID {
|
||||
args := m.Status.Args
|
||||
for i := 0; i < len(args)-1; i++ {
|
||||
if args[i] == "--mmproj" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type ResponseStats struct {
|
||||
Tokens int
|
||||
Duration float64
|
||||
@@ -623,3 +634,8 @@ type ChatRoundReq struct {
|
||||
Regen bool
|
||||
Resume bool
|
||||
}
|
||||
|
||||
// MultimodalToolResp is the JSON envelope a tool returns when its result
// carries more than plain text (for example an image). The tools in this
// change set Type to "multimodal_content" and fill Parts with one map per
// content part.
type MultimodalToolResp struct {
	// Type is the discriminator the caller checks before treating the tool
	// output as multimodal.
	Type string `json:"type"`
	// Parts holds the content parts. Each map has a "type" key ("text" or
	// "image_url") plus the matching payload key ("text" or "url").
	Parts []map[string]string `json:"parts"`
}
|
||||
|
||||
@@ -172,3 +172,16 @@ func (orm *ORModels) ListModels(free bool) []string {
|
||||
}
|
||||
return resp
|
||||
}
|
||||
|
||||
func (orm *ORModels) HasVision(modelID string) bool {
|
||||
for i := range orm.Data {
|
||||
if orm.Data[i].ID == modelID {
|
||||
for _, mod := range orm.Data[i].Architecture.InputModalities {
|
||||
if mod == "image" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
64
popups.go
64
popups.go
@@ -143,6 +143,7 @@ func showAPILinkSelectionPopup() {
|
||||
apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
|
||||
// Update the API in config
|
||||
cfg.CurrentAPI = mainText
|
||||
UpdateToolCapabilities()
|
||||
// Update model list based on new API
|
||||
// Helper function to get model list for a given API (same as in props_table.go)
|
||||
getModelListForAPI := func(api string) []string {
|
||||
@@ -160,8 +161,9 @@ func showAPILinkSelectionPopup() {
|
||||
newModelList := getModelListForAPI(cfg.CurrentAPI)
|
||||
// Ensure chatBody.Model is in the new list; if not, set to first available model
|
||||
if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) {
|
||||
chatBody.Model = newModelList[0]
|
||||
chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark)
|
||||
cfg.CurrentModel = chatBody.Model
|
||||
UpdateToolCapabilities()
|
||||
}
|
||||
pages.RemovePage("apiLinkSelectionPopup")
|
||||
app.SetFocus(textArea)
|
||||
@@ -404,6 +406,66 @@ func showShellFileCompletionPopup(filter string) {
|
||||
app.SetFocus(widget)
|
||||
}
|
||||
|
||||
func showTextAreaFileCompletionPopup(filter string) {
|
||||
baseDir := cfg.FilePickerDir
|
||||
if baseDir == "" {
|
||||
baseDir = "."
|
||||
}
|
||||
complMatches := scanFiles(baseDir, filter)
|
||||
if len(complMatches) == 0 {
|
||||
return
|
||||
}
|
||||
if len(complMatches) == 1 {
|
||||
currentText := textArea.GetText()
|
||||
atIdx := strings.LastIndex(currentText, "@")
|
||||
if atIdx >= 0 {
|
||||
before := currentText[:atIdx]
|
||||
textArea.SetText(before+complMatches[0], true)
|
||||
}
|
||||
return
|
||||
}
|
||||
widget := tview.NewList().ShowSecondaryText(false).
|
||||
SetSelectedBackgroundColor(tcell.ColorGray)
|
||||
widget.SetTitle("file completion").SetBorder(true)
|
||||
for _, m := range complMatches {
|
||||
widget.AddItem(m, "", 0, nil)
|
||||
}
|
||||
widget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
|
||||
currentText := textArea.GetText()
|
||||
atIdx := strings.LastIndex(currentText, "@")
|
||||
if atIdx >= 0 {
|
||||
before := currentText[:atIdx]
|
||||
textArea.SetText(before+mainText, true)
|
||||
}
|
||||
pages.RemovePage("textAreaFileCompletionPopup")
|
||||
app.SetFocus(textArea)
|
||||
})
|
||||
widget.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
|
||||
if event.Key() == tcell.KeyEscape {
|
||||
pages.RemovePage("textAreaFileCompletionPopup")
|
||||
app.SetFocus(textArea)
|
||||
return nil
|
||||
}
|
||||
if event.Key() == tcell.KeyRune && event.Rune() == 'x' {
|
||||
pages.RemovePage("textAreaFileCompletionPopup")
|
||||
app.SetFocus(textArea)
|
||||
return nil
|
||||
}
|
||||
return event
|
||||
})
|
||||
modal := func(p tview.Primitive, width, height int) tview.Primitive {
|
||||
return tview.NewFlex().
|
||||
AddItem(nil, 0, 1, false).
|
||||
AddItem(tview.NewFlex().SetDirection(tview.FlexRow).
|
||||
AddItem(nil, 0, 1, false).
|
||||
AddItem(p, height, 1, true).
|
||||
AddItem(nil, 0, 1, false), width, 1, true).
|
||||
AddItem(nil, 0, 1, false)
|
||||
}
|
||||
pages.AddPage("textAreaFileCompletionPopup", modal(widget, 80, 20), true, true)
|
||||
app.SetFocus(widget)
|
||||
}
|
||||
|
||||
func updateWidgetColors(theme *tview.Theme) {
|
||||
bgColor := theme.PrimitiveBackgroundColor
|
||||
fgColor := theme.PrimaryTextColor
|
||||
|
||||
379
tools.go
379
tools.go
@@ -85,6 +85,11 @@ Your current tools:
|
||||
"when_to_use": "when asked to read the content of a file"
|
||||
},
|
||||
{
|
||||
"name":"file_read_image",
|
||||
"args": ["path"],
|
||||
"when_to_use": "when asked to read or view an image file"
|
||||
},
|
||||
{
|
||||
"name":"file_write",
|
||||
"args": ["path", "content"],
|
||||
"when_to_use": "when needed to overwrite content to a file"
|
||||
@@ -170,8 +175,36 @@ After that you are free to respond to the user.
|
||||
webAgentsOnce sync.Once
|
||||
)
|
||||
|
||||
var windowToolSysMsg = `
|
||||
Additional window tools (available only if xdotool and maim are installed):
|
||||
[
|
||||
{
|
||||
"name":"list_windows",
|
||||
"args": [],
|
||||
"when_to_use": "when asked to list visible windows; returns map of window ID to window name"
|
||||
},
|
||||
{
|
||||
"name":"capture_window",
|
||||
"args": ["window"],
|
||||
"when_to_use": "when asked to take a screenshot of a specific window; saves to /tmp; window can be ID or name substring; returns file path"
|
||||
},
|
||||
{
|
||||
"name":"capture_window_and_view",
|
||||
"args": ["window"],
|
||||
"when_to_use": "when asked to take a screenshot of a specific window and show it; saves to /tmp and returns image for viewing; window can be ID or name substring"
|
||||
}
|
||||
]
|
||||
`
|
||||
|
||||
var WebSearcher searcher.WebSurfer
|
||||
|
||||
// State of the optional window tools.
var (
	// windowToolsAvailable is true when both xdotool and maim were found.
	windowToolsAvailable bool
	// xdotoolPath is the resolved path of the xdotool binary, "" if missing.
	xdotoolPath string
	// maimPath is the resolved path of the maim binary, "" if missing.
	maimPath string
	// modelHasVision caches the result of the last vision capability check.
	modelHasVision bool
)
|
||||
|
||||
func init() {
|
||||
sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "")
|
||||
if err != nil {
|
||||
@@ -181,6 +214,47 @@ func init() {
|
||||
if err := rag.Init(cfg, logger, store); err != nil {
|
||||
logger.Warn("failed to init rag; rag_search tool will not be available", "error", err)
|
||||
}
|
||||
checkWindowTools()
|
||||
registerWindowTools()
|
||||
}
|
||||
|
||||
func checkWindowTools() {
|
||||
xdotoolPath, _ = exec.LookPath("xdotool")
|
||||
maimPath, _ = exec.LookPath("maim")
|
||||
windowToolsAvailable = xdotoolPath != "" && maimPath != ""
|
||||
if windowToolsAvailable {
|
||||
logger.Info("window tools available: xdotool and maim found")
|
||||
} else {
|
||||
if xdotoolPath == "" {
|
||||
logger.Warn("xdotool not found, window listing tools will not be available")
|
||||
}
|
||||
if maimPath == "" {
|
||||
logger.Warn("maim not found, window capture tools will not be available")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func UpdateToolCapabilities() {
|
||||
if !cfg.ToolUse {
|
||||
return
|
||||
}
|
||||
modelHasVision = false
|
||||
if cfg == nil || cfg.CurrentAPI == "" {
|
||||
logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
|
||||
registerWindowTools()
|
||||
return
|
||||
}
|
||||
prevHasVision := modelHasVision
|
||||
modelHasVision = ModelHasVision(cfg.CurrentAPI, cfg.CurrentModel)
|
||||
if modelHasVision {
|
||||
logger.Info("model has vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
|
||||
} else {
|
||||
logger.Info("model does not have vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
|
||||
if windowToolsAvailable && !prevHasVision && !modelHasVision {
|
||||
_ = notifyUser("window tools", "Window capture-and-view unavailable: model lacks vision support")
|
||||
}
|
||||
}
|
||||
registerWindowTools()
|
||||
}
|
||||
|
||||
// getWebAgentClient returns a singleton AgentClient for web agents.
|
||||
@@ -469,6 +543,43 @@ func fileRead(args map[string]string) []byte {
|
||||
return jsonResult
|
||||
}
|
||||
|
||||
func fileReadImage(args map[string]string) []byte {
|
||||
path, ok := args["path"]
|
||||
if !ok || path == "" {
|
||||
msg := "path not provided to file_read_image tool"
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
path = resolvePath(path)
|
||||
dataURL, err := models.CreateImageURLFromPath(path)
|
||||
if err != nil {
|
||||
msg := "failed to read image; error: " + err.Error()
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
// result := map[string]any{
|
||||
// "type": "multimodal_content",
|
||||
// "parts": []map[string]string{
|
||||
// {"type": "text", "text": "Image at " + path},
|
||||
// {"type": "image_url", "url": dataURL},
|
||||
// },
|
||||
// }
|
||||
result := models.MultimodalToolResp{
|
||||
Type: "multimodal_content",
|
||||
Parts: []map[string]string{
|
||||
{"type": "text", "text": "Image at " + path},
|
||||
{"type": "image_url", "url": dataURL},
|
||||
},
|
||||
}
|
||||
jsonResult, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
msg := "failed to marshal result; error: " + err.Error()
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
return jsonResult
|
||||
}
|
||||
|
||||
func fileWrite(args map[string]string) []byte {
|
||||
path, ok := args["path"]
|
||||
if !ok || path == "" {
|
||||
@@ -1088,6 +1199,142 @@ func summarizeChat(args map[string]string) []byte {
|
||||
return []byte(chatText)
|
||||
}
|
||||
|
||||
// windowIDToHex converts a decimal window ID such as "255" into the
// "0x"-prefixed hexadecimal form ("0xff"). Input that is not a non-negative
// decimal integer is returned unchanged, so an ID that is already in another
// form passes through and a negative number never becomes the malformed
// "0x-5".
func windowIDToHex(decimalID string) string {
	id, err := strconv.ParseInt(decimalID, 10, 64)
	if err != nil || id < 0 {
		return decimalID
	}
	return "0x" + strconv.FormatInt(id, 16)
}
|
||||
|
||||
func listWindows(args map[string]string) []byte {
|
||||
if !windowToolsAvailable {
|
||||
return []byte("window tools not available: xdotool or maim not found")
|
||||
}
|
||||
cmd := exec.Command(xdotoolPath, "search", "--name", ".")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
msg := "failed to list windows: " + err.Error()
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
windowIDs := strings.Fields(string(output))
|
||||
windows := make(map[string]string)
|
||||
for _, id := range windowIDs {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" {
|
||||
continue
|
||||
}
|
||||
nameCmd := exec.Command(xdotoolPath, "getwindowname", id)
|
||||
nameOutput, err := nameCmd.Output()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
name := strings.TrimSpace(string(nameOutput))
|
||||
windows[id] = name
|
||||
}
|
||||
data, err := json.Marshal(windows)
|
||||
if err != nil {
|
||||
msg := "failed to marshal window list: " + err.Error()
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
func captureWindow(args map[string]string) []byte {
|
||||
if !windowToolsAvailable {
|
||||
return []byte("window tools not available: xdotool or maim not found")
|
||||
}
|
||||
window, ok := args["window"]
|
||||
if !ok || window == "" {
|
||||
return []byte("window parameter required (window ID or name)")
|
||||
}
|
||||
var windowID string
|
||||
if _, err := strconv.Atoi(window); err == nil {
|
||||
windowID = window
|
||||
} else {
|
||||
cmd := exec.Command(xdotoolPath, "search", "--name", window)
|
||||
output, err := cmd.Output()
|
||||
if err != nil || len(strings.Fields(string(output))) == 0 {
|
||||
return []byte("window not found: " + window)
|
||||
}
|
||||
windowID = strings.Fields(string(output))[0]
|
||||
}
|
||||
nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
|
||||
nameOutput, _ := nameCmd.Output()
|
||||
windowName := strings.TrimSpace(string(nameOutput))
|
||||
windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
|
||||
if windowName == "" {
|
||||
windowName = "window"
|
||||
}
|
||||
timestamp := time.Now().Unix()
|
||||
filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
|
||||
cmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
|
||||
if err := cmd.Run(); err != nil {
|
||||
msg := "failed to capture window: " + err.Error()
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
return []byte("screenshot saved: " + filename)
|
||||
}
|
||||
|
||||
func captureWindowAndView(args map[string]string) []byte {
|
||||
if !windowToolsAvailable {
|
||||
return []byte("window tools not available: xdotool or maim not found")
|
||||
}
|
||||
window, ok := args["window"]
|
||||
if !ok || window == "" {
|
||||
return []byte("window parameter required (window ID or name)")
|
||||
}
|
||||
var windowID string
|
||||
if _, err := strconv.Atoi(window); err == nil {
|
||||
windowID = window
|
||||
} else {
|
||||
cmd := exec.Command(xdotoolPath, "search", "--name", window)
|
||||
output, err := cmd.Output()
|
||||
if err != nil || len(strings.Fields(string(output))) == 0 {
|
||||
return []byte("window not found: " + window)
|
||||
}
|
||||
windowID = strings.Fields(string(output))[0]
|
||||
}
|
||||
nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
|
||||
nameOutput, _ := nameCmd.Output()
|
||||
windowName := strings.TrimSpace(string(nameOutput))
|
||||
windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
|
||||
if windowName == "" {
|
||||
windowName = "window"
|
||||
}
|
||||
timestamp := time.Now().Unix()
|
||||
filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
|
||||
captureCmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
|
||||
if err := captureCmd.Run(); err != nil {
|
||||
msg := "failed to capture window: " + err.Error()
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
dataURL, err := models.CreateImageURLFromPath(filename)
|
||||
if err != nil {
|
||||
msg := "failed to create image URL: " + err.Error()
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
result := models.MultimodalToolResp{
|
||||
Type: "multimodal_content",
|
||||
Parts: []map[string]string{
|
||||
{"type": "text", "text": "Screenshot saved: " + filename},
|
||||
{"type": "image_url", "url": dataURL},
|
||||
},
|
||||
}
|
||||
jsonResult, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
msg := "failed to marshal result: " + err.Error()
|
||||
logger.Error(msg)
|
||||
return []byte(msg)
|
||||
}
|
||||
return jsonResult
|
||||
}
|
||||
|
||||
type fnSig func(map[string]string) []byte
|
||||
|
||||
var fnMap = map[string]fnSig{
|
||||
@@ -1101,6 +1348,7 @@ var fnMap = map[string]fnSig{
|
||||
"read_url_raw": readURLRaw,
|
||||
"file_create": fileCreate,
|
||||
"file_read": fileRead,
|
||||
"file_read_image": fileReadImage,
|
||||
"file_write": fileWrite,
|
||||
"file_write_append": fileWriteAppend,
|
||||
"file_edit": fileEdit,
|
||||
@@ -1116,6 +1364,66 @@ var fnMap = map[string]fnSig{
|
||||
"summarize_chat": summarizeChat,
|
||||
}
|
||||
|
||||
func registerWindowTools() {
|
||||
if windowToolsAvailable {
|
||||
fnMap["list_windows"] = listWindows
|
||||
fnMap["capture_window"] = captureWindow
|
||||
windowTools := []models.Tool{
|
||||
{
|
||||
Type: "function",
|
||||
Function: models.ToolFunc{
|
||||
Name: "list_windows",
|
||||
Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
|
||||
Parameters: models.ToolFuncParams{
|
||||
Type: "object",
|
||||
Required: []string{},
|
||||
Properties: map[string]models.ToolArgProps{},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: "function",
|
||||
Function: models.ToolFunc{
|
||||
Name: "capture_window",
|
||||
Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
|
||||
Parameters: models.ToolFuncParams{
|
||||
Type: "object",
|
||||
Required: []string{"window"},
|
||||
Properties: map[string]models.ToolArgProps{
|
||||
"window": models.ToolArgProps{
|
||||
Type: "string",
|
||||
Description: "window ID or window name (partial match)",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
if modelHasVision {
|
||||
fnMap["capture_window_and_view"] = captureWindowAndView
|
||||
windowTools = append(windowTools, models.Tool{
|
||||
Type: "function",
|
||||
Function: models.ToolFunc{
|
||||
Name: "capture_window_and_view",
|
||||
Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
|
||||
Parameters: models.ToolFuncParams{
|
||||
Type: "object",
|
||||
Required: []string{"window"},
|
||||
Properties: map[string]models.ToolArgProps{
|
||||
"window": models.ToolArgProps{
|
||||
Type: "string",
|
||||
Description: "window ID or window name (partial match)",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
baseTools = append(baseTools, windowTools...)
|
||||
toolSysMsg += windowToolSysMsg
|
||||
}
|
||||
}
|
||||
|
||||
// callToolWithAgent calls the tool and applies any registered agent.
|
||||
func callToolWithAgent(name string, args map[string]string) []byte {
|
||||
registerWebAgents()
|
||||
@@ -1327,6 +1635,24 @@ var baseTools = []models.Tool{
|
||||
},
|
||||
},
|
||||
},
|
||||
// file_read_image
|
||||
models.Tool{
|
||||
Type: "function",
|
||||
Function: models.ToolFunc{
|
||||
Name: "file_read_image",
|
||||
Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.",
|
||||
Parameters: models.ToolFuncParams{
|
||||
Type: "object",
|
||||
Required: []string{"path"},
|
||||
Properties: map[string]models.ToolArgProps{
|
||||
"path": models.ToolArgProps{
|
||||
Type: "string",
|
||||
Description: "path of the image file to read",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
// file_write
|
||||
models.Tool{
|
||||
Type: "function",
|
||||
@@ -1580,3 +1906,56 @@ var baseTools = []models.Tool{
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
if windowToolsAvailable {
|
||||
baseTools = append(baseTools,
|
||||
models.Tool{
|
||||
Type: "function",
|
||||
Function: models.ToolFunc{
|
||||
Name: "list_windows",
|
||||
Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
|
||||
Parameters: models.ToolFuncParams{
|
||||
Type: "object",
|
||||
Required: []string{},
|
||||
Properties: map[string]models.ToolArgProps{},
|
||||
},
|
||||
},
|
||||
},
|
||||
models.Tool{
|
||||
Type: "function",
|
||||
Function: models.ToolFunc{
|
||||
Name: "capture_window",
|
||||
Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
|
||||
Parameters: models.ToolFuncParams{
|
||||
Type: "object",
|
||||
Required: []string{"window"},
|
||||
Properties: map[string]models.ToolArgProps{
|
||||
"window": models.ToolArgProps{
|
||||
Type: "string",
|
||||
Description: "window ID or window name (partial match)",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
models.Tool{
|
||||
Type: "function",
|
||||
Function: models.ToolFunc{
|
||||
Name: "capture_window_and_view",
|
||||
Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
|
||||
Parameters: models.ToolFuncParams{
|
||||
Type: "object",
|
||||
Required: []string{"window"},
|
||||
Properties: map[string]models.ToolArgProps{
|
||||
"window": models.ToolArgProps{
|
||||
Type: "string",
|
||||
Description: "window ID or window name (partial match)",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
111
tui.go
111
tui.go
@@ -35,6 +35,8 @@ var (
|
||||
renameWindow *tview.InputField
|
||||
roleEditWindow *tview.InputField
|
||||
shellInput *tview.InputField
|
||||
confirmModal *tview.Modal
|
||||
confirmPageName = "confirm"
|
||||
fullscreenMode bool
|
||||
positionVisible bool = true
|
||||
scrollToEndEnabled bool = true
|
||||
@@ -195,6 +197,39 @@ func init() {
|
||||
}
|
||||
return event
|
||||
})
|
||||
confirmModal = tview.NewModal().
|
||||
SetText("You are trying to send an empty message.\nIt makes sense if the last message in the chat is from you.\nAre you sure?").
|
||||
AddButtons([]string{"Yes", "No"}).
|
||||
SetButtonBackgroundColor(tcell.ColorBlack).
|
||||
SetButtonTextColor(tcell.ColorWhite).
|
||||
SetDoneFunc(func(buttonIndex int, buttonLabel string) {
|
||||
if buttonLabel == "Yes" {
|
||||
persona := cfg.UserRole
|
||||
if cfg.WriteNextMsgAs != "" {
|
||||
persona = cfg.WriteNextMsgAs
|
||||
}
|
||||
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: ""}
|
||||
} // In both Yes and No, go back to the main page
|
||||
pages.SwitchToPage("main") // or whatever your main page is named
|
||||
})
|
||||
confirmModal.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
|
||||
if event.Key() == tcell.KeyRune {
|
||||
switch event.Rune() {
|
||||
case 'y', 'Y':
|
||||
persona := cfg.UserRole
|
||||
if cfg.WriteNextMsgAs != "" {
|
||||
persona = cfg.WriteNextMsgAs
|
||||
}
|
||||
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: ""}
|
||||
pages.SwitchToPage("main")
|
||||
return nil
|
||||
case 'n', 'N', 'x', 'X':
|
||||
pages.SwitchToPage("main")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return event
|
||||
})
|
||||
textArea = tview.NewTextArea().
|
||||
SetPlaceholder("input is multiline; press <Enter> to start the next line;\npress <Esc> to send the message.")
|
||||
textArea.SetBorder(true).SetTitle("input")
|
||||
@@ -691,6 +726,7 @@ func init() {
|
||||
if event.Key() == tcell.KeyF6 {
|
||||
interruptResp = true
|
||||
botRespMode = false
|
||||
toolRunningMode = false
|
||||
return nil
|
||||
}
|
||||
if event.Key() == tcell.KeyF7 {
|
||||
@@ -997,41 +1033,54 @@ func init() {
|
||||
return nil
|
||||
}
|
||||
msgText := textArea.GetText()
|
||||
nl := "\n\n" // keep empty lines between messages
|
||||
prevText := textView.GetText(true)
|
||||
persona := cfg.UserRole
|
||||
// strings.LastIndex()
|
||||
// newline is not needed if prev msg ends with one
|
||||
if strings.HasSuffix(prevText, nl) {
|
||||
nl = ""
|
||||
} else if strings.HasSuffix(prevText, "\n") {
|
||||
nl = "\n" // only one newline, add another
|
||||
}
|
||||
if msgText != "" {
|
||||
nl := "\n\n" // keep empty lines between messages
|
||||
prevText := textView.GetText(true)
|
||||
persona := cfg.UserRole
|
||||
// strings.LastIndex()
|
||||
// newline is not needed if prev msg ends with one
|
||||
if strings.HasSuffix(prevText, nl) {
|
||||
nl = ""
|
||||
} else if strings.HasSuffix(prevText, "\n") {
|
||||
nl = "\n" // only one newline, add another
|
||||
// as what char user sends msg?
|
||||
if cfg.WriteNextMsgAs != "" {
|
||||
persona = cfg.WriteNextMsgAs
|
||||
}
|
||||
if msgText != "" {
|
||||
// as what char user sends msg?
|
||||
if cfg.WriteNextMsgAs != "" {
|
||||
persona = cfg.WriteNextMsgAs
|
||||
// check if plain text
|
||||
if !injectRole {
|
||||
matches := roleRE.FindStringSubmatch(msgText)
|
||||
if len(matches) > 1 {
|
||||
persona = matches[1]
|
||||
msgText = strings.TrimLeft(msgText[len(matches[0]):], " ")
|
||||
}
|
||||
// check if plain text
|
||||
if !injectRole {
|
||||
matches := roleRE.FindStringSubmatch(msgText)
|
||||
if len(matches) > 1 {
|
||||
persona = matches[1]
|
||||
msgText = strings.TrimLeft(msgText[len(matches[0]):], " ")
|
||||
}
|
||||
}
|
||||
// add user icon before user msg
|
||||
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
||||
nl, len(chatBody.Messages), persona, msgText)
|
||||
textArea.SetText("", true)
|
||||
if scrollToEndEnabled {
|
||||
textView.ScrollToEnd()
|
||||
}
|
||||
colorText()
|
||||
}
|
||||
// go chatRound(msgText, persona, textView, false, false)
|
||||
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
|
||||
// add user icon before user msg
|
||||
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
||||
nl, len(chatBody.Messages), persona, msgText)
|
||||
textArea.SetText("", true)
|
||||
if scrollToEndEnabled {
|
||||
textView.ScrollToEnd()
|
||||
}
|
||||
colorText()
|
||||
} else {
|
||||
pages.AddPage(confirmPageName, confirmModal, true, true)
|
||||
return nil
|
||||
}
|
||||
// go chatRound(msgText, persona, textView, false, false)
|
||||
chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText}
|
||||
return nil
|
||||
}
|
||||
if event.Key() == tcell.KeyTab {
|
||||
currentF := app.GetFocus()
|
||||
if currentF == textArea {
|
||||
currentText := textArea.GetText()
|
||||
atIndex := strings.LastIndex(currentText, "@")
|
||||
if atIndex >= 0 {
|
||||
filter := currentText[atIndex+1:]
|
||||
showTextAreaFileCompletionPopup(filter)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user