Feat: input img

This commit is contained in:
Grail Finder
2025-11-20 19:13:04 +03:00
parent b4f9c5e26a
commit c21074a812
4 changed files with 366 additions and 18 deletions

54
llm.go
View File

@@ -8,6 +8,32 @@ import (
"strings" "strings"
) )
// imageAttachmentPath holds the path of the image that should be attached to
// the next message sent to the LLM. An empty string means nothing is pending.
var imageAttachmentPath string

// SetImageAttachment records imagePath as the image to attach to the next
// message sent to the LLM.
//
// The UI refresh that used to accompany this call is currently disabled, so
// it simply forwards to SetImageAttachmentWithoutUI.
func SetImageAttachment(imagePath string) {
	SetImageAttachmentWithoutUI(imagePath)
}

// SetImageAttachmentWithoutUI records imagePath as the pending attachment
// without touching the UI (for internal use where UI updates might cause
// hangs).
func SetImageAttachmentWithoutUI(imagePath string) {
	imageAttachmentPath = imagePath
}

// ClearImageAttachment drops any pending image attachment.
//
// The UI refresh that used to accompany this call is currently disabled, so
// it simply forwards to ClearImageAttachmentWithoutUI.
func ClearImageAttachment() {
	ClearImageAttachmentWithoutUI()
}

// ClearImageAttachmentWithoutUI drops any pending image attachment without
// touching the UI.
func ClearImageAttachmentWithoutUI() {
	imageAttachmentPath = ""
}
type ChunkParser interface { type ChunkParser interface {
ParseChunk([]byte) (*models.TextChunk, error) ParseChunk([]byte) (*models.TextChunk, error)
FormMsg(msg, role string, cont bool) (io.Reader, error) FormMsg(msg, role string, cont bool) (io.Reader, error)
@@ -165,7 +191,33 @@ func (op OpenAIer) ParseChunk(data []byte) (*models.TextChunk, error) {
func (op OpenAIer) FormMsg(msg, role string, resume bool) (io.Reader, error) { func (op OpenAIer) FormMsg(msg, role string, resume bool) (io.Reader, error) {
logger.Debug("formmsg openaier", "link", cfg.CurrentAPI) logger.Debug("formmsg openaier", "link", cfg.CurrentAPI)
if msg != "" { // otherwise let the bot continue if msg != "" { // otherwise let the bot continue
newMsg := models.RoleMsg{Role: role, Content: msg} // Create the message with support for multimodal content
var newMsg models.RoleMsg
// Check if we have an image to add to this message
if imageAttachmentPath != "" {
// Create a multimodal message with both text and image
newMsg = models.NewMultimodalMsg(role, []interface{}{})
// Add the text content
newMsg.AddTextPart(msg)
// Add the image content
imageURL, err := models.CreateImageURLFromPath(imageAttachmentPath)
if err != nil {
logger.Error("failed to create image URL from path", "error", err, "path", imageAttachmentPath)
// If image processing fails, fall back to simple text message
newMsg = models.NewRoleMsg(role, msg)
imageAttachmentPath = "" // Clear the attachment
} else {
newMsg.AddImagePart(imageURL)
imageAttachmentPath = "" // Clear the attachment after use
}
} else {
// Create a simple text message
newMsg = models.NewRoleMsg(role, msg)
}
chatBody.Messages = append(chatBody.Messages, newMsg) chatBody.Messages = append(chatBody.Messages, newMsg)
} }
req := models.OpenAIReq{ req := models.OpenAIReq{

View File

@@ -1,7 +1,10 @@
package models package models
import ( import (
"encoding/base64"
"encoding/json"
"fmt" "fmt"
"os"
"strings" "strings"
) )
@@ -69,23 +72,215 @@ type TextChunk struct {
FuncName string FuncName string
} }
// TextContentPart is a single text element of a structured (multi-part)
// message content list. AddTextPart creates these with Type set to "text".
type TextContentPart struct {
// Type is the part discriminator, serialized as "type".
Type string `json:"type"`
// Text is the text payload, serialized as "text".
Text string `json:"text"`
}
// ImageContentPart is a single image element of a structured (multi-part)
// message content list. AddImagePart creates these with Type set to
// "image_url" and the image location stored in ImageURL.URL.
type ImageContentPart struct {
// Type is the part discriminator, serialized as "type".
Type string `json:"type"`
// ImageURL wraps the image location; it is serialized as a nested
// "image_url" object with a single "url" key.
ImageURL struct {
URL string `json:"url"`
} `json:"image_url"`
}
// RoleMsg represents a message with content that can be either a simple string or structured content parts
type RoleMsg struct { type RoleMsg struct {
Role string `json:"role"` Role string `json:"role"`
Content string `json:"content"` Content string `json:"-"`
ContentParts []interface{} `json:"-"`
hasContentParts bool // Flag to indicate which content type to marshal
}
// MarshalJSON implements json.Marshaler for RoleMsg.
//
// The message is always written as an object with "role" and "content" keys.
// When hasContentParts is set, "content" is the ContentParts list; otherwise
// it is the plain Content string.
func (m RoleMsg) MarshalJSON() ([]byte, error) {
	if !m.hasContentParts {
		// Plain text message: content is a single string.
		type plainWire struct {
			Role    string `json:"role"`
			Content string `json:"content"`
		}
		return json.Marshal(plainWire{Role: m.Role, Content: m.Content})
	}
	// Structured message: content is the list of parts.
	type partsWire struct {
		Role    string        `json:"role"`
		Content []interface{} `json:"content"`
	}
	return json.Marshal(partsWire{Role: m.Role, Content: m.ContentParts})
}
// UnmarshalJSON implements custom JSON unmarshaling for RoleMsg
func (m *RoleMsg) UnmarshalJSON(data []byte) error {
// First, try to unmarshal as structured content format
var structured struct {
Role string `json:"role"`
Content []interface{} `json:"content"`
}
if err := json.Unmarshal(data, &structured); err == nil && len(structured.Content) > 0 {
m.Role = structured.Role
m.ContentParts = structured.Content
m.hasContentParts = true
return nil
}
// Otherwise, unmarshal as simple content format
var simple struct {
Role string `json:"role"`
Content string `json:"content"`
}
if err := json.Unmarshal(data, &simple); err != nil {
return err
}
m.Role = simple.Role
m.Content = simple.Content
m.hasContentParts = false
return nil
} }
func (m RoleMsg) ToText(i int) string { func (m RoleMsg) ToText(i int) string {
icon := fmt.Sprintf("(%d)", i) icon := fmt.Sprintf("(%d)", i)
// Convert content to string representation
contentStr := ""
if !m.hasContentParts {
contentStr = m.Content
} else {
// For structured content, just take the text parts
for _, part := range m.ContentParts {
if partMap, ok := part.(map[string]interface{}); ok {
if partType, exists := partMap["type"]; exists && partType == "text" {
if textVal, textExists := partMap["text"]; textExists {
if textStr, isStr := textVal.(string); isStr {
contentStr += textStr + " "
}
}
}
}
}
}
// check if already has role annotation (/completion makes them) // check if already has role annotation (/completion makes them)
if !strings.HasPrefix(m.Content, m.Role+":") { if !strings.HasPrefix(contentStr, m.Role+":") {
icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role) icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
} }
textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content) textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, contentStr)
return strings.ReplaceAll(textMsg, "\n\n", "\n") return strings.ReplaceAll(textMsg, "\n\n", "\n")
} }
func (m RoleMsg) ToPrompt() string { func (m RoleMsg) ToPrompt() string {
return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, m.Content), "\n\n", "\n") contentStr := ""
if !m.hasContentParts {
contentStr = m.Content
} else {
// For structured content, just take the text parts
for _, part := range m.ContentParts {
if partMap, ok := part.(map[string]interface{}); ok {
if partType, exists := partMap["type"]; exists && partType == "text" {
if textVal, textExists := partMap["text"]; textExists {
if textStr, isStr := textVal.(string); isStr {
contentStr += textStr + " "
}
}
}
}
}
}
return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, contentStr), "\n\n", "\n")
}
// NewRoleMsg builds a plain text RoleMsg for the given role.
//
// The returned message carries content as its Content string and is marked
// as not using structured content parts.
func NewRoleMsg(role, content string) RoleMsg {
	var msg RoleMsg // zero value already has hasContentParts == false
	msg.Role = role
	msg.Content = content
	return msg
}
// NewMultimodalMsg builds a RoleMsg that uses structured content parts
// (text and images) instead of a plain string.
//
// contentParts is stored as given, without copying; further parts can be
// appended with AddTextPart and AddImagePart.
func NewMultimodalMsg(role string, contentParts []interface{}) RoleMsg {
	msg := RoleMsg{Role: role, hasContentParts: true}
	msg.ContentParts = contentParts
	return msg
}
// AddTextPart appends a text part to the message's structured content.
//
// If the message is still a plain text message it is switched to the
// structured form first: any non-empty Content becomes the leading text part,
// and the new text follows it.
func (m *RoleMsg) AddTextPart(text string) {
	added := TextContentPart{Type: "text", Text: text}
	if m.hasContentParts {
		m.ContentParts = append(m.ContentParts, added)
		return
	}

	// First structured part: carry the existing plain text over, if any.
	parts := []interface{}{}
	if m.Content != "" {
		parts = append(parts, TextContentPart{Type: "text", Text: m.Content})
	}
	m.ContentParts = append(parts, added)
	m.hasContentParts = true
}
// AddImagePart appends an image part, referencing imageURL, to the message's
// structured content.
//
// If the message is still a plain text message it is switched to the
// structured form first, with any non-empty Content kept as the leading text
// part.
func (m *RoleMsg) AddImagePart(imageURL string) {
	if !m.hasContentParts {
		m.ContentParts = []interface{}{}
		if m.Content != "" {
			m.ContentParts = append(m.ContentParts, TextContentPart{Type: "text", Text: m.Content})
		}
		m.hasContentParts = true
	}

	var img ImageContentPart
	img.Type = "image_url"
	img.ImageURL.URL = imageURL
	m.ContentParts = append(m.ContentParts, img)
}
// CreateImageURLFromPath reads the image file at imagePath and returns its
// contents as a base64 data URL of the form "data:<mime>;base64,<payload>".
//
// The MIME type is derived from the file extension (see imageMIMEType). If
// the file cannot be read, the empty string and the read error are returned.
func CreateImageURLFromPath(imagePath string) (string, error) {
	data, err := os.ReadFile(imagePath)
	if err != nil {
		return "", err
	}
	encoded := base64.StdEncoding.EncodeToString(data)
	return fmt.Sprintf("data:%s;base64,%s", imageMIMEType(imagePath), encoded), nil
}

// imageMIMEType maps the extension of name (case-insensitive) to an image
// MIME type. It covers every extension the file picker treats as an image;
// anything else falls back to "image/jpeg".
func imageMIMEType(name string) string {
	lower := strings.ToLower(name)
	ext := ""
	if dot := strings.LastIndex(lower, "."); dot >= 0 {
		ext = lower[dot:]
	}
	switch ext {
	case ".png":
		return "image/png"
	case ".jpg", ".jpeg":
		return "image/jpeg"
	case ".gif":
		return "image/gif"
	case ".webp":
		return "image/webp"
	case ".bmp":
		return "image/bmp"
	case ".tif", ".tiff":
		return "image/tiff"
	case ".svg":
		return "image/svg+xml"
	default:
		return "image/jpeg" // historical default for unknown extensions
	}
}
type ChatBody struct { type ChatBody struct {

View File

@@ -563,6 +563,18 @@ func makeFilePicker() *tview.Flex {
// Track currently displayed directory (changes as user navigates) // Track currently displayed directory (changes as user navigates)
var currentDisplayDir string = startDir var currentDisplayDir string = startDir
// Helper function to check if a file is an image
isImageFile := func(filename string) bool {
imageExtensions := []string{".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg"}
lowerFilename := strings.ToLower(filename)
for _, ext := range imageExtensions {
if strings.HasSuffix(lowerFilename, ext) {
return true
}
}
return false
}
// Create UI elements // Create UI elements
listView := tview.NewList() listView := tview.NewList()
listView.SetBorder(true).SetTitle("Files & Directories").SetTitleAlign(tview.AlignLeft) listView.SetBorder(true).SetTitle("Files & Directories").SetTitleAlign(tview.AlignLeft)
@@ -584,11 +596,23 @@ func makeFilePicker() *tview.Flex {
loadButton := tview.NewButton("Load") loadButton := tview.NewButton("Load")
loadButton.SetSelectedFunc(func() { loadButton.SetSelectedFunc(func() {
if selectedFile != "" { if selectedFile != "" {
// Update the global text area with the selected file path // Check if the selected file is an image
textArea.SetText(selectedFile, true) if isImageFile(selectedFile) {
app.SetFocus(textArea) // For image files, set it as an attachment for the next LLM message
SetImageAttachment(selectedFile)
statusView.SetText("Image attached: " + selectedFile + " (will be sent with next message)")
// Close the file picker but don't change the text area
pages.RemovePage(filePickerPage)
} else {
// For non-image files, update the text area with file path
textArea.SetText(selectedFile, true)
app.SetFocus(textArea)
pages.RemovePage(filePickerPage)
}
} else {
// If no file is selected, just close the picker
pages.RemovePage(filePickerPage)
} }
pages.RemovePage(filePickerPage)
}) })
cancelButton := tview.NewButton("Cancel") cancelButton := tview.NewButton("Cancel")
@@ -649,6 +673,12 @@ func makeFilePicker() *tview.Flex {
// Add directories and files to the list // Add directories and files to the list
for _, file := range files { for _, file := range files {
name := file.Name() name := file.Name()
// Skip hidden files and directories (those starting with a dot)
if strings.HasPrefix(name, ".") {
continue
}
if file.IsDir() { if file.IsDir() {
// Capture the directory name for the closure to avoid loop variable issues // Capture the directory name for the closure to avoid loop variable issues
dirName := name dirName := name
@@ -662,9 +692,19 @@ func makeFilePicker() *tview.Flex {
} else { } else {
// Capture the file name for the closure to avoid loop variable issues // Capture the file name for the closure to avoid loop variable issues
fileName := name fileName := name
fullFilePath := path.Join(dir, fileName)
listView.AddItem(fileName, "(File)", 0, func() { listView.AddItem(fileName, "(File)", 0, func() {
selectedFile = path.Join(dir, fileName) selectedFile = fullFilePath
statusView.SetText("Selected: " + selectedFile) statusView.SetText("Selected: " + selectedFile)
// Check if the file is an image
if isImageFile(fileName) {
// For image files, offer to attach to the next LLM message
statusView.SetText("Selected image: " + selectedFile + " (Press Load to attach)")
} else {
// For non-image files, display as before
statusView.SetText("Selected: " + selectedFile)
}
}) })
} }
} }
@@ -769,9 +809,23 @@ func makeFilePicker() *tview.Flex {
filePath := path.Join(currentDisplayDir, itemText) filePath := path.Join(currentDisplayDir, itemText)
// Verify it's actually a file (not just lacking a directory suffix) // Verify it's actually a file (not just lacking a directory suffix)
if info, err := os.Stat(filePath); err == nil && !info.IsDir() { if info, err := os.Stat(filePath); err == nil && !info.IsDir() {
textArea.SetText(filePath, true) // Check if the file is an image
app.SetFocus(textArea) if isImageFile(itemText) {
pages.RemovePage(filePickerPage) // For image files, set it as an attachment for the next LLM message
// Use the version without UI updates to avoid hangs in event handlers
logger.Info("setting image", "file", itemText)
SetImageAttachmentWithoutUI(filePath)
logger.Info("after setting image", "file", itemText)
statusView.SetText("Image attached: " + filePath + " (will be sent with next message)")
logger.Info("after setting text", "file", itemText)
pages.RemovePage(filePickerPage)
logger.Info("after update drawn", "file", itemText)
} else {
// For non-image files, update the text area with file path
textArea.SetText(filePath, true)
app.SetFocus(textArea)
pages.RemovePage(filePickerPage)
}
} }
return nil return nil
} }

55
tui.go
View File

@@ -231,10 +231,22 @@ func makeStatusLine() string {
if cfg.WriteNextMsgAsCompletionAgent != "" { if cfg.WriteNextMsgAsCompletionAgent != "" {
botPersona = cfg.WriteNextMsgAsCompletionAgent botPersona = cfg.WriteNextMsgAsCompletionAgent
} }
// Add image attachment info to status line
var imageInfo string
if imageAttachmentPath != "" {
// Get just the filename from the path
imageName := path.Base(imageAttachmentPath)
imageInfo = fmt.Sprintf(" | attached img: [orange:-:b]%s[-:-:-]", imageName)
} else {
imageInfo = ""
}
statusLine := fmt.Sprintf(indexLineCompletion, botRespMode, cfg.AssistantRole, activeChatName, statusLine := fmt.Sprintf(indexLineCompletion, botRespMode, cfg.AssistantRole, activeChatName,
cfg.ToolUse, chatBody.Model, cfg.SkipLLMResp, cfg.CurrentAPI, cfg.ThinkUse, logLevel.Level(), cfg.ToolUse, chatBody.Model, cfg.SkipLLMResp, cfg.CurrentAPI, cfg.ThinkUse, logLevel.Level(),
isRecording, persona, botPersona, injectRole) isRecording, persona, botPersona, injectRole)
return statusLine
return statusLine + imageInfo
} }
func updateStatusLine() { func updateStatusLine() {
@@ -422,7 +434,7 @@ func init() {
}) })
flex = tview.NewFlex().SetDirection(tview.FlexRow). flex = tview.NewFlex().SetDirection(tview.FlexRow).
AddItem(textView, 0, 40, false). AddItem(textView, 0, 40, false).
AddItem(textArea, 0, 10, true). AddItem(textArea, 0, 10, true). // Restore original height
AddItem(position, 0, 2, false) AddItem(position, 0, 2, false)
editArea = tview.NewTextArea(). editArea = tview.NewTextArea().
SetPlaceholder("Replace msg...") SetPlaceholder("Replace msg...")
@@ -801,8 +813,29 @@ func init() {
return nil return nil
} }
if event.Key() == tcell.KeyCtrlJ { if event.Key() == tcell.KeyCtrlJ {
// show image // show image - check for attached image first, then fall back to agent image
loadImage() if imageAttachmentPath != "" {
// Load the attached image
file, err := os.Open(imageAttachmentPath)
if err != nil {
logger.Error("failed to open attached image", "path", imageAttachmentPath, "error", err)
// Fall back to showing agent image
loadImage()
} else {
defer file.Close()
img, _, err := image.Decode(file)
if err != nil {
logger.Error("failed to decode attached image", "path", imageAttachmentPath, "error", err)
// Fall back to showing agent image
loadImage()
} else {
imgView.SetImage(img)
}
}
} else {
// No attached image, show agent image as before
loadImage()
}
pages.AddPage(imgPage, imgView, true, true) pages.AddPage(imgPage, imgView, true, true)
return nil return nil
} }
@@ -977,6 +1010,13 @@ func init() {
colorText() colorText()
} }
go chatRound(msgText, persona, textView, false, false) go chatRound(msgText, persona, textView, false, false)
// Also clear any image attachment after sending the message
go func() {
// Wait a short moment for the message to be processed, then clear the image attachment
// This allows the image to be sent with the current message if it was attached
// But clears it for the next message
ClearImageAttachment()
}()
return nil return nil
} }
if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn { if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn {
@@ -990,3 +1030,10 @@ func init() {
return event return event
}) })
} }
// UpdateImageAttachmentStatus refreshes the UI after the image attachment
// has changed. The attachment is shown as part of the main status line, so
// this only redraws that line; the imagePath argument is currently unused.
func UpdateImageAttachmentStatus(imagePath string) {
// The status line is rebuilt from the current attachment state, so no
// value needs to be passed along here.
updateStatusLine()
}