Feat: input img
This commit is contained in:
54
llm.go
54
llm.go
@@ -8,6 +8,32 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// imageAttachmentPath is the path of the image queued for the next message
// sent to the LLM. The empty string means no image is attached.
var imageAttachmentPath string

// SetImageAttachment queues imagePath to be attached to the next message sent
// to the LLM.
//
// It only records the path. The call that would refresh the UI
// (UpdateImageAttachmentStatus) is disabled, so this function currently
// behaves exactly like SetImageAttachmentWithoutUI.
func SetImageAttachment(imagePath string) {
	SetImageAttachmentWithoutUI(imagePath)
	// UI refresh intentionally left disabled:
	// UpdateImageAttachmentStatus(imagePath)
}

// SetImageAttachmentWithoutUI queues imagePath to be attached to the next
// message without touching the UI. It is meant for internal callers where a
// UI update might cause a hang.
func SetImageAttachmentWithoutUI(imagePath string) {
	imageAttachmentPath = imagePath
}

// ClearImageAttachment drops any pending image attachment.
//
// It only resets the stored path. The call that would refresh the UI
// (UpdateImageAttachmentStatus) is disabled, so this function currently
// behaves exactly like ClearImageAttachmentWithoutUI.
func ClearImageAttachment() {
	ClearImageAttachmentWithoutUI()
	// UI refresh intentionally left disabled:
	// UpdateImageAttachmentStatus("")
}

// ClearImageAttachmentWithoutUI drops any pending image attachment without
// touching the UI.
func ClearImageAttachmentWithoutUI() {
	imageAttachmentPath = ""
}
|
||||
|
||||
type ChunkParser interface {
|
||||
ParseChunk([]byte) (*models.TextChunk, error)
|
||||
FormMsg(msg, role string, cont bool) (io.Reader, error)
|
||||
@@ -165,7 +191,33 @@ func (op OpenAIer) ParseChunk(data []byte) (*models.TextChunk, error) {
|
||||
func (op OpenAIer) FormMsg(msg, role string, resume bool) (io.Reader, error) {
|
||||
logger.Debug("formmsg openaier", "link", cfg.CurrentAPI)
|
||||
if msg != "" { // otherwise let the bot continue
|
||||
newMsg := models.RoleMsg{Role: role, Content: msg}
|
||||
// Create the message with support for multimodal content
|
||||
var newMsg models.RoleMsg
|
||||
|
||||
// Check if we have an image to add to this message
|
||||
if imageAttachmentPath != "" {
|
||||
// Create a multimodal message with both text and image
|
||||
newMsg = models.NewMultimodalMsg(role, []interface{}{})
|
||||
|
||||
// Add the text content
|
||||
newMsg.AddTextPart(msg)
|
||||
|
||||
// Add the image content
|
||||
imageURL, err := models.CreateImageURLFromPath(imageAttachmentPath)
|
||||
if err != nil {
|
||||
logger.Error("failed to create image URL from path", "error", err, "path", imageAttachmentPath)
|
||||
// If image processing fails, fall back to simple text message
|
||||
newMsg = models.NewRoleMsg(role, msg)
|
||||
imageAttachmentPath = "" // Clear the attachment
|
||||
} else {
|
||||
newMsg.AddImagePart(imageURL)
|
||||
imageAttachmentPath = "" // Clear the attachment after use
|
||||
}
|
||||
} else {
|
||||
// Create a simple text message
|
||||
newMsg = models.NewRoleMsg(role, msg)
|
||||
}
|
||||
|
||||
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||
}
|
||||
req := models.OpenAIReq{
|
||||
|
||||
201
models/models.go
201
models/models.go
@@ -1,7 +1,10 @@
|
||||
package models
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -69,23 +72,215 @@ type TextChunk struct {
|
||||
FuncName string
|
||||
}
|
||||
|
||||
// TextContentPart is a text element of a multimodal message's content array.
type TextContentPart struct {
	Type string `json:"type"`
	Text string `json:"text"`
}

// ImageContentPart is an image element of a multimodal message's content
// array. ImageURL.URL carries the image location; AddImagePart stores whatever
// URL it is given (for example a data URL).
type ImageContentPart struct {
	Type     string `json:"type"`
	ImageURL struct {
		URL string `json:"url"`
	} `json:"image_url"`
}

// RoleMsg is a chat message whose content is either a plain string (Content)
// or a list of structured parts (ContentParts). Which of the two is
// authoritative, and which is serialized, is decided by hasContentParts.
type RoleMsg struct {
	Role            string        `json:"role"`
	Content         string        `json:"-"`
	ContentParts    []interface{} `json:"-"`
	hasContentParts bool          // true when ContentParts, not Content, is authoritative
}

// MarshalJSON encodes the message as {"role": ..., "content": ...}, where
// content is a JSON string for simple messages and a JSON array for
// multimodal ones. A multimodal message with no parts encodes an empty array
// rather than null.
func (m RoleMsg) MarshalJSON() ([]byte, error) {
	if !m.hasContentParts {
		return json.Marshal(struct {
			Role    string `json:"role"`
			Content string `json:"content"`
		}{Role: m.Role, Content: m.Content})
	}
	parts := m.ContentParts
	if parts == nil {
		// A nil slice would be encoded as null; keep the array shape.
		parts = []interface{}{}
	}
	return json.Marshal(struct {
		Role    string        `json:"role"`
		Content []interface{} `json:"content"`
	}{Role: m.Role, Content: parts})
}

// UnmarshalJSON decodes both message shapes. A non-empty "content" array
// yields a multimodal message; a string, null, a missing field, or an empty
// array yields a simple message. Any other content type is an error.
//
// The receiver is fully overwritten, so decoding into a reused value never
// leaves content behind from a previous message.
func (m *RoleMsg) UnmarshalJSON(data []byte) error {
	var envelope struct {
		Role    string          `json:"role"`
		Content json.RawMessage `json:"content"`
	}
	if err := json.Unmarshal(data, &envelope); err != nil {
		return err
	}

	decoded := RoleMsg{Role: envelope.Role}
	if len(envelope.Content) > 0 {
		var parts []interface{}
		if err := json.Unmarshal(envelope.Content, &parts); err != nil {
			// Not an array (and not null): it has to be a plain string.
			if err := json.Unmarshal(envelope.Content, &decoded.Content); err != nil {
				return err
			}
		} else if len(parts) > 0 {
			decoded.ContentParts = parts
			decoded.hasContentParts = true
		}
	}

	*m = decoded
	return nil
}

// plainText returns the textual content of the message. For simple messages
// that is Content. For multimodal messages it is every text part followed by
// a single space; non-text parts are skipped.
//
// Parts are recognised both as TextContentPart values (added through
// AddTextPart) and as map[string]interface{} with "type" == "text" (what
// encoding/json produces when a message is decoded).
func (m RoleMsg) plainText() string {
	if !m.hasContentParts {
		return m.Content
	}
	var sb strings.Builder
	for _, part := range m.ContentParts {
		switch p := part.(type) {
		case TextContentPart:
			sb.WriteString(p.Text)
			sb.WriteByte(' ')
		case *TextContentPart:
			if p != nil {
				sb.WriteString(p.Text)
				sb.WriteByte(' ')
			}
		case map[string]interface{}:
			if p["type"] != "text" {
				continue
			}
			if text, ok := p["text"].(string); ok {
				sb.WriteString(text)
				sb.WriteByte(' ')
			}
		}
	}
	return sb.String()
}

// ToText renders the message for display: a bold header holding the index i
// (and the role, unless the text already begins with "<role>:"), followed by
// the message text. Doubled newlines are collapsed to one.
func (m RoleMsg) ToText(i int) string {
	icon := fmt.Sprintf("(%d)", i)
	contentStr := m.plainText()
	// check if already has role annotation (/completion makes them)
	if !strings.HasPrefix(contentStr, m.Role+":") {
		icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
	}
	textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, contentStr)
	return strings.ReplaceAll(textMsg, "\n\n", "\n")
}

// ToPrompt renders the message as "<role>:\n<text>" with doubled newlines
// collapsed to one. Only the text parts of a multimodal message are included.
func (m RoleMsg) ToPrompt() string {
	return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, m.plainText()), "\n\n", "\n")
}

// NewRoleMsg creates a simple RoleMsg with string content.
func NewRoleMsg(role, content string) RoleMsg {
	return RoleMsg{
		Role:            role,
		Content:         content,
		hasContentParts: false,
	}
}

// NewMultimodalMsg creates a RoleMsg backed by structured content parts
// (text and images). contentParts may be nil or empty; parts can be appended
// afterwards with AddTextPart and AddImagePart.
func NewMultimodalMsg(role string, contentParts []interface{}) RoleMsg {
	return RoleMsg{
		Role:            role,
		ContentParts:    contentParts,
		hasContentParts: true,
	}
}

// ensureContentParts switches a simple message to the structured format. Any
// existing non-empty Content becomes the first text part. It is a no-op for
// messages that are already structured.
func (m *RoleMsg) ensureContentParts() {
	if m.hasContentParts {
		return
	}
	m.ContentParts = []interface{}{}
	if m.Content != "" {
		m.ContentParts = append(m.ContentParts, TextContentPart{Type: "text", Text: m.Content})
	}
	m.hasContentParts = true
}

// AddTextPart appends a text part to the message, converting a simple
// message to the structured format first.
func (m *RoleMsg) AddTextPart(text string) {
	m.ensureContentParts()
	m.ContentParts = append(m.ContentParts, TextContentPart{Type: "text", Text: text})
}

// AddImagePart appends an image part referencing imageURL to the message,
// converting a simple message to the structured format first.
func (m *RoleMsg) AddImagePart(imageURL string) {
	m.ensureContentParts()
	imagePart := ImageContentPart{Type: "image_url"}
	imagePart.ImageURL.URL = imageURL
	m.ContentParts = append(m.ContentParts, imagePart)
}
|
||||
|
||||
// CreateImageURLFromPath reads the file at imagePath and returns its contents
// as a base64 data URL ("data:<mime>;base64,<payload>").
//
// The MIME type is chosen from the file extension, case-insensitively: png,
// jpg/jpeg, gif, webp, bmp, tif/tiff and svg are recognised. Any other
// extension is labelled image/jpeg. The file contents are not inspected.
//
// The error from os.ReadFile is returned unchanged when the file cannot be
// read.
func CreateImageURLFromPath(imagePath string) (string, error) {
	data, err := os.ReadFile(imagePath)
	if err != nil {
		return "", err
	}

	// Lower-case once so the suffix checks are case-insensitive.
	lowerPath := strings.ToLower(imagePath)
	mimeType := "image/jpeg" // default for unrecognised extensions
	switch {
	case strings.HasSuffix(lowerPath, ".png"):
		mimeType = "image/png"
	case strings.HasSuffix(lowerPath, ".jpg"), strings.HasSuffix(lowerPath, ".jpeg"):
		mimeType = "image/jpeg"
	case strings.HasSuffix(lowerPath, ".gif"):
		mimeType = "image/gif"
	case strings.HasSuffix(lowerPath, ".webp"):
		mimeType = "image/webp"
	case strings.HasSuffix(lowerPath, ".bmp"):
		mimeType = "image/bmp"
	case strings.HasSuffix(lowerPath, ".tif"), strings.HasSuffix(lowerPath, ".tiff"):
		mimeType = "image/tiff"
	case strings.HasSuffix(lowerPath, ".svg"):
		mimeType = "image/svg+xml"
	}

	encoded := base64.StdEncoding.EncodeToString(data)
	return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
}
|
||||
|
||||
type ChatBody struct {
|
||||
|
||||
60
tables.go
60
tables.go
@@ -563,6 +563,18 @@ func makeFilePicker() *tview.Flex {
|
||||
// Track currently displayed directory (changes as user navigates)
|
||||
var currentDisplayDir string = startDir
|
||||
|
||||
// Helper function to check if a file is an image
|
||||
isImageFile := func(filename string) bool {
|
||||
imageExtensions := []string{".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg"}
|
||||
lowerFilename := strings.ToLower(filename)
|
||||
for _, ext := range imageExtensions {
|
||||
if strings.HasSuffix(lowerFilename, ext) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Create UI elements
|
||||
listView := tview.NewList()
|
||||
listView.SetBorder(true).SetTitle("Files & Directories").SetTitleAlign(tview.AlignLeft)
|
||||
@@ -584,11 +596,23 @@ func makeFilePicker() *tview.Flex {
|
||||
loadButton := tview.NewButton("Load")
|
||||
loadButton.SetSelectedFunc(func() {
|
||||
if selectedFile != "" {
|
||||
// Update the global text area with the selected file path
|
||||
// Check if the selected file is an image
|
||||
if isImageFile(selectedFile) {
|
||||
// For image files, set it as an attachment for the next LLM message
|
||||
SetImageAttachment(selectedFile)
|
||||
statusView.SetText("Image attached: " + selectedFile + " (will be sent with next message)")
|
||||
// Close the file picker but don't change the text area
|
||||
pages.RemovePage(filePickerPage)
|
||||
} else {
|
||||
// For non-image files, update the text area with file path
|
||||
textArea.SetText(selectedFile, true)
|
||||
app.SetFocus(textArea)
|
||||
}
|
||||
pages.RemovePage(filePickerPage)
|
||||
}
|
||||
} else {
|
||||
// If no file is selected, just close the picker
|
||||
pages.RemovePage(filePickerPage)
|
||||
}
|
||||
})
|
||||
|
||||
cancelButton := tview.NewButton("Cancel")
|
||||
@@ -649,6 +673,12 @@ func makeFilePicker() *tview.Flex {
|
||||
// Add directories and files to the list
|
||||
for _, file := range files {
|
||||
name := file.Name()
|
||||
|
||||
// Skip hidden files and directories (those starting with a dot)
|
||||
if strings.HasPrefix(name, ".") {
|
||||
continue
|
||||
}
|
||||
|
||||
if file.IsDir() {
|
||||
// Capture the directory name for the closure to avoid loop variable issues
|
||||
dirName := name
|
||||
@@ -662,9 +692,19 @@ func makeFilePicker() *tview.Flex {
|
||||
} else {
|
||||
// Capture the file name for the closure to avoid loop variable issues
|
||||
fileName := name
|
||||
fullFilePath := path.Join(dir, fileName)
|
||||
listView.AddItem(fileName, "(File)", 0, func() {
|
||||
selectedFile = path.Join(dir, fileName)
|
||||
selectedFile = fullFilePath
|
||||
statusView.SetText("Selected: " + selectedFile)
|
||||
|
||||
// Check if the file is an image
|
||||
if isImageFile(fileName) {
|
||||
// For image files, offer to attach to the next LLM message
|
||||
statusView.SetText("Selected image: " + selectedFile + " (Press Load to attach)")
|
||||
} else {
|
||||
// For non-image files, display as before
|
||||
statusView.SetText("Selected: " + selectedFile)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -769,10 +809,24 @@ func makeFilePicker() *tview.Flex {
|
||||
filePath := path.Join(currentDisplayDir, itemText)
|
||||
// Verify it's actually a file (not just lacking a directory suffix)
|
||||
if info, err := os.Stat(filePath); err == nil && !info.IsDir() {
|
||||
// Check if the file is an image
|
||||
if isImageFile(itemText) {
|
||||
// For image files, set it as an attachment for the next LLM message
|
||||
// Use the version without UI updates to avoid hangs in event handlers
|
||||
logger.Info("setting image", "file", itemText)
|
||||
SetImageAttachmentWithoutUI(filePath)
|
||||
logger.Info("after setting image", "file", itemText)
|
||||
statusView.SetText("Image attached: " + filePath + " (will be sent with next message)")
|
||||
logger.Info("after setting text", "file", itemText)
|
||||
pages.RemovePage(filePickerPage)
|
||||
logger.Info("after update drawn", "file", itemText)
|
||||
} else {
|
||||
// For non-image files, update the text area with file path
|
||||
textArea.SetText(filePath, true)
|
||||
app.SetFocus(textArea)
|
||||
pages.RemovePage(filePickerPage)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
53
tui.go
53
tui.go
@@ -231,10 +231,22 @@ func makeStatusLine() string {
|
||||
if cfg.WriteNextMsgAsCompletionAgent != "" {
|
||||
botPersona = cfg.WriteNextMsgAsCompletionAgent
|
||||
}
|
||||
|
||||
// Add image attachment info to status line
|
||||
var imageInfo string
|
||||
if imageAttachmentPath != "" {
|
||||
// Get just the filename from the path
|
||||
imageName := path.Base(imageAttachmentPath)
|
||||
imageInfo = fmt.Sprintf(" | attached img: [orange:-:b]%s[-:-:-]", imageName)
|
||||
} else {
|
||||
imageInfo = ""
|
||||
}
|
||||
|
||||
statusLine := fmt.Sprintf(indexLineCompletion, botRespMode, cfg.AssistantRole, activeChatName,
|
||||
cfg.ToolUse, chatBody.Model, cfg.SkipLLMResp, cfg.CurrentAPI, cfg.ThinkUse, logLevel.Level(),
|
||||
isRecording, persona, botPersona, injectRole)
|
||||
return statusLine
|
||||
|
||||
return statusLine + imageInfo
|
||||
}
|
||||
|
||||
func updateStatusLine() {
|
||||
@@ -422,7 +434,7 @@ func init() {
|
||||
})
|
||||
flex = tview.NewFlex().SetDirection(tview.FlexRow).
|
||||
AddItem(textView, 0, 40, false).
|
||||
AddItem(textArea, 0, 10, true).
|
||||
AddItem(textArea, 0, 10, true). // Restore original height
|
||||
AddItem(position, 0, 2, false)
|
||||
editArea = tview.NewTextArea().
|
||||
SetPlaceholder("Replace msg...")
|
||||
@@ -801,8 +813,29 @@ func init() {
|
||||
return nil
|
||||
}
|
||||
if event.Key() == tcell.KeyCtrlJ {
|
||||
// show image
|
||||
// show image - check for attached image first, then fall back to agent image
|
||||
if imageAttachmentPath != "" {
|
||||
// Load the attached image
|
||||
file, err := os.Open(imageAttachmentPath)
|
||||
if err != nil {
|
||||
logger.Error("failed to open attached image", "path", imageAttachmentPath, "error", err)
|
||||
// Fall back to showing agent image
|
||||
loadImage()
|
||||
} else {
|
||||
defer file.Close()
|
||||
img, _, err := image.Decode(file)
|
||||
if err != nil {
|
||||
logger.Error("failed to decode attached image", "path", imageAttachmentPath, "error", err)
|
||||
// Fall back to showing agent image
|
||||
loadImage()
|
||||
} else {
|
||||
imgView.SetImage(img)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No attached image, show agent image as before
|
||||
loadImage()
|
||||
}
|
||||
pages.AddPage(imgPage, imgView, true, true)
|
||||
return nil
|
||||
}
|
||||
@@ -977,6 +1010,13 @@ func init() {
|
||||
colorText()
|
||||
}
|
||||
go chatRound(msgText, persona, textView, false, false)
|
||||
// Also clear any image attachment after sending the message
|
||||
go func() {
|
||||
// Wait a short moment for the message to be processed, then clear the image attachment
|
||||
// This allows the image to be sent with the current message if it was attached
|
||||
// But clears it for the next message
|
||||
ClearImageAttachment()
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn {
|
||||
@@ -990,3 +1030,10 @@ func init() {
|
||||
return event
|
||||
})
|
||||
}
|
||||
|
||||
// UpdateImageAttachmentStatus updates the UI to reflect the current image attachment status
|
||||
func UpdateImageAttachmentStatus(imagePath string) {
|
||||
// The image attachment status is now shown in the main status line
|
||||
// Just update the status line to reflect the current image attachment
|
||||
updateStatusLine()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user