Enha: OpenRouter image attachment

This commit is contained in:
Grail Finder
2025-11-24 13:06:42 +03:00
parent 8a62e98789
commit b5ade05489
2 changed files with 202 additions and 35 deletions

68
bot.go
View File

@@ -151,21 +151,63 @@ func fetchORModels(free bool) ([]string, error) {
func sendMsgToLLM(body io.Reader) { func sendMsgToLLM(body io.Reader) {
choseChunkParser() choseChunkParser()
// nolint
req, err := http.NewRequest("POST", cfg.CurrentAPI, body) var req *http.Request
if err != nil { var err error
logger.Error("newreq error", "error", err)
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil { // Capture and log the request body for debugging
logger.Error("failed to notify", "error", err) if _, ok := body.(*io.LimitedReader); ok {
// If it's a LimitedReader, we need to handle it differently
logger.Debug("request body type is LimitedReader", "parser", chunkParser, "link", cfg.CurrentAPI)
req, err = http.NewRequest("POST", cfg.CurrentAPI, body)
if err != nil {
logger.Error("newreq error", "error", err)
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil {
logger.Error("failed to notify", "error", err)
}
streamDone <- true
return
} }
streamDone <- true req.Header.Add("Accept", "application/json")
return req.Header.Add("Content-Type", "application/json")
req.Header.Add("Authorization", "Bearer "+chunkParser.GetToken())
req.Header.Set("Accept-Encoding", "gzip")
} else {
// For other reader types, capture and log the body content
bodyBytes, err := io.ReadAll(body)
if err != nil {
logger.Error("failed to read request body for logging", "error", err)
// Create request with original body if reading fails
req, err = http.NewRequest("POST", cfg.CurrentAPI, bytes.NewReader(bodyBytes))
if err != nil {
logger.Error("newreq error", "error", err)
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil {
logger.Error("failed to notify", "error", err)
}
streamDone <- true
return
}
} else {
// Log the request body for debugging
logger.Info("sending request to API", "api", cfg.CurrentAPI, "body", string(bodyBytes))
// Create request with the captured body
req, err = http.NewRequest("POST", cfg.CurrentAPI, bytes.NewReader(bodyBytes))
if err != nil {
logger.Error("newreq error", "error", err)
if err := notifyUser("error", "apicall failed:"+err.Error()); err != nil {
logger.Error("failed to notify", "error", err)
}
streamDone <- true
return
}
}
req.Header.Add("Accept", "application/json")
req.Header.Add("Content-Type", "application/json")
req.Header.Add("Authorization", "Bearer "+chunkParser.GetToken())
req.Header.Set("Accept-Encoding", "gzip")
} }
req.Header.Add("Accept", "application/json")
req.Header.Add("Content-Type", "application/json")
req.Header.Add("Authorization", "Bearer "+chunkParser.GetToken())
// req.Header.Set("Content-Length", strconv.Itoa(len(bodyBytes)))
req.Header.Set("Accept-Encoding", "gzip")
// nolint // nolint
resp, err := httpClient.Do(req) resp, err := httpClient.Do(req)
if err != nil { if err != nil {

169
llm.go
View File

@@ -3,8 +3,10 @@ package main
import ( import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt"
"gf-lt/models" "gf-lt/models"
"io" "io"
"os"
"strings" "strings"
) )
@@ -76,6 +78,13 @@ type OpenRouterChat struct {
Model string Model string
} }
// min reports the smaller of the two ints a and b.
// When both are equal, that shared value is returned.
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
func (lcp LlamaCPPeer) GetToken() string { func (lcp LlamaCPPeer) GetToken() string {
return "" return ""
} }
@@ -180,24 +189,29 @@ func (op OpenAIer) ParseChunk(data []byte) (*models.TextChunk, error) {
func (op OpenAIer) FormMsg(msg, role string, resume bool) (io.Reader, error) { func (op OpenAIer) FormMsg(msg, role string, resume bool) (io.Reader, error) {
logger.Debug("formmsg openaier", "link", cfg.CurrentAPI) logger.Debug("formmsg openaier", "link", cfg.CurrentAPI)
// Capture the image attachment path at the beginning to avoid race conditions
// with API rotation that might clear the global variable
localImageAttachmentPath := imageAttachmentPath
if msg != "" { // otherwise let the bot continue if msg != "" { // otherwise let the bot continue
// Create the message with support for multimodal content // Create the message with support for multimodal content
var newMsg models.RoleMsg var newMsg models.RoleMsg
// Check if we have an image to add to this message // Check if we have an image to add to this message
if imageAttachmentPath != "" { if localImageAttachmentPath != "" {
// Create a multimodal message with both text and image // Create a multimodal message with both text and image
newMsg = models.NewMultimodalMsg(role, []interface{}{}) newMsg = models.NewMultimodalMsg(role, []interface{}{})
// Add the text content // Add the text content
newMsg.AddTextPart(msg) newMsg.AddTextPart(msg)
// Add the image content // Add the image content
imageURL, err := models.CreateImageURLFromPath(imageAttachmentPath) imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
if err != nil { if err != nil {
logger.Error("failed to create image URL from path", "error", err, "path", imageAttachmentPath) logger.Error("failed to create image URL from path", "error", err, "path", localImageAttachmentPath)
// If image processing fails, fall back to simple text message // If image processing fails, fall back to simple text message
newMsg = models.NewRoleMsg(role, msg) newMsg = models.NewRoleMsg(role, msg)
imageAttachmentPath = "" // Clear the attachment
} else { } else {
newMsg.AddImagePart(imageURL) newMsg.AddImagePart(imageURL)
// Only clear the global image attachment after successfully processing it in this API call
imageAttachmentPath = "" // Clear the attachment after use imageAttachmentPath = "" // Clear the attachment after use
} }
} else { } else {
@@ -478,6 +492,11 @@ func (or OpenRouterChat) GetToken() string {
func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, error) { func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, error) {
logger.Debug("formmsg open router completion", "link", cfg.CurrentAPI) logger.Debug("formmsg open router completion", "link", cfg.CurrentAPI)
// Capture the image attachment path at the beginning to avoid race conditions
// with API rotation that might clear the global variable
localImageAttachmentPath := imageAttachmentPath
if cfg.ToolUse && !resume { if cfg.ToolUse && !resume {
// prompt += "\n" + cfg.ToolRole + ":\n" + toolSysMsg // prompt += "\n" + cfg.ToolRole + ":\n" + toolSysMsg
// add to chat body // add to chat body
@@ -486,21 +505,36 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
if msg != "" { // otherwise let the bot continue if msg != "" { // otherwise let the bot continue
var newMsg models.RoleMsg var newMsg models.RoleMsg
// Check if we have an image to add to this message // Check if we have an image to add to this message
if imageAttachmentPath != "" { logger.Debug("checking for image attachment", "imageAttachmentPath", localImageAttachmentPath, "msg", msg, "role", role)
// Create a multimodal message with both text and image if localImageAttachmentPath != "" {
newMsg = models.NewMultimodalMsg(role, []interface{}{}) logger.Info("processing image attachment for OpenRouter", "path", localImageAttachmentPath, "msg", msg)
// Add the text content // Check if file exists before attempting to create image URL
newMsg.AddTextPart(msg) if _, err := os.Stat(localImageAttachmentPath); os.IsNotExist(err) {
// Add the image content logger.Error("image file does not exist", "path", localImageAttachmentPath)
imageURL, err := models.CreateImageURLFromPath(imageAttachmentPath) // Fallback to simple text message
if err != nil { newMsg = models.NewRoleMsg(role, msg)
logger.Error("failed to create image URL from path", "error", err, "path", imageAttachmentPath) } else if err != nil {
// If image processing fails, fall back to simple text message logger.Error("error checking image file", "path", localImageAttachmentPath, "error", err)
// Fallback to simple text message
newMsg = models.NewRoleMsg(role, msg) newMsg = models.NewRoleMsg(role, msg)
imageAttachmentPath = "" // Clear the attachment
} else { } else {
newMsg.AddImagePart(imageURL) logger.Debug("image file exists, proceeding to create URL", "path", localImageAttachmentPath)
imageAttachmentPath = "" // Clear the attachment after use // Create a multimodal message with both text and image
newMsg = models.NewMultimodalMsg(role, []interface{}{})
// Add the text content
newMsg.AddTextPart(msg)
// Add the image content
imageURL, err := models.CreateImageURLFromPath(localImageAttachmentPath)
if err != nil {
logger.Error("failed to create image URL from path", "error", err, "path", localImageAttachmentPath)
// If image processing fails, fall back to simple text message
newMsg = models.NewRoleMsg(role, msg)
} else {
logger.Info("image URL created successfully for OpenRouter", "imageURL", imageURL[:min(len(imageURL), 50)]+"...")
newMsg.AddImagePart(imageURL)
// Only clear the global image attachment after successfully processing it in this API call
imageAttachmentPath = "" // Clear the attachment after use
}
} }
} else { } else {
// Create a simple text message // Create a simple text message
@@ -520,27 +554,118 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
} }
} }
// Create copy of chat body with standardized user role // Create copy of chat body with standardized user role
// modifiedBody := *chatBody
bodyCopy := &models.ChatBody{ bodyCopy := &models.ChatBody{
Messages: make([]models.RoleMsg, len(chatBody.Messages)), Messages: make([]models.RoleMsg, len(chatBody.Messages)),
Model: chatBody.Model, Model: chatBody.Model,
Stream: chatBody.Stream, Stream: chatBody.Stream,
} }
// modifiedBody.Messages = make([]models.RoleMsg, len(chatBody.Messages))
for i, msg := range chatBody.Messages { for i, msg := range chatBody.Messages {
logger.Debug("checking roles", "#", i, "role", msg.Role) logger.Debug("checking roles", "#", i, "role", msg.Role)
if msg.Role == cfg.UserRole || i == 1 { // Check if this message has content parts (multimodal) by attempting to marshal and checking structure
msgBytes, err := json.Marshal(msg)
if err != nil {
logger.Error("failed to serialize message for inspection", "error", err)
// Fallback to direct assignment
bodyCopy.Messages[i] = msg
} else {
// Try to deserialize to check if it has content parts
var tempMsg map[string]interface{}
if err := json.Unmarshal(msgBytes, &tempMsg); err != nil {
logger.Error("failed to inspect message structure", "error", err)
bodyCopy.Messages[i] = msg
} else {
// Check if content is an array (indicating content parts) or string (simple content)
if content, ok := tempMsg["content"]; ok {
if _, isArray := content.([]interface{}); isArray {
logger.Info("multimodal message detected", "#", i, "role", msg.Role)
// Deserialize to RoleMsg to access ContentParts
var detailedMsg models.RoleMsg
if err := json.Unmarshal(msgBytes, &detailedMsg); err == nil {
if len(detailedMsg.ContentParts) > 0 {
for j, part := range detailedMsg.ContentParts {
if textPart, ok := part.(models.TextContentPart); ok {
logger.Debug("text content part", "msg#", i, "part#", j, "text", textPart.Text)
} else if imgPart, ok := part.(models.ImageContentPart); ok {
logger.Info("image content part", "msg#", i, "part#", j, "url", imgPart.ImageURL.URL[:min(len(imgPart.ImageURL.URL), 50)]+"...")
} else {
logger.Debug("other content part", "msg#", i, "part#", j, "type", fmt.Sprintf("%T", part))
}
}
}
}
}
}
}
}
// Create a proper copy of the message that preserves all internal state
// First, serialize and deserialize to ensure content parts are preserved
copyMsgBytes, err := json.Marshal(msg)
if err != nil {
logger.Error("failed to serialize message", "error", err)
// Fallback to direct assignment
bodyCopy.Messages[i] = msg
} else {
// Deserialize back to preserve all internal state
var copiedMsg models.RoleMsg
err := json.Unmarshal(copyMsgBytes, &copiedMsg)
if err != nil {
logger.Error("failed to deserialize message", "error", err)
// Fallback to direct assignment
bodyCopy.Messages[i] = msg
} else {
bodyCopy.Messages[i] = copiedMsg
}
}
// Standardize role if it's a user role or first message
if bodyCopy.Messages[i].Role == cfg.UserRole || i == 1 {
bodyCopy.Messages[i].Role = "user" bodyCopy.Messages[i].Role = "user"
logger.Debug("replaced role in body", "#", i) logger.Debug("replaced role in body", "#", i)
} else {
bodyCopy.Messages[i] = msg
} }
} }
// Log the final request body before sending to OpenRouter
orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps) orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps)
data, err := json.Marshal(orBody) data, err := json.Marshal(orBody)
if err != nil { if err != nil {
logger.Error("failed to form a msg", "error", err) logger.Error("failed to form a msg", "error", err)
return nil, err return nil, err
} }
logger.Info("OpenRouter request prepared", "messages_count", len(orBody.Messages))
for i, msg := range orBody.Messages {
// Check if this final message has content parts (multimodal)
msgBytes, err := json.Marshal(msg)
if err == nil {
var tempMsg map[string]interface{}
if err := json.Unmarshal(msgBytes, &tempMsg); err == nil {
if content, ok := tempMsg["content"]; ok {
if _, isArray := content.([]interface{}); isArray {
logger.Debug("final message", "#", i, "role", msg.Role, "hasContentParts", true)
// Deserialize to access content parts
var detailedMsg models.RoleMsg
if err := json.Unmarshal(msgBytes, &detailedMsg); err == nil {
if len(detailedMsg.ContentParts) > 0 {
for j, part := range detailedMsg.ContentParts {
if textPart, ok := part.(models.TextContentPart); ok {
logger.Debug("final text part", "msg#", i, "part#", j, "text", textPart.Text)
} else if imgPart, ok := part.(models.ImageContentPart); ok {
logger.Info("final image part sent to OpenRouter", "msg#", i, "part#", j, "url", imgPart.ImageURL.URL[:min(len(imgPart.ImageURL.URL), 50)]+"...")
} else {
logger.Debug("final other part", "msg#", i, "part#", j, "type", fmt.Sprintf("%T", part))
}
}
}
}
} else {
logger.Debug("final message", "#", i, "role", msg.Role, "hasContentParts", false)
}
}
}
}
}
return bytes.NewReader(data), nil return bytes.NewReader(data), nil
} }