Feat: read img tool for chat endpoint
This commit is contained in:
49
bot.go
49
bot.go
@@ -1174,17 +1174,60 @@ func findCall(msg, toolCall string) bool {
|
|||||||
toolRunningMode = false
|
toolRunningMode = false
|
||||||
toolMsg := string(resp)
|
toolMsg := string(resp)
|
||||||
logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
|
logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
|
||||||
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
|
||||||
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolMsg)
|
|
||||||
// Create tool response message with the proper tool_call_id
|
// Create tool response message with the proper tool_call_id
|
||||||
// Mark shell commands as always visible
|
// Mark shell commands as always visible
|
||||||
isShellCommand := fc.Name == "execute_command"
|
isShellCommand := fc.Name == "execute_command"
|
||||||
toolResponseMsg := models.RoleMsg{
|
|
||||||
|
// Check if response is multimodal content (image)
|
||||||
|
var toolResponseMsg models.RoleMsg
|
||||||
|
if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
|
||||||
|
// Parse multimodal content response
|
||||||
|
multimodalResp := models.MultimodalToolResp{}
|
||||||
|
if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
|
||||||
|
// Create RoleMsg with ContentParts
|
||||||
|
var contentParts []any
|
||||||
|
for _, part := range multimodalResp.Parts {
|
||||||
|
partType, ok := part["type"]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if partType == "text" {
|
||||||
|
contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
|
||||||
|
} else if partType == "image_url" {
|
||||||
|
contentParts = append(contentParts, models.ImageContentPart{
|
||||||
|
Type: "image_url",
|
||||||
|
ImageURL: struct {
|
||||||
|
URL string `json:"url"`
|
||||||
|
}{URL: part["url"]},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
toolResponseMsg = models.RoleMsg{
|
||||||
|
Role: cfg.ToolRole,
|
||||||
|
ContentParts: contentParts,
|
||||||
|
HasContentParts: true,
|
||||||
|
ToolCallID: lastToolCall.ID,
|
||||||
|
IsShellCommand: isShellCommand,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Fallback to regular content
|
||||||
|
toolResponseMsg = models.RoleMsg{
|
||||||
Role: cfg.ToolRole,
|
Role: cfg.ToolRole,
|
||||||
Content: toolMsg,
|
Content: toolMsg,
|
||||||
ToolCallID: lastToolCall.ID,
|
ToolCallID: lastToolCall.ID,
|
||||||
IsShellCommand: isShellCommand,
|
IsShellCommand: isShellCommand,
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
toolResponseMsg = models.RoleMsg{
|
||||||
|
Role: cfg.ToolRole,
|
||||||
|
Content: toolMsg,
|
||||||
|
ToolCallID: lastToolCall.ID,
|
||||||
|
IsShellCommand: isShellCommand,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
|
||||||
|
"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
|
||||||
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
|
chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
|
||||||
logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
|
logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
|
||||||
// Clear the stored tool call ID after using it
|
// Clear the stored tool call ID after using it
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package models
|
|||||||
|
|
||||||
const (
	// LoadedMark is the "(loaded) " marker string.
	LoadedMark = "(loaded) "
	// ToolRespMultyType is the value of the "type" field that identifies a
	// multimodal tool response. It must equal the literal emitted by the
	// tools and matched by the tool-call handler ("multimodal_content");
	// the previous spelling "multimodel_content" could never match.
	ToolRespMultyType = "multimodal_content"
)
|
||||||
|
|
||||||
type APIType int
|
type APIType int
|
||||||
|
|||||||
@@ -391,7 +391,6 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine the image format based on file extension
|
// Determine the image format based on file extension
|
||||||
var mimeType string
|
var mimeType string
|
||||||
switch {
|
switch {
|
||||||
@@ -408,10 +407,8 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
|
|||||||
default:
|
default:
|
||||||
mimeType = "image/jpeg" // default
|
mimeType = "image/jpeg" // default
|
||||||
}
|
}
|
||||||
|
|
||||||
// Encode to base64
|
// Encode to base64
|
||||||
encoded := base64.StdEncoding.EncodeToString(data)
|
encoded := base64.StdEncoding.EncodeToString(data)
|
||||||
|
|
||||||
// Create data URL
|
// Create data URL
|
||||||
return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
|
return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
|
||||||
}
|
}
|
||||||
@@ -623,3 +620,8 @@ type ChatRoundReq struct {
|
|||||||
Regen bool
|
Regen bool
|
||||||
Resume bool
|
Resume bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MultimodalToolResp is the JSON envelope a tool returns when its result is
// made of several content parts rather than a single plain-text string.
type MultimodalToolResp struct {
	// Type tags the envelope; producers set it to "multimodal_content".
	Type string `json:"type"`
	// Parts lists the content parts in order. Each part is a flat string map
	// with a "type" key: "text" parts carry their payload under "text",
	// "image_url" parts carry it under "url".
	Parts []map[string]string `json:"parts"`
}
|
||||||
|
|||||||
56
tools.go
56
tools.go
@@ -469,6 +469,43 @@ func fileRead(args map[string]string) []byte {
|
|||||||
return jsonResult
|
return jsonResult
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func fileReadImage(args map[string]string) []byte {
|
||||||
|
path, ok := args["path"]
|
||||||
|
if !ok || path == "" {
|
||||||
|
msg := "path not provided to file_read_image tool"
|
||||||
|
logger.Error(msg)
|
||||||
|
return []byte(msg)
|
||||||
|
}
|
||||||
|
path = resolvePath(path)
|
||||||
|
dataURL, err := models.CreateImageURLFromPath(path)
|
||||||
|
if err != nil {
|
||||||
|
msg := "failed to read image; error: " + err.Error()
|
||||||
|
logger.Error(msg)
|
||||||
|
return []byte(msg)
|
||||||
|
}
|
||||||
|
// result := map[string]any{
|
||||||
|
// "type": "multimodal_content",
|
||||||
|
// "parts": []map[string]string{
|
||||||
|
// {"type": "text", "text": "Image at " + path},
|
||||||
|
// {"type": "image_url", "url": dataURL},
|
||||||
|
// },
|
||||||
|
// }
|
||||||
|
result := models.MultimodalToolResp{
|
||||||
|
Type: "multimodal_content",
|
||||||
|
Parts: []map[string]string{
|
||||||
|
{"type": "text", "text": "Image at " + path},
|
||||||
|
{"type": "image_url", "url": dataURL},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
jsonResult, err := json.Marshal(result)
|
||||||
|
if err != nil {
|
||||||
|
msg := "failed to marshal result; error: " + err.Error()
|
||||||
|
logger.Error(msg)
|
||||||
|
return []byte(msg)
|
||||||
|
}
|
||||||
|
return jsonResult
|
||||||
|
}
|
||||||
|
|
||||||
func fileWrite(args map[string]string) []byte {
|
func fileWrite(args map[string]string) []byte {
|
||||||
path, ok := args["path"]
|
path, ok := args["path"]
|
||||||
if !ok || path == "" {
|
if !ok || path == "" {
|
||||||
@@ -1101,6 +1138,7 @@ var fnMap = map[string]fnSig{
|
|||||||
"read_url_raw": readURLRaw,
|
"read_url_raw": readURLRaw,
|
||||||
"file_create": fileCreate,
|
"file_create": fileCreate,
|
||||||
"file_read": fileRead,
|
"file_read": fileRead,
|
||||||
|
"file_read_image": fileReadImage,
|
||||||
"file_write": fileWrite,
|
"file_write": fileWrite,
|
||||||
"file_write_append": fileWriteAppend,
|
"file_write_append": fileWriteAppend,
|
||||||
"file_edit": fileEdit,
|
"file_edit": fileEdit,
|
||||||
@@ -1327,6 +1365,24 @@ var baseTools = []models.Tool{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
// file_read_image
|
||||||
|
models.Tool{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "file_read_image",
|
||||||
|
Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"path"},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"path": models.ToolArgProps{
|
||||||
|
Type: "string",
|
||||||
|
Description: "path of the image file to read",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
// file_write
|
// file_write
|
||||||
models.Tool{
|
models.Tool{
|
||||||
Type: "function",
|
Type: "function",
|
||||||
|
|||||||
Reference in New Issue
Block a user