Enha: pw agent
This commit is contained in:
@@ -1,5 +1,11 @@
|
|||||||
package agent
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"gf-lt/models"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
// PWAgent: is AgenterA type agent (enclosed with tool chaining)
|
// PWAgent: is AgenterA type agent (enclosed with tool chaining)
|
||||||
// sysprompt explain tools and how to plan for execution
|
// sysprompt explain tools and how to plan for execution
|
||||||
type PWAgent struct {
|
type PWAgent struct {
|
||||||
@@ -7,11 +13,16 @@ type PWAgent struct {
|
|||||||
sysprompt string
|
sysprompt string
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewWebAgentB creates a WebAgentB that uses the given formatting function
|
// NewPWAgent creates a PWAgent with the given client and system prompt
|
||||||
func NewPWAgent(client *AgentClient, sysprompt string) *PWAgent {
|
func NewPWAgent(client *AgentClient, sysprompt string) *PWAgent {
|
||||||
return &PWAgent{AgentClient: client, sysprompt: sysprompt}
|
return &PWAgent{AgentClient: client, sysprompt: sysprompt}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetTools sets the tools available to the agent
|
||||||
|
func (a *PWAgent) SetTools(tools []models.Tool) {
|
||||||
|
a.tools = tools
|
||||||
|
}
|
||||||
|
|
||||||
func (a *PWAgent) ProcessTask(task string) []byte {
|
func (a *PWAgent) ProcessTask(task string) []byte {
|
||||||
req, err := a.FormFirstMsg(a.sysprompt, task)
|
req, err := a.FormFirstMsg(a.sysprompt, task)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -25,16 +36,91 @@ func (a *PWAgent) ProcessTask(task string) []byte {
|
|||||||
a.Log().Error("failed to process the request", "error", err)
|
a.Log().Error("failed to process the request", "error", err)
|
||||||
return []byte("failed to process the request; err: " + err.Error())
|
return []byte("failed to process the request; err: " + err.Error())
|
||||||
}
|
}
|
||||||
toolCall, hasToolCall := findToolCall(resp)
|
execTool, toolCallID, hasToolCall := findToolCall(resp)
|
||||||
if !hasToolCall {
|
if !hasToolCall {
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
// check resp for tool calls
|
|
||||||
// make tool call
|
a.setToolCallOnLastMessage(resp, toolCallID)
|
||||||
// add tool call resp to body
|
|
||||||
// send new request too lmm
|
toolResp := string(execTool())
|
||||||
tooResp := toolCall(resp)
|
req, err = a.FormMsgWithToolCallID(toolResp, toolCallID)
|
||||||
req, err = a.FormMsg(toolResp)
|
if err != nil {
|
||||||
|
a.Log().Error("failed to form next message", "error", err)
|
||||||
|
return []byte("failed to form next message; err: " + err.Error())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *PWAgent) setToolCallOnLastMessage(resp []byte, toolCallID string) {
|
||||||
|
if toolCallID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var genericResp map[string]interface{}
|
||||||
|
if err := json.Unmarshal(resp, &genericResp); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var name string
|
||||||
|
var args map[string]string
|
||||||
|
|
||||||
|
if choices, ok := genericResp["choices"].([]interface{}); ok && len(choices) > 0 {
|
||||||
|
if firstChoice, ok := choices[0].(map[string]interface{}); ok {
|
||||||
|
if message, ok := firstChoice["message"].(map[string]interface{}); ok {
|
||||||
|
if toolCalls, ok := message["tool_calls"].([]interface{}); ok && len(toolCalls) > 0 {
|
||||||
|
if tc, ok := toolCalls[0].(map[string]interface{}); ok {
|
||||||
|
if fn, ok := tc["function"].(map[string]interface{}); ok {
|
||||||
|
name, _ = fn["name"].(string)
|
||||||
|
argsStr, _ := fn["arguments"].(string)
|
||||||
|
json.Unmarshal([]byte(argsStr), &args)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if name == "" {
|
||||||
|
content, _ := genericResp["content"].(string)
|
||||||
|
name = extractToolNameFromText(content)
|
||||||
|
}
|
||||||
|
|
||||||
|
lastIdx := len(a.chatBody.Messages) - 1
|
||||||
|
if lastIdx >= 0 {
|
||||||
|
a.chatBody.Messages[lastIdx].ToolCallID = toolCallID
|
||||||
|
if name != "" {
|
||||||
|
argsJSON, _ := json.Marshal(args)
|
||||||
|
a.chatBody.Messages[lastIdx].ToolCall = &models.ToolCall{
|
||||||
|
ID: toolCallID,
|
||||||
|
Name: name,
|
||||||
|
Args: string(argsJSON),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractToolNameFromText(text string) string {
|
||||||
|
jsStr := toolCallRE.FindString(text)
|
||||||
|
if jsStr == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
jsStr = strings.TrimSpace(jsStr)
|
||||||
|
jsStr = strings.TrimPrefix(jsStr, "__tool_call__")
|
||||||
|
jsStr = strings.TrimSuffix(jsStr, "__tool_call__")
|
||||||
|
jsStr = strings.TrimSpace(jsStr)
|
||||||
|
|
||||||
|
start := strings.Index(jsStr, "{")
|
||||||
|
end := strings.LastIndex(jsStr, "}")
|
||||||
|
if start == -1 || end == -1 || end <= start {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
jsStr = jsStr[start : end+1]
|
||||||
|
|
||||||
|
var fc models.FuncCall
|
||||||
|
if err := json.Unmarshal([]byte(jsStr), &fc); err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return fc.Name
|
||||||
|
}
|
||||||
|
|||||||
349
agent/pw_tools.go
Normal file
349
agent/pw_tools.go
Normal file
@@ -0,0 +1,349 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
	"encoding/json"
	"fmt"
	"regexp"
	"strings"
	"sync/atomic"

	"gf-lt/models"
)
|
||||||
|
|
||||||
|
// ToolFunc is the signature of a tool implementation: it receives the call's
// arguments as a string-to-string map and returns the tool's raw output.
type ToolFunc func(map[string]string) []byte
|
||||||
|
|
||||||
|
// pwToolMap maps a tool name to its implementation. RegisterPWTool adds
// entries; the executors built by parseOpenAIToolCall and
// findToolCallFromText look tools up here by name.
var pwToolMap = make(map[string]ToolFunc)
|
||||||
|
|
||||||
|
func RegisterPWTool(name string, fn ToolFunc) {
|
||||||
|
pwToolMap[name] = fn
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetPWTools() []models.Tool {
|
||||||
|
return pwTools
|
||||||
|
}
|
||||||
|
|
||||||
|
var pwTools = []models.Tool{
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_start",
|
||||||
|
Description: "Start a Playwright browser instance. Must be called first before any other browser automation. Uses headless mode by default.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_stop",
|
||||||
|
Description: "Stop the Playwright browser instance. Call when done with browser automation.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_is_running",
|
||||||
|
Description: "Check if Playwright browser is currently running.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_navigate",
|
||||||
|
Description: "Navigate to a URL in the browser.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"url"},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"url": {Type: "string", Description: "URL to navigate to"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_click",
|
||||||
|
Description: "Click on an element on the current webpage. Use 'index' for multiple matches (default 0).",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"selector"},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"selector": {Type: "string", Description: "CSS selector for the element"},
|
||||||
|
"index": {Type: "integer", Description: "Index for multiple matches (default 0)"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_fill",
|
||||||
|
Description: "Type text into an input field. Use 'index' for multiple matches (default 0).",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"selector", "text"},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"selector": {Type: "string", Description: "CSS selector for the input element"},
|
||||||
|
"text": {Type: "string", Description: "Text to type into the field"},
|
||||||
|
"index": {Type: "integer", Description: "Index for multiple matches (default 0)"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_extract_text",
|
||||||
|
Description: "Extract text content from the page or specific elements. Use selector 'body' for all page text.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"selector": {Type: "string", Description: "CSS selector (default 'body' for all page text)"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_screenshot",
|
||||||
|
Description: "Take a screenshot of the page or a specific element. Returns a file path to the image.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"selector": {Type: "string", Description: "CSS selector for element to screenshot"},
|
||||||
|
"full_page": {Type: "boolean", Description: "Capture full page (default false)"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_screenshot_and_view",
|
||||||
|
Description: "Take a screenshot and return the image for viewing. Use to visually verify page state.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"selector": {Type: "string", Description: "CSS selector for element to screenshot"},
|
||||||
|
"full_page": {Type: "boolean", Description: "Capture full page (default false)"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_wait_for_selector",
|
||||||
|
Description: "Wait for an element to appear on the page before proceeding.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"selector"},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"selector": {Type: "string", Description: "CSS selector to wait for"},
|
||||||
|
"timeout": {Type: "integer", Description: "Timeout in milliseconds (default 30000)"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_drag",
|
||||||
|
Description: "Drag the mouse from point (x1,y1) to (x2,y2).",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"x1", "y1", "x2", "y2"},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"x1": {Type: "number", Description: "Starting X coordinate"},
|
||||||
|
"y1": {Type: "number", Description: "Starting Y coordinate"},
|
||||||
|
"x2": {Type: "number", Description: "Ending X coordinate"},
|
||||||
|
"y2": {Type: "number", Description: "Ending Y coordinate"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_click_at",
|
||||||
|
Description: "Click at specific X,Y coordinates on the page.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"x", "y"},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"x": {Type: "number", Description: "X coordinate"},
|
||||||
|
"y": {Type: "number", Description: "Y coordinate"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_get_html",
|
||||||
|
Description: "Get the HTML content of the page or a specific element.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"selector": {Type: "string", Description: "CSS selector (default 'body')"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_get_dom",
|
||||||
|
Description: "Get a structured DOM representation with tag, attributes, text, and children.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"selector": {Type: "string", Description: "CSS selector (default 'body')"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "pw_search_elements",
|
||||||
|
Description: "Search for elements by text content or CSS selector.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"text": {Type: "string", Description: "Text content to search for"},
|
||||||
|
"selector": {Type: "string", Description: "CSS selector to search for"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// toolCallRE matches the first __tool_call__ ... __tool_call__ block in a
// model's text output; the capture group is the payload between the markers.
// The (?s) flag lets "." match newlines, so a payload written on its own
// line(s) between the markers is found as well.
var toolCallRE = regexp.MustCompile(`(?s)__tool_call__(.+?)__tool_call__`)
|
||||||
|
|
||||||
|
// ParsedToolCall describes a single tool call: its ID, the tool's name, and
// the arguments as a string-to-string map.
type ParsedToolCall struct {
	ID   string
	Name string
	Args map[string]string
}
|
||||||
|
|
||||||
|
func findToolCall(resp []byte) (func() []byte, string, bool) {
|
||||||
|
var genericResp map[string]interface{}
|
||||||
|
if err := json.Unmarshal(resp, &genericResp); err != nil {
|
||||||
|
return findToolCallFromText(string(resp))
|
||||||
|
}
|
||||||
|
|
||||||
|
if choices, ok := genericResp["choices"].([]interface{}); ok && len(choices) > 0 {
|
||||||
|
if firstChoice, ok := choices[0].(map[string]interface{}); ok {
|
||||||
|
if message, ok := firstChoice["message"].(map[string]interface{}); ok {
|
||||||
|
if toolCalls, ok := message["tool_calls"].([]interface{}); ok && len(toolCalls) > 0 {
|
||||||
|
return parseOpenAIToolCall(toolCalls)
|
||||||
|
}
|
||||||
|
if content, ok := message["content"].(string); ok {
|
||||||
|
return findToolCallFromText(content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if text, ok := firstChoice["text"].(string); ok {
|
||||||
|
return findToolCallFromText(text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if content, ok := genericResp["content"].(string); ok {
|
||||||
|
return findToolCallFromText(content)
|
||||||
|
}
|
||||||
|
|
||||||
|
return findToolCallFromText(string(resp))
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseOpenAIToolCall(toolCalls []interface{}) (func() []byte, string, bool) {
|
||||||
|
if len(toolCalls) == 0 {
|
||||||
|
return nil, "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
tc := toolCalls[0].(map[string]interface{})
|
||||||
|
id, _ := tc["id"].(string)
|
||||||
|
function, _ := tc["function"].(map[string]interface{})
|
||||||
|
name, _ := function["name"].(string)
|
||||||
|
argsStr, _ := function["arguments"].(string)
|
||||||
|
|
||||||
|
var args map[string]string
|
||||||
|
if err := json.Unmarshal([]byte(argsStr), &args); err != nil {
|
||||||
|
return func() []byte {
|
||||||
|
return []byte(fmt.Sprintf(`{"error": "failed to parse arguments: %v"}`, err))
|
||||||
|
}, id, true
|
||||||
|
}
|
||||||
|
|
||||||
|
return func() []byte {
|
||||||
|
fn, ok := pwToolMap[name]
|
||||||
|
if !ok {
|
||||||
|
return []byte(fmt.Sprintf(`{"error": "tool %s not found"}`, name))
|
||||||
|
}
|
||||||
|
return fn(args)
|
||||||
|
}, id, true
|
||||||
|
}
|
||||||
|
|
||||||
|
func findToolCallFromText(text string) (func() []byte, string, bool) {
|
||||||
|
jsStr := toolCallRE.FindString(text)
|
||||||
|
if jsStr == "" {
|
||||||
|
return nil, "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
jsStr = strings.TrimSpace(jsStr)
|
||||||
|
jsStr = strings.TrimPrefix(jsStr, "__tool_call__")
|
||||||
|
jsStr = strings.TrimSuffix(jsStr, "__tool_call__")
|
||||||
|
jsStr = strings.TrimSpace(jsStr)
|
||||||
|
|
||||||
|
start := strings.Index(jsStr, "{")
|
||||||
|
end := strings.LastIndex(jsStr, "}")
|
||||||
|
if start == -1 || end == -1 || end <= start {
|
||||||
|
return func() []byte {
|
||||||
|
return []byte(`{"error": "no valid JSON found in tool call"}`)
|
||||||
|
}, "", true
|
||||||
|
}
|
||||||
|
|
||||||
|
jsStr = jsStr[start : end+1]
|
||||||
|
|
||||||
|
var fc models.FuncCall
|
||||||
|
if err := json.Unmarshal([]byte(jsStr), &fc); err != nil {
|
||||||
|
return func() []byte {
|
||||||
|
return []byte(fmt.Sprintf(`{"error": "failed to parse tool call: %v}`, err))
|
||||||
|
}, "", true
|
||||||
|
}
|
||||||
|
|
||||||
|
if fc.ID == "" {
|
||||||
|
fc.ID = "call_" + generateToolCallID()
|
||||||
|
}
|
||||||
|
|
||||||
|
return func() []byte {
|
||||||
|
fn, ok := pwToolMap[fc.Name]
|
||||||
|
if !ok {
|
||||||
|
return []byte(fmt.Sprintf(`{"error": "tool %s not found"}`, fc.Name))
|
||||||
|
}
|
||||||
|
return fn(fc.Args)
|
||||||
|
}, fc.ID, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// toolCallSeq is the number of IDs generateToolCallID has handed out.
var toolCallSeq uint64

// generateToolCallID returns a decimal ID that is distinct for every call
// within the process ("1", "2", ...). The counter is advanced atomically, so
// concurrent callers never receive the same value.
func generateToolCallID() string {
	return fmt.Sprintf("%d", atomic.AddUint64(&toolCallSeq, 1))
}
|
||||||
@@ -80,6 +80,20 @@ func (ag *AgentClient) FormMsg(msg string) (io.Reader, error) {
|
|||||||
return bytes.NewReader(b), nil
|
return bytes.NewReader(b), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ag *AgentClient) FormMsgWithToolCallID(msg, toolCallID string) (io.Reader, error) {
|
||||||
|
m := models.RoleMsg{
|
||||||
|
Role: "tool",
|
||||||
|
Content: msg,
|
||||||
|
ToolCallID: toolCallID,
|
||||||
|
}
|
||||||
|
ag.chatBody.Messages = append(ag.chatBody.Messages, m)
|
||||||
|
b, err := ag.buildRequest()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return bytes.NewReader(b), nil
|
||||||
|
}
|
||||||
|
|
||||||
// buildRequest creates the appropriate LLM request based on the current API endpoint.
|
// buildRequest creates the appropriate LLM request based on the current API endpoint.
|
||||||
func (ag *AgentClient) buildRequest() ([]byte, error) {
|
func (ag *AgentClient) buildRequest() ([]byte, error) {
|
||||||
isCompletion, isChat, isDeepSeek, isOpenRouter := detectAPI(ag.cfg.CurrentAPI)
|
isCompletion, isChat, isDeepSeek, isOpenRouter := detectAPI(ag.cfg.CurrentAPI)
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ func NewWebAgentB(client *AgentClient, sysprompt string) *WebAgentB {
|
|||||||
|
|
||||||
// Process applies the formatting function to raw output
|
// Process applies the formatting function to raw output
|
||||||
func (a *WebAgentB) Process(args map[string]string, rawOutput []byte) []byte {
|
func (a *WebAgentB) Process(args map[string]string, rawOutput []byte) []byte {
|
||||||
msg, err := a.FormMsg(a.sysprompt,
|
msg, err := a.FormMsg(
|
||||||
fmt.Sprintf("request:\n%+v\ntool response:\n%v", args, string(rawOutput)))
|
fmt.Sprintf("%s\n\nrequest:\n%+v\ntool response:\n%v", a.sysprompt, args, string(rawOutput)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.Log().Error("failed to process the request", "error", err)
|
a.Log().Error("failed to process the request", "error", err)
|
||||||
return []byte("failed to process the request; err: " + err.Error())
|
return []byte("failed to process the request; err: " + err.Error())
|
||||||
|
|||||||
77
tools.go
77
tools.go
@@ -1491,6 +1491,47 @@ func registerWindowTools() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// browserAgentSysPrompt is the system prompt passed to the PWAgent created by
// runBrowserAgent. It lists the pw_* tools and the expected workflow.
var browserAgentSysPrompt = `You are an autonomous browser automation agent. Your goal is to complete the user's task by intelligently using browser automation tools.

Available tools:
- pw_start: Start browser (must call first)
- pw_stop: Stop browser (call when done)
- pw_is_running: Check if browser is running
- pw_navigate: Go to a URL
- pw_click: Click an element by CSS selector
- pw_fill: Type text into an input
- pw_extract_text: Get text from page/element
- pw_screenshot: Take a screenshot (returns file path)
- pw_screenshot_and_view: Take screenshot with image for viewing
- pw_wait_for_selector: Wait for element to appear
- pw_drag: Drag mouse from one point to another
- pw_click_at: Click at X,Y coordinates
- pw_get_html: Get HTML content
- pw_get_dom: Get structured DOM tree
- pw_search_elements: Search for elements by text or selector

Workflow:
1. Start browser if not running (pw_start)
2. Navigate to required pages (pw_navigate)
3. Interact with elements as needed (click, fill, etc.)
4. Extract information or take screenshots as requested
5. Stop browser when done (pw_stop)

Always provide clear feedback about what you're doing and what you found.`
|
||||||
|
|
||||||
|
func runBrowserAgent(args map[string]string) []byte {
|
||||||
|
task, ok := args["task"]
|
||||||
|
if !ok || task == "" {
|
||||||
|
return []byte(`{"error": "task argument is required"}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
client := getWebAgentClient()
|
||||||
|
pwAgent := agent.NewPWAgent(client, browserAgentSysPrompt)
|
||||||
|
pwAgent.SetTools(agent.GetPWTools())
|
||||||
|
|
||||||
|
return pwAgent.ProcessTask(task)
|
||||||
|
}
|
||||||
|
|
||||||
func registerPlaywrightTools() {
|
func registerPlaywrightTools() {
|
||||||
removePlaywrightToolsFromBaseTools()
|
removePlaywrightToolsFromBaseTools()
|
||||||
if cfg != nil && cfg.PlaywrightEnabled {
|
if cfg != nil && cfg.PlaywrightEnabled {
|
||||||
@@ -1776,6 +1817,42 @@ func registerPlaywrightTools() {
|
|||||||
}
|
}
|
||||||
baseTools = append(baseTools, playwrightTools...)
|
baseTools = append(baseTools, playwrightTools...)
|
||||||
toolSysMsg += browserToolSysMsg
|
toolSysMsg += browserToolSysMsg
|
||||||
|
|
||||||
|
agent.RegisterPWTool("pw_start", pwStart)
|
||||||
|
agent.RegisterPWTool("pw_stop", pwStop)
|
||||||
|
agent.RegisterPWTool("pw_is_running", pwIsRunning)
|
||||||
|
agent.RegisterPWTool("pw_navigate", pwNavigate)
|
||||||
|
agent.RegisterPWTool("pw_click", pwClick)
|
||||||
|
agent.RegisterPWTool("pw_click_at", pwClickAt)
|
||||||
|
agent.RegisterPWTool("pw_fill", pwFill)
|
||||||
|
agent.RegisterPWTool("pw_extract_text", pwExtractText)
|
||||||
|
agent.RegisterPWTool("pw_screenshot", pwScreenshot)
|
||||||
|
agent.RegisterPWTool("pw_screenshot_and_view", pwScreenshotAndView)
|
||||||
|
agent.RegisterPWTool("pw_wait_for_selector", pwWaitForSelector)
|
||||||
|
agent.RegisterPWTool("pw_drag", pwDrag)
|
||||||
|
agent.RegisterPWTool("pw_get_html", pwGetHTML)
|
||||||
|
agent.RegisterPWTool("pw_get_dom", pwGetDOM)
|
||||||
|
agent.RegisterPWTool("pw_search_elements", pwSearchElements)
|
||||||
|
|
||||||
|
browserAgentTool := []models.Tool{
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: models.ToolFunc{
|
||||||
|
Name: "browser_agent",
|
||||||
|
Description: "Autonomous browser automation agent. Use for complex multi-step browser tasks like 'go to website, login, and take screenshot'. The agent will plan and execute steps automatically using browser tools.",
|
||||||
|
Parameters: models.ToolFuncParams{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"task"},
|
||||||
|
Properties: map[string]models.ToolArgProps{
|
||||||
|
"task": {Type: "string", Description: "The task to accomplish, e.g., 'go to github.com and take a screenshot of the homepage'"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
baseTools = append(baseTools, browserAgentTool...)
|
||||||
|
|
||||||
|
fnMap["browser_agent"] = runBrowserAgent
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user