Enha (tools): move browser to top command
This commit is contained in:
127
tools/tools.go
127
tools/tools.go
@@ -35,7 +35,12 @@ Your current tools:
|
||||
{
|
||||
"name":"run",
|
||||
"args": ["command"],
|
||||
"when_to_use": "Main tool for file operations, shell commands, memory, git, and todo. Use run \"help\" for all commands. Examples: run \"ls -la\", run \"help\", run \"mkdir -p foo/bar\", run \"cat file.txt\", run \"write file.txt content\", run \"view_img image.png\", run \"git status\", run \"memory store foo bar\", run \"todo create task\", run \"grep pattern file\", run \"cd /path\", run \"pwd\", run \"find . -name *.txt\", run \"file image.png\", run \"head file\", run \"tail file\", run \"wc -l file\", run \"sort file\", run \"uniq file\", run \"sed 's/old/new/' file\", run \"echo text\", run \"go build ./...\", run \"time\", run \"stat file\", run \"cp src dst\", run \"mv src dst\", run \"rm file\""
|
||||
"when_to_use": "Main tool for file operations, shell commands, memory, git, and todo. Use run "help" for all commands. Examples: run "ls -la", run "help", run "mkdir -p foo/bar", run "cat file.txt", run "write file.txt content", run "git status", run "memory store foo bar", run "todo create task", run "grep pattern file", run "cd /path", run "pwd", run "find . -name *.txt", run "file image.png", run "head file", run "tail file", run "wc -l file", run "sort file", run "uniq file", run "sed 's/old/new/' file", run "echo text", run "go build ./...", run "time", run "stat file", run "cp src dst", run "mv src dst", run "rm file"
|
||||
},
|
||||
{
|
||||
"name":"browser",
|
||||
"args": ["action", "args"],
|
||||
"when_to_use": "Playwright browser automation. Actions: start, stop, running, go <url>, click <selector>, fill <selector> <text>, text [selector], html [selector], screenshot [path], screenshot_and_view, wait <selector>, drag <x1> <y1> <x2> <y2>. Example: browser start, browser go https://example.com, browser click #submit-button"
|
||||
},
|
||||
{
|
||||
"name":"view_img",
|
||||
@@ -415,6 +420,36 @@ func runCmd(args map[string]string) []byte {
|
||||
}
|
||||
}
|
||||
|
||||
// browserCmd handles top-level browser tool calls
|
||||
func browserCmd(args map[string]string) []byte {
|
||||
action, _ := args["action"]
|
||||
argsStr, _ := args["args"]
|
||||
// Parse args string into slice (space-separated, respecting quoted strings)
|
||||
var browserArgs []string
|
||||
if argsStr != "" {
|
||||
browserArgs = strings.Fields(argsStr)
|
||||
}
|
||||
if action == "" {
|
||||
return []byte(`usage: browser <action> [args...]
|
||||
Actions:
|
||||
start - start browser
|
||||
stop - stop browser
|
||||
running - check if running
|
||||
go <url> - navigate to URL
|
||||
click <selector> - click element
|
||||
fill <selector> <text> - fill input
|
||||
text [selector] - extract text
|
||||
html [selector] - get HTML
|
||||
screenshot [path] - take screenshot
|
||||
screenshot_and_view - take and view screenshot
|
||||
wait <selector> - wait for element
|
||||
drag <from> <to> - drag element`)
|
||||
}
|
||||
// Prepend action to args for runBrowserCommand
|
||||
fullArgs := append([]string{action}, browserArgs...)
|
||||
return runBrowserCommand(fullArgs, args)
|
||||
}
|
||||
|
||||
// runBrowserCommand routes browser subcommands to Playwright handlers
|
||||
func runBrowserCommand(args []string, originalArgs map[string]string) []byte {
|
||||
if len(args) == 0 {
|
||||
@@ -538,6 +573,33 @@ Actions:
|
||||
"fromSelector": rest[0],
|
||||
"toSelector": rest[1],
|
||||
})
|
||||
case "help":
|
||||
return []byte(`browser <action> [args]
|
||||
Playwright browser automation.
|
||||
Actions:
|
||||
start - start browser
|
||||
stop - stop browser
|
||||
running - check if browser is running
|
||||
go <url> - navigate to URL
|
||||
click <selector> - click element (use index for multiple: click #btn 1)
|
||||
fill <sel> <text> - fill input field
|
||||
text [selector] - extract text (from element or whole page)
|
||||
html [selector] - get HTML (from element or whole page)
|
||||
screenshot [path] - take screenshot
|
||||
screenshot_and_view - take and view screenshot
|
||||
wait <selector> - wait for element to appear
|
||||
drag <x1> <y1> <x2> <y2> - drag by coordinates
|
||||
drag <sel1> <sel2> - drag by selectors (center points)
|
||||
Examples:
|
||||
browser start
|
||||
browser go https://example.com
|
||||
browser click #submit-button
|
||||
browser fill #search-input hello
|
||||
browser text
|
||||
browser screenshot
|
||||
browser screenshot_and_view
|
||||
browser drag 100 200 300 400
|
||||
browser drag #item1 #container2`)
|
||||
default:
|
||||
return []byte("unknown browser action: " + action)
|
||||
}
|
||||
@@ -597,20 +659,6 @@ func getHelp(args []string) string {
|
||||
capture <name> - capture a window screenshot
|
||||
capture_and_view <name> - capture and view screenshot
|
||||
|
||||
# Browser (requires Playwright)
|
||||
browser start - start browser
|
||||
browser stop - stop browser
|
||||
browser running - check if running
|
||||
browser go <url> - navigate to URL
|
||||
browser click <sel> - click element
|
||||
browser fill <sel> <txt> - fill input
|
||||
browser text [sel] - extract text
|
||||
browser html [sel] - get HTML
|
||||
browser screenshot - take screenshot
|
||||
browser wait <sel> - wait for element
|
||||
browser drag <x1> <y1> <x2> <y2> - drag by coordinates
|
||||
browser drag <sel1> <sel2> - drag by selectors (center points)
|
||||
|
||||
# System
|
||||
<any shell command> - run shell command directly
|
||||
|
||||
@@ -747,31 +795,6 @@ Use: run "command" to execute.`
|
||||
Requires: xdotool and maim
|
||||
Examples:
|
||||
run "capture_and_view Firefox"`
|
||||
case "browser":
|
||||
return `browser <action> [args]
|
||||
Playwright browser automation.
|
||||
Requires: Playwright browser server running
|
||||
Actions:
|
||||
start - start browser
|
||||
stop - stop browser
|
||||
running - check if browser is running
|
||||
go <url> - navigate to URL
|
||||
click <selector> - click element (use index for multiple: click #btn 1)
|
||||
fill <selector> <text> - fill input field
|
||||
text [selector] - extract text (from element or whole page)
|
||||
html [selector] - get HTML (from element or whole page)
|
||||
screenshot [path] - take screenshot
|
||||
wait <selector> - wait for element to appear
|
||||
drag <from> <to> - drag element to another element
|
||||
Examples:
|
||||
run "browser start"
|
||||
run "browser go https://example.com"
|
||||
run "browser click #submit-button"
|
||||
run "browser fill #search-input hello"
|
||||
run "browser text"
|
||||
run "browser screenshot"
|
||||
run "browser drag 100 200 300 400"
|
||||
run "browser drag #item1 #container2"`
|
||||
default:
|
||||
return fmt.Sprintf("No help available for: %s. Use: run \"help\" for all commands.", cmd)
|
||||
}
|
||||
@@ -1264,6 +1287,8 @@ var FnMap = map[string]fnSig{
|
||||
"help": helpTool,
|
||||
// Unified run command
|
||||
"run": runCmd,
|
||||
// Browser tool - routes to runBrowserCommand
|
||||
"browser": browserCmd,
|
||||
"summarize_chat": summarizeChat,
|
||||
}
|
||||
|
||||
@@ -1452,4 +1477,26 @@ var BaseTools = []models.Tool{
|
||||
},
|
||||
},
|
||||
},
|
||||
// browser - Playwright browser automation
|
||||
models.Tool{
|
||||
Type: "function",
|
||||
Function: models.ToolFunc{
|
||||
Name: "browser",
|
||||
Description: "Playwright browser automation. Actions: start (launch browser), stop (close browser), running (check if browser is running), go <url> (navigate), click <selector> [index] (click element), fill <selector> <text> (type into input), text [selector] (extract text), html [selector] (get HTML), screenshot [path] (take screenshot), screenshot_and_view (take and view), wait <selector> (wait for element), drag <x1> <y1> <x2> <y2> (drag by coords) or drag <sel1> <sel2> (drag by selectors)",
|
||||
Parameters: models.ToolFuncParams{
|
||||
Type: "object",
|
||||
Required: []string{"action"},
|
||||
Properties: map[string]models.ToolArgProps{
|
||||
"action": models.ToolArgProps{
|
||||
Type: "string",
|
||||
Description: "Browser action: start, stop, running, go, click, fill, text, html, screenshot, screenshot_and_view, wait, drag",
|
||||
},
|
||||
"args": models.ToolArgProps{
|
||||
Type: "string",
|
||||
Description: "Arguments for the action (e.g., URL for go, selector for click, etc.)",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user