Enha (tools): move browser to a top-level command

2026-04-11 14:04:18 +03:00
parent 556eab9d89
commit 43d78ff47b
1 changed files with 88 additions and 41 deletions
--- a/tools/tools.go
+++ b/tools/tools.go
@@ -35,7 +35,12 @@ Your current tools:
 {
 "name":"run",
 "args": ["command"],
-"when_to_use": "Main tool for file operations, shell commands, memory, git, and todo. Use run \"help\" for all commands. Examples: run \"ls -la\", run \"help\", run \"mkdir -p foo/bar\", run \"cat file.txt\", run \"write file.txt content\", run \"view_img image.png\", run \"git status\", run \"memory store foo bar\", run \"todo create task\", run \"grep pattern file\", run \"cd /path\", run \"pwd\", run \"find . -name *.txt\", run \"file image.png\", run \"head file\", run \"tail file\", run \"wc -l file\", run \"sort file\", run \"uniq file\", run \"sed 's/old/new/' file\", run \"echo text\", run \"go build ./...\", run \"time\", run \"stat file\", run \"cp src dst\", run \"mv src dst\", run \"rm file\""
+"when_to_use": "Main tool for file operations, shell commands, memory, git, and todo. Use run \"help\" for all commands. Examples: run \"ls -la\", run \"help\", run \"mkdir -p foo/bar\", run \"cat file.txt\", run \"write file.txt content\", run \"git status\", run \"memory store foo bar\", run \"todo create task\", run \"grep pattern file\", run \"cd /path\", run \"pwd\", run \"find . -name *.txt\", run \"file image.png\", run \"head file\", run \"tail file\", run \"wc -l file\", run \"sort file\", run \"uniq file\", run \"sed 's/old/new/' file\", run \"echo text\", run \"go build ./...\", run \"time\", run \"stat file\", run \"cp src dst\", run \"mv src dst\", run \"rm file\""
+},
+{
+"name":"browser",
+"args": ["action", "args"],
+"when_to_use": "Playwright browser automation. Actions: start, stop, running, go <url>, click <selector>, fill <selector> <text>, text [selector], html [selector], screenshot [path], screenshot_and_view, wait <selector>, drag <x1> <y1> <x2> <y2>. Example: browser start, browser go https://example.com, browser click #submit-button"
 },
 {
 "name":"view_img",
@@ -415,6 +420,36 @@ func runCmd(args map[string]string) []byte {
 	}
 }

+// browserCmd handles top-level browser tool calls: it reads the "action" and "args" entries and forwards them to runBrowserCommand.
+func browserCmd(args map[string]string) []byte {
+	action, _ := args["action"]
+	argsStr, _ := args["args"]
+	// Split the args string on whitespace. NOTE: strings.Fields does not honor quotes, so a quoted argument containing spaces is split into several elements.
+	var browserArgs []string
+	if argsStr != "" {
+		browserArgs = strings.Fields(argsStr)
+	}
+	if action == "" { // no action supplied: return the usage text instead of dispatching
+		return []byte(`usage: browser <action> [args...]
+Actions:
+  start              - start browser
+  stop               - stop browser
+  running            - check if running
+  go <url>           - navigate to URL
+  click <selector>   - click element
+  fill <selector> <text> - fill input
+  text [selector]    - extract text
+  html [selector]    - get HTML
+  screenshot [path]  - take screenshot
+  screenshot_and_view - take and view screenshot
+  wait <selector>    - wait for element
+  drag <from> <to>   - drag element`)
+	}
+	// runBrowserCommand takes the action as the first element of its args slice, so prepend it; the original map is passed through unchanged.
+	fullArgs := append([]string{action}, browserArgs...)
+	return runBrowserCommand(fullArgs, args)
+}
+
 // runBrowserCommand routes browser subcommands to Playwright handlers
 func runBrowserCommand(args []string, originalArgs map[string]string) []byte {
 	if len(args) == 0 {
@@ -538,6 +573,33 @@ Actions:
 			"fromSelector": rest[0],
 			"toSelector":   rest[1],
 		})
+	case "help":
+		return []byte(`browser <action> [args]
+Playwright browser automation.
+Actions:
+  start              - start browser
+  stop               - stop browser
+  running            - check if browser is running
+  go <url>          - navigate to URL
+  click <selector>  - click element (use index for multiple: click #btn 1)
+  fill <sel> <text> - fill input field
+  text [selector]   - extract text (from element or whole page)
+  html [selector]   - get HTML (from element or whole page)
+  screenshot [path] - take screenshot
+  screenshot_and_view - take and view screenshot
+  wait <selector>   - wait for element to appear
+  drag <x1> <y1> <x2> <y2> - drag by coordinates
+  drag <sel1> <sel2> - drag by selectors (center points)
+Examples:
+  browser start
+  browser go https://example.com
+  browser click #submit-button
+  browser fill #search-input hello
+  browser text
+  browser screenshot
+  browser screenshot_and_view
+  browser drag 100 200 300 400
+  browser drag #item1 #container2`)
 	default:
 		return []byte("unknown browser action: " + action)
 	}
@@ -597,20 +659,6 @@ func getHelp(args []string) string {
  capture <name>    - capture a window screenshot
  capture_and_view <name> - capture and view screenshot

-  # Browser (requires Playwright)
-  browser start           - start browser
-  browser stop            - stop browser
-  browser running         - check if running
-  browser go <url>        - navigate to URL
-  browser click <sel>     - click element
-  browser fill <sel> <txt> - fill input
-  browser text [sel]     - extract text
-  browser html [sel]      - get HTML
-  browser screenshot     - take screenshot
-  browser wait <sel>     - wait for element
-  browser drag <x1> <y1> <x2> <y2> - drag by coordinates
-  browser drag <sel1> <sel2> - drag by selectors (center points)
-
  # System
  <any shell command> - run shell command directly

@@ -747,31 +795,6 @@ Use: run "command" to execute.`
  Requires: xdotool and maim
  Examples:
    run "capture_and_view Firefox"`
-	case "browser":
-		return `browser <action> [args]
-  Playwright browser automation.
-  Requires: Playwright browser server running
-  Actions:
-    start              - start browser
-    stop               - stop browser
-    running            - check if browser is running
-    go <url>           - navigate to URL
-    click <selector>   - click element (use index for multiple: click #btn 1)
-    fill <selector> <text> - fill input field
-    text [selector]   - extract text (from element or whole page)
-    html [selector]   - get HTML (from element or whole page)
-    screenshot [path] - take screenshot
-    wait <selector>   - wait for element to appear
-    drag <from> <to>  - drag element to another element
-  Examples:
-    run "browser start"
-    run "browser go https://example.com"
-    run "browser click #submit-button"
-    run "browser fill #search-input hello"
-    run "browser text"
-    run "browser screenshot"
-    run "browser drag 100 200 300 400"
-    run "browser drag #item1 #container2"`
 	default:
 		return fmt.Sprintf("No help available for: %s. Use: run \"help\" for all commands.", cmd)
 	}
@@ -1264,6 +1287,8 @@ var FnMap = map[string]fnSig{
 	"help":          helpTool,
 	// Unified run command
 	"run": runCmd,
+	// Browser tool - routes to runBrowserCommand
+	"browser":        browserCmd,
 	"summarize_chat": summarizeChat,
 }

@@ -1452,4 +1477,26 @@ var BaseTools = []models.Tool{
 			},
 		},
 	},
+	// browser - Playwright browser automation
+	models.Tool{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "browser",
+			Description: "Playwright browser automation. Actions: start (launch browser), stop (close browser), running (check if browser is running), go <url> (navigate), click <selector> [index] (click element), fill <selector> <text> (type into input), text [selector] (extract text), html [selector] (get HTML), screenshot [path] (take screenshot), screenshot_and_view (take and view), wait <selector> (wait for element), drag <x1> <y1> <x2> <y2> (drag by coords) or drag <sel1> <sel2> (drag by selectors)",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"action"},
+				Properties: map[string]models.ToolArgProps{
+					"action": models.ToolArgProps{
+						Type:        "string",
+						Description: "Browser action: start, stop, running, go, click, fill, text, html, screenshot, screenshot_and_view, wait, drag",
+					},
+					"args": models.ToolArgProps{
+						Type:        "string",
+						Description: "Arguments for the action (e.g., URL for go, selector for click, etc.)",
+					},
+				},
+			},
+		},
+	},
 }