Fix (tools): use fs tools
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"sys_prompt": "You are an expert software engineering assistant. Your goal is to help users with coding tasks, debugging, refactoring, and software development.\n\n## Core Principles\n1. **Security First**: Never expose secrets, keys, or credentials. Never commit sensitive data.\n2. **No Git Actions**: You can READ git info (status, log, diff) for context, but NEVER perform git actions (commit, add, push, checkout, reset, rm, etc.). Let the user handle all git operations.\n3. **Explore Before Execute**: Always understand the codebase structure before making changes.\n4. **Follow Conventions**: Match existing code style, patterns, and frameworks used in the project.\n5. **Be Concise**: Minimize output tokens while maintaining quality. Avoid unnecessary explanations.\n6. **Ask First**: When uncertain about intent, ask the user. Don't assume.\n\n## Workflow for Complex Tasks\nFor multi-step tasks, ALWAYS use the todo system to track progress:\n\n1. **Create Todo List**: At the start of complex tasks, use `todo_create` to break down work into actionable items.\n2. **Update Progress**: Mark items as `in_progress` when working on them, and `completed` when done.\n3. 
**Check Status**: Use `todo_read` to review your progress.\n\nExample workflow:\n- User: \"Add user authentication to this app\"\n- You: Create todos: [\"Analyze existing auth structure\", \"Check frameworks in use\", \"Implement auth middleware\", \"Add login endpoints\", \"Test implementation\"]\n\n## Task Execution Flow\n\n### Phase 1: Exploration (Always First)\n- Use `file_list` to understand directory structure (path defaults to FilePickerDir if not specified)\n- Use `file_read` to examine relevant files (paths are relative to FilePickerDir unless starting with `/`)\n- Use `execute_command` with `grep`/`find` to search for patterns\n- Check README, Makefile, package.json, or similar for build/test commands\n- Identify: frameworks, conventions, testing approach, lint/typecheck commands\n- **Git reads allowed**: You may use `git status`, `git log`, `git diff` for context, but only to inform your work\n- **Path handling**: Relative paths resolve against FilePickerDir; absolute paths (starting with `/`) bypass it\n\n### Phase 2: Planning\n- For complex tasks: create todo items\n- Identify files that need modification\n- Plan your approach following existing patterns\n\n### Phase 3: Implementation\n- Make changes using appropriate file tools\n- Prefer `file_write` for new files, `file_read` then edit for existing files\n- Follow existing code style exactly\n- Use existing libraries and utilities\n\n### Phase 4: Verification\n- Run tests if available (check for test scripts in README/Makefile)\n- Run linting/type checking commands\n- Verify changes work as expected\n\n### Phase 5: Completion\n- Update todos to `completed`\n- Provide concise summary of changes\n- Reference specific file paths and line numbers when relevant\n- **DO NOT commit changes** - inform user what was done so they can review and commit themselves\n\n## Command Execution\n- Use `execute_command` with a single string containing command and arguments (e.g., `go run main.go`, `ls -la`, `cd 
/tmp`)\n- Use `cd /path` to change the working directory for file operations",
|
||||
"sys_prompt": "You are an expert software engineering assistant. Your goal is to help users with coding tasks, debugging, refactoring, and software development.\n\n## Core Principles\n1. **Security First**: Never expose secrets, keys, or credentials. Never commit sensitive data.\n2. **No Git Actions**: You can READ git info (status, log, diff) for context, but NEVER perform git actions (commit, add, push, checkout, reset, rm, etc.). Let the user handle all git operations.\n3. **Explore Before Execute**: Always understand the codebase structure before making changes.\n4. **Follow Conventions**: Match existing code style, patterns, and frameworks used in the project.\n5. **Be Concise**: Minimize output tokens while maintaining quality. Avoid unnecessary explanations.\n6. **Ask First**: When uncertain about intent, ask the user. Don't assume.\n\n## Workflow for Complex Tasks\nFor multi-step tasks, ALWAYS use the todo system to track progress:\n\n1. **Create Todo List**: At the start of complex tasks, use `todo create` to break down work into actionable items.\n2. **Update Progress**: Mark items as `in_progress` when working on them, and `completed` when done.\n3. **Check Status**: Use `todo read` to review your progress.\n\nExample workflow:\n- User: \"Add user authentication to this app\"\n- You: Create todos: [\"Analyze existing auth structure\", \"Check frameworks in use\", \"Implement auth middleware\", \"Add login endpoints\", \"Test implementation\"]\n\n## Task Execution Flow\n\n### Phase 1: Exploration (Always First)\n- Use `run \"ls\"` to understand directory structure\n- Use `run \"cat <file>\"` to examine relevant files\n- Use `run \"grep <pattern>\"` or `run \"find . 
-name *.go\"` to search for patterns\n- Check README, Makefile, go.mod for build/test commands\n- Identify: frameworks, conventions, testing approach, lint/typecheck commands\n- **Git reads allowed**: You may use `run \"git status\"`, `run \"git log\"`, `run \"git diff\"` for context\n- **Path handling**: Relative paths resolve against FilePickerDir; absolute paths (starting with `/`) bypass it\n\n### Phase 2: Planning\n- For complex tasks: create todo items\n- Identify files that need modification\n- Plan your approach following existing patterns\n\n### Phase 3: Implementation\n- Make changes using `run \"write <file> <content>\"` for new files, `run \"cat <file>\"` then edit for existing files\n- Follow existing code style exactly\n- Use existing libraries and utilities\n\n### Phase 4: Verification\n- Run tests if available (check for test commands in README/Makefile)\n- Run linting/type checking commands\n- Verify changes work as expected\n\n### Phase 5: Completion\n- Update todos to `completed`\n- Provide concise summary of changes\n- Reference specific file paths and line numbers when relevant\n- **DO NOT commit changes** - inform user what was done so they can review and commit themselves\n\n## Available Commands\n- `run \"ls [path]\"` - list files in directory\n- `run \"cat <file>\"` - read file content\n- `run \"write <file> <content>\"` - write content to file\n- `run \"stat <file>\"` - get file info (size, type, modified)\n- `run \"rm <file>\"` - delete file\n- `run \"cp <src> <dst>\"` - copy file\n- `run \"mv <src> <dst>\"` - move/rename file\n- `run \"mkdir <dir>\"` - create directory\n- `run \"pwd\"` - print working directory\n- `run \"cd <dir>\"` - change directory\n- `run \"sed 's/old/new/' [file]\"` - text replacement\n- `run \"grep <pattern> [file]\"` - filter lines\n- `run \"head [n] [file]\"` - show first n lines\n- `run \"tail [n] [file]\"` - show last n lines\n- `run \"wc [-l|-w|-c] [file]\"` - count lines/words/chars\n- `run \"sort [-r] 
[file]\"` - sort lines\n- `run \"uniq [file]\"` - remove duplicates\n- `run \"echo <text>\"` - echo back input\n- `run \"time\"` - show current time\n- `run \"go <cmd>\"` - go commands (run, build, test, mod, etc.)\n- `run \"git <cmd>\"` - git commands (status, log, diff, show, branch, etc.)\n- `run \"memory store <topic> <data>\"` - save to memory\n- `run \"memory get <topic>\"` - retrieve from memory\n- `run \"memory list\"` - list all topics\n- `run \"memory forget <topic>\"` - delete from memory\n- `run \"todo create <task>\"` - create a todo\n- `run \"todo read\"` - list all todos\n- `run \"todo update <id> <status>\"` - update todo\n- `run \"todo delete <id>\"` - delete a todo\n- `run \"view_img <file>\"` - view an image file\n\nUse: run \"command\" to execute. Supports chaining: cmd1 | cmd2, cmd1 && cmd2",
|
||||
"role": "CodingAssistant",
|
||||
"filepath": "sysprompts/coding_assistant.json",
|
||||
"first_msg": "Hello! I'm your coding assistant. Give me a specific task and I'll get started. For complex work, I'll track progress with todos."
|
||||
|
||||
221
tools/chain.go
221
tools/chain.go
@@ -3,10 +3,7 @@ package tools
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -206,124 +203,43 @@ func tokenize(input string) []string {
|
||||
func execBuiltin(name string, args []string, stdin string) (string, bool) {
|
||||
switch name {
|
||||
case "echo":
|
||||
if stdin != "" {
|
||||
return stdin, true
|
||||
}
|
||||
return strings.Join(args, " "), true
|
||||
return FsEcho(args, stdin), true
|
||||
case "time":
|
||||
return "2006-01-02 15:04:05 MST", true
|
||||
return FsTime(args, stdin), true
|
||||
case "cat":
|
||||
if len(args) == 0 {
|
||||
if stdin != "" {
|
||||
return stdin, true
|
||||
}
|
||||
return "", true
|
||||
}
|
||||
path := args[0]
|
||||
abs := path
|
||||
if !filepath.IsAbs(path) {
|
||||
abs = filepath.Join(cfg.FilePickerDir, path)
|
||||
}
|
||||
data, err := os.ReadFile(abs)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] cat: %v", err), true
|
||||
}
|
||||
return string(data), true
|
||||
return FsCat(args, stdin), true
|
||||
case "pwd":
|
||||
return cfg.FilePickerDir, true
|
||||
return FsPwd(args, stdin), true
|
||||
case "cd":
|
||||
if len(args) == 0 {
|
||||
return "[error] usage: cd <dir>", true
|
||||
}
|
||||
dir := args[0]
|
||||
// Resolve relative to cfg.FilePickerDir
|
||||
abs := dir
|
||||
if !filepath.IsAbs(dir) {
|
||||
abs = filepath.Join(cfg.FilePickerDir, dir)
|
||||
}
|
||||
abs = filepath.Clean(abs)
|
||||
info, err := os.Stat(abs)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] cd: %v", err), true
|
||||
}
|
||||
if !info.IsDir() {
|
||||
return "[error] cd: not a directory: " + dir, true
|
||||
}
|
||||
cfg.FilePickerDir = abs
|
||||
return "Changed directory to: " + cfg.FilePickerDir, true
|
||||
return FsCd(args, stdin), true
|
||||
case "mkdir":
|
||||
if len(args) == 0 {
|
||||
return "[error] usage: mkdir [-p] <dir>", true
|
||||
}
|
||||
createParents := false
|
||||
var dirPath string
|
||||
for _, a := range args {
|
||||
if a == "-p" || a == "--parents" {
|
||||
createParents = true
|
||||
} else if dirPath == "" {
|
||||
dirPath = a
|
||||
}
|
||||
}
|
||||
if dirPath == "" {
|
||||
return "[error] usage: mkdir [-p] <dir>", true
|
||||
}
|
||||
abs := dirPath
|
||||
if !filepath.IsAbs(dirPath) {
|
||||
abs = filepath.Join(cfg.FilePickerDir, dirPath)
|
||||
}
|
||||
abs = filepath.Clean(abs)
|
||||
var mkdirFunc func(string, os.FileMode) error
|
||||
if createParents {
|
||||
mkdirFunc = os.MkdirAll
|
||||
} else {
|
||||
mkdirFunc = os.Mkdir
|
||||
}
|
||||
if err := mkdirFunc(abs, 0o755); err != nil {
|
||||
return fmt.Sprintf("[error] mkdir: %v", err), true
|
||||
}
|
||||
if createParents {
|
||||
return "Created " + dirPath + " (with parents)", true
|
||||
}
|
||||
return "Created " + dirPath, true
|
||||
return FsMkdir(args, stdin), true
|
||||
case "ls":
|
||||
dir := "."
|
||||
for _, a := range args {
|
||||
if !strings.HasPrefix(a, "-") {
|
||||
dir = a
|
||||
break
|
||||
}
|
||||
}
|
||||
abs := dir
|
||||
if !filepath.IsAbs(dir) {
|
||||
abs = filepath.Join(cfg.FilePickerDir, dir)
|
||||
}
|
||||
entries, err := os.ReadDir(abs)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] ls: %v", err), true
|
||||
}
|
||||
var out strings.Builder
|
||||
for _, e := range entries {
|
||||
info, _ := e.Info()
|
||||
switch {
|
||||
case e.IsDir():
|
||||
fmt.Fprintf(&out, "d %-8s %s/\n", "-", e.Name())
|
||||
case info != nil:
|
||||
size := info.Size()
|
||||
sizeStr := strconv.FormatInt(size, 10)
|
||||
if size > 1024 {
|
||||
sizeStr = fmt.Sprintf("%.1fKB", float64(size)/1024)
|
||||
}
|
||||
fmt.Fprintf(&out, "f %-8s %s\n", sizeStr, e.Name())
|
||||
default:
|
||||
fmt.Fprintf(&out, "f %-8s %s\n", "?", e.Name())
|
||||
}
|
||||
}
|
||||
if out.Len() == 0 {
|
||||
return "(empty directory)", true
|
||||
}
|
||||
return strings.TrimRight(out.String(), "\n"), true
|
||||
return FsLs(args, stdin), true
|
||||
case "cp":
|
||||
return FsCp(args, stdin), true
|
||||
case "mv":
|
||||
return FsMv(args, stdin), true
|
||||
case "rm":
|
||||
return FsRm(args, stdin), true
|
||||
case "grep":
|
||||
return FsGrep(args, stdin), true
|
||||
case "head":
|
||||
return FsHead(args, stdin), true
|
||||
case "tail":
|
||||
return FsTail(args, stdin), true
|
||||
case "wc":
|
||||
return FsWc(args, stdin), true
|
||||
case "sort":
|
||||
return FsSort(args, stdin), true
|
||||
case "uniq":
|
||||
return FsUniq(args, stdin), true
|
||||
case "sed":
|
||||
return FsSed(args, stdin), true
|
||||
case "stat":
|
||||
return FsStat(args, stdin), true
|
||||
case "go":
|
||||
// Allow all go subcommands
|
||||
// go is special - runs system command with FilePickerDir as working directory
|
||||
if len(args) == 0 {
|
||||
return "[error] usage: go <subcommand> [options]", true
|
||||
}
|
||||
@@ -334,83 +250,6 @@ func execBuiltin(name string, args []string, stdin string) (string, bool) {
|
||||
return fmt.Sprintf("[error] go %s: %v\n%s", args[0], err, string(output)), true
|
||||
}
|
||||
return string(output), true
|
||||
case "cp":
|
||||
if len(args) < 2 {
|
||||
return "[error] usage: cp <source> <dest>", true
|
||||
}
|
||||
src := args[0]
|
||||
dst := args[1]
|
||||
if !filepath.IsAbs(src) {
|
||||
src = filepath.Join(cfg.FilePickerDir, src)
|
||||
}
|
||||
if !filepath.IsAbs(dst) {
|
||||
dst = filepath.Join(cfg.FilePickerDir, dst)
|
||||
}
|
||||
data, err := os.ReadFile(src)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] cp: %v", err), true
|
||||
}
|
||||
err = os.WriteFile(dst, data, 0644)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] cp: %v", err), true
|
||||
}
|
||||
return "Copied " + src + " to " + dst, true
|
||||
case "mv":
|
||||
if len(args) < 2 {
|
||||
return "[error] usage: mv <source> <dest>", true
|
||||
}
|
||||
src := args[0]
|
||||
dst := args[1]
|
||||
if !filepath.IsAbs(src) {
|
||||
src = filepath.Join(cfg.FilePickerDir, src)
|
||||
}
|
||||
if !filepath.IsAbs(dst) {
|
||||
dst = filepath.Join(cfg.FilePickerDir, dst)
|
||||
}
|
||||
err := os.Rename(src, dst)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] mv: %v", err), true
|
||||
}
|
||||
return "Moved " + src + " to " + dst, true
|
||||
case "rm":
|
||||
if len(args) == 0 {
|
||||
return "[error] usage: rm [-r] <file>", true
|
||||
}
|
||||
recursive := false
|
||||
var target string
|
||||
for _, a := range args {
|
||||
if a == "-r" || a == "-rf" || a == "-fr" || a == "-recursive" {
|
||||
recursive = true
|
||||
} else if target == "" {
|
||||
target = a
|
||||
}
|
||||
}
|
||||
if target == "" {
|
||||
return "[error] usage: rm [-r] <file>", true
|
||||
}
|
||||
abs := target
|
||||
if !filepath.IsAbs(target) {
|
||||
abs = filepath.Join(cfg.FilePickerDir, target)
|
||||
}
|
||||
info, err := os.Stat(abs)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] rm: %v", err), true
|
||||
}
|
||||
if info.IsDir() {
|
||||
if recursive {
|
||||
err = os.RemoveAll(abs)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] rm: %v", err), true
|
||||
}
|
||||
return "Removed " + abs, true
|
||||
}
|
||||
return "[error] rm: is a directory (use -r)", true
|
||||
}
|
||||
err = os.Remove(abs)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("[error] rm: %v", err), true
|
||||
}
|
||||
return "Removed " + abs, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
458
tools/tools.go
458
tools/tools.go
@@ -35,7 +35,7 @@ Your current tools:
|
||||
{
|
||||
"name":"run",
|
||||
"args": ["command"],
|
||||
"when_to_use": "Main tool for file operations, shell commands, memory, git, and todo. Use run \"help\" for all commands. Examples: run \"ls -la\", run \"help\", run \"mkdir -p foo/bar\", run \"cat file.txt\", run \"view_img image.png\", run \"git status\", run \"memory store foo bar\", run \"todo create task\", run \"grep pattern file\", run \"cd /path\", run \"pwd\", run \"find . -name *.txt\", run \"file image.png\", run \"head file\", run \"tail file\", run \"wc -l file\", run \"sort file\", run \"uniq file\", run \"sed 's/old/new/' file\", run \"echo text\", run \"go build ./...\", run \"time\", run \"stat file\", run \"cp src dst\", run \"mv src dst\", run \"rm file\""
|
||||
"when_to_use": "Main tool for file operations, shell commands, memory, git, and todo. Use run \"help\" for all commands. Examples: run \"ls -la\", run \"help\", run \"mkdir -p foo/bar\", run \"cat file.txt\", run \"write file.txt content\", run \"view_img image.png\", run \"git status\", run \"memory store foo bar\", run \"todo create task\", run \"grep pattern file\", run \"cd /path\", run \"pwd\", run \"find . -name *.txt\", run \"file image.png\", run \"head file\", run \"tail file\", run \"wc -l file\", run \"sort file\", run \"uniq file\", run \"sed 's/old/new/' file\", run \"echo text\", run \"go build ./...\", run \"time\", run \"stat file\", run \"cp src dst\", run \"mv src dst\", run \"rm file\""
|
||||
},
|
||||
{
|
||||
"name":"view_img",
|
||||
@@ -403,7 +403,7 @@ func runCmd(args map[string]string) []byte {
|
||||
case "browser":
|
||||
// browser <action> [args...] - Playwright browser automation
|
||||
return runBrowserCommand(rest, args)
|
||||
case "mkdir", "ls", "cat", "pwd", "cd", "cp", "mv", "rm", "sed", "grep", "head", "tail", "wc", "sort", "uniq", "echo", "time", "stat", "go", "find", "file":
|
||||
case "mkdir", "ls", "cat", "write", "stat", "pwd", "cd", "cp", "mv", "rm", "sed", "grep", "head", "tail", "wc", "sort", "uniq", "echo", "time", "go", "find", "file":
|
||||
// File operations and shell commands - use ExecChain which has whitelist
|
||||
return executeCommand(args)
|
||||
case "git":
|
||||
@@ -1293,467 +1293,13 @@ func summarizeChat(args map[string]string) []byte {
|
||||
return data
|
||||
}
|
||||
|
||||
// func removePlaywrightToolsFromBaseTools() {
|
||||
// playwrightToolNames := map[string]bool{
|
||||
// "pw_start": true,
|
||||
// "pw_stop": true,
|
||||
// "pw_is_running": true,
|
||||
// "pw_navigate": true,
|
||||
// "pw_click": true,
|
||||
// "pw_click_at": true,
|
||||
// "pw_fill": true,
|
||||
// "pw_extract_text": true,
|
||||
// "pw_screenshot": true,
|
||||
// "pw_screenshot_and_view": true,
|
||||
// "pw_wait_for_selector": true,
|
||||
// "pw_drag": true,
|
||||
// }
|
||||
// var filtered []models.Tool
|
||||
// for _, tool := range BaseTools {
|
||||
// if !playwrightToolNames[tool.Function.Name] {
|
||||
// filtered = append(filtered, tool)
|
||||
// }
|
||||
// }
|
||||
// BaseTools = filtered
|
||||
// delete(FnMap, "pw_start")
|
||||
// delete(FnMap, "pw_stop")
|
||||
// delete(FnMap, "pw_is_running")
|
||||
// delete(FnMap, "pw_navigate")
|
||||
// delete(FnMap, "pw_click")
|
||||
// delete(FnMap, "pw_click_at")
|
||||
// delete(FnMap, "pw_fill")
|
||||
// delete(FnMap, "pw_extract_text")
|
||||
// delete(FnMap, "pw_screenshot")
|
||||
// delete(FnMap, "pw_screenshot_and_view")
|
||||
// delete(FnMap, "pw_wait_for_selector")
|
||||
// delete(FnMap, "pw_drag")
|
||||
// }
|
||||
|
||||
// func (t *Tools) RegisterWindowTools(modelHasVision bool) {
|
||||
// removeWindowToolsFromBaseTools()
|
||||
// if t.WindowToolsAvailable {
|
||||
// FnMap["list_windows"] = listWindows
|
||||
// FnMap["capture_window"] = captureWindow
|
||||
// windowTools := []models.Tool{
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "list_windows",
|
||||
// Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "capture_window",
|
||||
// Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"window"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "window": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "window ID or window name (partial match)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
// if modelHasVision {
|
||||
// FnMap["capture_window_and_view"] = captureWindowAndView
|
||||
// windowTools = append(windowTools, models.Tool{
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "capture_window_and_view",
|
||||
// Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"window"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "window": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "window ID or window name (partial match)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// })
|
||||
// }
|
||||
// BaseTools = append(BaseTools, windowTools...)
|
||||
// ToolSysMsg += windowToolSysMsg
|
||||
// }
|
||||
// }
|
||||
|
||||
// for pw agentA
|
||||
// var browserAgentSysPrompt = `You are an autonomous browser automation agent. Your goal is to complete the user's task by intelligently using browser automation tools.
|
||||
|
||||
// Important: The browser may already be running from a previous task! Always check pw_is_running first before starting a new browser.
|
||||
|
||||
// Available tools:
|
||||
// - pw_start: Start browser (only if not already running)
|
||||
// - pw_stop: Stop browser (only when you're truly done and browser is no longer needed)
|
||||
// - pw_is_running: Check if browser is running
|
||||
// - pw_navigate: Go to a URL
|
||||
// - pw_click: Click an element by CSS selector
|
||||
// - pw_fill: Type text into an input
|
||||
// - pw_extract_text: Get text from page/element
|
||||
// - pw_screenshot: Take a screenshot (returns file path)
|
||||
// - pw_screenshot_and_view: Take screenshot with image for viewing
|
||||
// - pw_wait_for_selector: Wait for element to appear
|
||||
// - pw_drag: Drag mouse from one point to another
|
||||
// - pw_click_at: Click at X,Y coordinates
|
||||
// - pw_get_html: Get HTML content
|
||||
// - pw_get_dom: Get structured DOM tree
|
||||
// - pw_search_elements: Search for elements by text or selector
|
||||
|
||||
// Workflow:
|
||||
// 1. First, check if browser is already running (pw_is_running)
|
||||
// 2. Only start browser if not already running (pw_start)
|
||||
// 3. Navigate to required pages (pw_navigate)
|
||||
// 4. Interact with elements as needed (click, fill, etc.)
|
||||
// 5. Extract information or take screenshots as requested
|
||||
// 6. IMPORTANT: Do NOT stop the browser when done! Leave it running so the user can continue interacting with the page in subsequent requests.
|
||||
|
||||
// Always provide clear feedback about what you're doing and what you found.`
|
||||
|
||||
// func (t *Tools) runBrowserAgent(args map[string]string) []byte {
|
||||
// task, ok := args["task"]
|
||||
// if !ok || task == "" {
|
||||
// return []byte(`{"error": "task argument is required"}`)
|
||||
// }
|
||||
// client := t.GetWebAgentClient()
|
||||
// pwAgent := agent.NewPWAgent(client, browserAgentSysPrompt)
|
||||
// pwAgent.SetTools(agent.GetPWTools())
|
||||
// return pwAgent.ProcessTask(task)
|
||||
// }
|
||||
|
||||
// func registerPlaywrightTools() {
|
||||
// removePlaywrightToolsFromBaseTools()
|
||||
// if cfg != nil && cfg.PlaywrightEnabled {
|
||||
// FnMap["pw_start"] = pwStart
|
||||
// FnMap["pw_stop"] = pwStop
|
||||
// FnMap["pw_is_running"] = pwIsRunning
|
||||
// FnMap["pw_navigate"] = pwNavigate
|
||||
// FnMap["pw_click"] = pwClick
|
||||
// FnMap["pw_click_at"] = pwClickAt
|
||||
// FnMap["pw_fill"] = pwFill
|
||||
// FnMap["pw_extract_text"] = pwExtractText
|
||||
// FnMap["pw_screenshot"] = pwScreenshot
|
||||
// FnMap["pw_screenshot_and_view"] = pwScreenshotAndView
|
||||
// FnMap["pw_wait_for_selector"] = pwWaitForSelector
|
||||
// FnMap["pw_drag"] = pwDrag
|
||||
// FnMap["pw_get_html"] = pwGetHTML
|
||||
// FnMap["pw_get_dom"] = pwGetDOM
|
||||
// FnMap["pw_search_elements"] = pwSearchElements
|
||||
// playwrightTools := []models.Tool{
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_start",
|
||||
// Description: "Start a Playwright browser instance. Call this first before using other pw_ tools. Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_stop",
|
||||
// Description: "Stop the Playwright browser instance. Call when done with browser automation.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_is_running",
|
||||
// Description: "Check if Playwright browser is currently running.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_navigate",
|
||||
// Description: "Navigate to a URL in the browser.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"url"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "url": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "URL to navigate to",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_click",
|
||||
// Description: "Click on an element using CSS selector. Use 'index' for multiple matches (default 0).",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"selector"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "CSS selector for the element to click",
|
||||
// },
|
||||
// "index": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional index for multiple matches (default 0)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_fill",
|
||||
// Description: "Fill an input field with text using CSS selector.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"selector", "text"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "CSS selector for the input element",
|
||||
// },
|
||||
// "text": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "text to fill into the input",
|
||||
// },
|
||||
// "index": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional index for multiple matches (default 0)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_extract_text",
|
||||
// Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"selector"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "CSS selector (use 'body' for all page text)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_screenshot",
|
||||
// Description: "Take a screenshot of the page or a specific element. Returns file path to saved image.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional CSS selector for element to screenshot",
|
||||
// },
|
||||
// "full_page": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional: 'true' to capture full page (default false)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_screenshot_and_view",
|
||||
// Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional CSS selector for element to screenshot",
|
||||
// },
|
||||
// "full_page": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional: 'true' to capture full page (default false)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_wait_for_selector",
|
||||
// Description: "Wait for an element to appear on the page.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"selector"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "CSS selector to wait for",
|
||||
// },
|
||||
// "timeout": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional timeout in ms (default 30000)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_drag",
|
||||
// Description: "Drag the mouse from one point to another.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"x1", "y1", "x2", "y2"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "x1": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "starting X coordinate",
|
||||
// },
|
||||
// "y1": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "starting Y coordinate",
|
||||
// },
|
||||
// "x2": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "ending X coordinate",
|
||||
// },
|
||||
// "y2": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "ending Y coordinate",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_get_html",
|
||||
// Description: "Get the HTML content of the page or a specific element.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional CSS selector (default: body)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_get_dom",
|
||||
// Description: "Get a structured DOM representation of an element with tag, attributes, text, and children.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "optional CSS selector (default: body)",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "pw_search_elements",
|
||||
// Description: "Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "text": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "text to search for in elements",
|
||||
// },
|
||||
// "selector": models.ToolArgProps{
|
||||
// Type: "string",
|
||||
// Description: "CSS selector to search for",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
// BaseTools = append(BaseTools, playwrightTools...)
|
||||
// ToolSysMsg += browserToolSysMsg
|
||||
// agent.RegisterPWTool("pw_start", pwStart)
|
||||
// agent.RegisterPWTool("pw_stop", pwStop)
|
||||
// agent.RegisterPWTool("pw_is_running", pwIsRunning)
|
||||
// agent.RegisterPWTool("pw_navigate", pwNavigate)
|
||||
// agent.RegisterPWTool("pw_click", pwClick)
|
||||
// agent.RegisterPWTool("pw_click_at", pwClickAt)
|
||||
// agent.RegisterPWTool("pw_fill", pwFill)
|
||||
// agent.RegisterPWTool("pw_extract_text", pwExtractText)
|
||||
// agent.RegisterPWTool("pw_screenshot", pwScreenshot)
|
||||
// agent.RegisterPWTool("pw_screenshot_and_view", pwScreenshotAndView)
|
||||
// agent.RegisterPWTool("pw_wait_for_selector", pwWaitForSelector)
|
||||
// agent.RegisterPWTool("pw_drag", pwDrag)
|
||||
// agent.RegisterPWTool("pw_get_html", pwGetHTML)
|
||||
// agent.RegisterPWTool("pw_get_dom", pwGetDOM)
|
||||
// agent.RegisterPWTool("pw_search_elements", pwSearchElements)
|
||||
// browserAgentTool := []models.Tool{
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: models.ToolFunc{
|
||||
// Name: "browser_agent",
|
||||
// Description: "Autonomous browser automation agent. Use for complex multi-step browser tasks like 'go to website, login, and take screenshot'. The agent will plan and execute steps automatically using browser tools.",
|
||||
// Parameters: models.ToolFuncParams{
|
||||
// Type: "object",
|
||||
// Required: []string{"task"},
|
||||
// Properties: map[string]models.ToolArgProps{
|
||||
// "task": {Type: "string", Description: "The task to accomplish, e.g., 'go to github.com and take a screenshot of the homepage'"},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
// BaseTools = append(BaseTools, browserAgentTool...)
|
||||
// FnMap["browser_agent"] = tooler.runBrowserAgent
|
||||
// }
|
||||
// }
|
||||
|
||||
func CallToolWithAgent(name string, args map[string]string) ([]byte, bool) {
|
||||
f, ok := FnMap[name]
|
||||
if !ok {
|
||||
|
||||
Reference in New Issue
Block a user