diff --git a/bot.go b/bot.go index 5ae215f..56a5318 100644 --- a/bot.go +++ b/bot.go @@ -1416,7 +1416,7 @@ func updateModelLists() { chatBody.Model = m cachedModelColor = "green" updateStatusLine() - UpdateToolCapabilities() + updateToolCapabilities() app.Draw() return } @@ -1530,6 +1530,23 @@ func init() { if cfg.STT_ENABLED { asr = NewSTT(logger, cfg) } + if !cfg.NoPlaywright { + if err := checkPlaywright(); err != nil { + // slow, need a faster check if playwright install + if err := installPW(); err != nil { + logger.Error("failed to install playwright", "error", err) + cancel() + os.Exit(1) + return + } + if err := checkPlaywright(); err != nil { + logger.Error("failed to run playwright", "error", err) + cancel() + os.Exit(1) + return + } + } + } // Initialize scrollToEndEnabled based on config scrollToEndEnabled = cfg.AutoScrollEnabled go updateModelLists() diff --git a/config/config.go b/config/config.go index 7aed1c7..217c32b 100644 --- a/config/config.go +++ b/config/config.go @@ -71,6 +71,7 @@ type Config struct { CharSpecificContextTag string `toml:"CharSpecificContextTag"` AutoTurn bool `toml:"AutoTurn"` // playwright browser + NoPlaywright bool `toml:"NoPlaywright"` // when we want to avoid pw tool use PlaywrightHeadless bool `toml:"PlaywrightHeadless"` } diff --git a/popups.go b/popups.go index aa08e5e..87359e8 100644 --- a/popups.go +++ b/popups.go @@ -143,7 +143,7 @@ func showAPILinkSelectionPopup() { apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) { // Update the API in config cfg.CurrentAPI = mainText - UpdateToolCapabilities() + updateToolCapabilities() // Update model list based on new API // Helper function to get model list for a given API (same as in props_table.go) getModelListForAPI := func(api string) []string { @@ -163,7 +163,7 @@ func showAPILinkSelectionPopup() { if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) { chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark) cfg.CurrentModel = chatBody.Model - UpdateToolCapabilities() + updateToolCapabilities() } pages.RemovePage("apiLinkSelectionPopup") app.SetFocus(textArea) diff --git a/tools.go b/tools.go index 4c3bad6..87956ef 100644 --- a/tools.go +++ b/tools.go @@ -234,7 +234,7 @@ func checkWindowTools() { } } -func UpdateToolCapabilities() { +func updateToolCapabilities() { if !cfg.ToolUse { return } @@ -242,6 +242,7 @@ func UpdateToolCapabilities() { if cfg == nil || cfg.CurrentAPI == "" { logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil") registerWindowTools() + registerPlaywrightTools() return } prevHasVision := modelHasVision @@ -255,6 +256,7 @@ func UpdateToolCapabilities() { } } registerWindowTools() + registerPlaywrightTools() } // getWebAgentClient returns a singleton AgentClient for web agents. @@ -1362,18 +1364,6 @@ var fnMap = map[string]fnSig{ "todo_update": todoUpdate, "todo_delete": todoDelete, "summarize_chat": summarizeChat, - // playwright tools - "pw_start": pwStart, - "pw_stop": pwStop, - "pw_is_running": pwIsRunning, - "pw_navigate": pwNavigate, - "pw_click": pwClick, - "pw_fill": pwFill, - "pw_extract_text": pwExtractText, - "pw_screenshot": pwScreenshot, - "pw_screenshot_and_view": pwScreenshotAndView, - "pw_wait_for_selector": pwWaitForSelector, - "pw_drag": pwDrag, } func removeWindowToolsFromBaseTools() { @@ -1394,6 +1384,40 @@ func removeWindowToolsFromBaseTools() { delete(fnMap, "capture_window_and_view") } +func removePlaywrightToolsFromBaseTools() { + playwrightToolNames := map[string]bool{ + "pw_start": true, + "pw_stop": true, + "pw_is_running": true, + "pw_navigate": true, + "pw_click": true, + "pw_fill": true, + "pw_extract_text": true, + "pw_screenshot": true, + "pw_screenshot_and_view": true, + "pw_wait_for_selector": true, + "pw_drag": true, + } + var filtered []models.Tool + for _, tool := range baseTools { + if !playwrightToolNames[tool.Function.Name] { + filtered = append(filtered, tool) + } + } + baseTools = filtered + delete(fnMap, "pw_start") + delete(fnMap, "pw_stop") + delete(fnMap, "pw_is_running") + delete(fnMap, "pw_navigate") + delete(fnMap, "pw_click") + delete(fnMap, "pw_fill") + delete(fnMap, "pw_extract_text") + delete(fnMap, "pw_screenshot") + delete(fnMap, "pw_screenshot_and_view") + delete(fnMap, "pw_wait_for_selector") + delete(fnMap, "pw_drag") +} + func registerWindowTools() { removeWindowToolsFromBaseTools() if windowToolsAvailable { @@ -1455,6 +1479,235 @@ func registerWindowTools() { } } +func registerPlaywrightTools() { + removePlaywrightToolsFromBaseTools() + if cfg != nil && !cfg.NoPlaywright { + fnMap["pw_start"] = pwStart + fnMap["pw_stop"] = pwStop + fnMap["pw_is_running"] = pwIsRunning + fnMap["pw_navigate"] = pwNavigate + fnMap["pw_click"] = pwClick + fnMap["pw_fill"] = pwFill + fnMap["pw_extract_text"] = pwExtractText + fnMap["pw_screenshot"] = pwScreenshot + fnMap["pw_screenshot_and_view"] = pwScreenshotAndView + fnMap["pw_wait_for_selector"] = pwWaitForSelector + fnMap["pw_drag"] = pwDrag + playwrightTools := []models.Tool{ + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_start", + Description: "Start a Playwright browser instance. Call this first before using other pw_ tools. Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{}, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_stop", + Description: "Stop the Playwright browser instance. Call when done with browser automation.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{}, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_is_running", + Description: "Check if Playwright browser is currently running.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{}, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_navigate", + Description: "Navigate to a URL in the browser.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"url"}, + Properties: map[string]models.ToolArgProps{ + "url": models.ToolArgProps{ + Type: "string", + Description: "URL to navigate to", + }, + }, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_click", + Description: "Click on an element using CSS selector. Use 'index' for multiple matches (default 0).", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"selector"}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "CSS selector for the element to click", + }, + "index": models.ToolArgProps{ + Type: "string", + Description: "optional index for multiple matches (default 0)", + }, + }, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_fill", + Description: "Fill an input field with text using CSS selector.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"selector", "text"}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "CSS selector for the input element", + }, + "text": models.ToolArgProps{ + Type: "string", + Description: "text to fill into the input", + }, + "index": models.ToolArgProps{ + Type: "string", + Description: "optional index for multiple matches (default 0)", + }, + }, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_extract_text", + Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"selector"}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "CSS selector (use 'body' for all page text)", + }, + }, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_screenshot", + Description: "Take a screenshot of the page or a specific element. Returns file path to saved image.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "optional CSS selector for element to screenshot", + }, + "full_page": models.ToolArgProps{ + Type: "string", + Description: "optional: 'true' to capture full page (default false)", + }, + }, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_screenshot_and_view", + Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "optional CSS selector for element to screenshot", + }, + "full_page": models.ToolArgProps{ + Type: "string", + Description: "optional: 'true' to capture full page (default false)", + }, + }, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_wait_for_selector", + Description: "Wait for an element to appear on the page.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"selector"}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "CSS selector to wait for", + }, + "timeout": models.ToolArgProps{ + Type: "string", + Description: "optional timeout in ms (default 30000)", + }, + }, + }, + }, + }, + { + Type: "function", + Function: models.ToolFunc{ + Name: "pw_drag", + Description: "Drag the mouse from one point to another.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"x1", "y1", "x2", "y2"}, + Properties: map[string]models.ToolArgProps{ + "x1": models.ToolArgProps{ + Type: "string", + Description: "starting X coordinate", + }, + "y1": models.ToolArgProps{ + Type: "string", + Description: "starting Y coordinate", + }, + "x2": models.ToolArgProps{ + Type: "string", + Description: "ending X coordinate", + }, + "y2": models.ToolArgProps{ + Type: "string", + Description: "ending Y coordinate", + }, + }, + }, + }, + }, + } + baseTools = append(baseTools, playwrightTools...) + toolSysMsg += browserToolSysMsg + } +} + // callToolWithAgent calls the tool and applies any registered agent. func callToolWithAgent(name string, args map[string]string) []byte { registerWebAgents() @@ -1989,217 +2242,4 @@ func init() { }, ) } - if browserAvailable { - baseTools = append(baseTools, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_start", - Description: "Start a Playwright browser instance. Call this first before using other pw_ tools. Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_stop", - Description: "Stop the Playwright browser instance. Call when done with browser automation.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_is_running", - Description: "Check if Playwright browser is currently running.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_navigate", - Description: "Navigate to a URL in the browser.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"url"}, - Properties: map[string]models.ToolArgProps{ - "url": models.ToolArgProps{ - Type: "string", - Description: "URL to navigate to", - }, - }, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_click", - Description: "Click on an element using CSS selector. Use 'index' for multiple matches (default 0).", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"selector"}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector for the element to click", - }, - "index": models.ToolArgProps{ - Type: "string", - Description: "optional index for multiple matches (default 0)", - }, - }, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_fill", - Description: "Fill an input field with text using CSS selector.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"selector", "text"}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector for the input element", - }, - "text": models.ToolArgProps{ - Type: "string", - Description: "text to fill into the input", - }, - "index": models.ToolArgProps{ - Type: "string", - Description: "optional index for multiple matches (default 0)", - }, - }, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_extract_text", - Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"selector"}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector (use 'body' for all page text)", - }, - }, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_screenshot", - Description: "Take a screenshot of the page or a specific element. Returns file path to saved image.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "optional CSS selector for element to screenshot", - }, - "full_page": models.ToolArgProps{ - Type: "string", - Description: "optional: 'true' to capture full page (default false)", - }, - }, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_screenshot_and_view", - Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "optional CSS selector for element to screenshot", - }, - "full_page": models.ToolArgProps{ - Type: "string", - Description: "optional: 'true' to capture full page (default false)", - }, - }, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_wait_for_selector", - Description: "Wait for an element to appear on the page.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"selector"}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector to wait for", - }, - "timeout": models.ToolArgProps{ - Type: "string", - Description: "optional timeout in ms (default 30000)", - }, - }, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "pw_drag", - Description: "Drag the mouse from one point to another.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"x1", "y1", "x2", "y2"}, - Properties: map[string]models.ToolArgProps{ - "x1": models.ToolArgProps{ - Type: "string", - Description: "starting X coordinate", - }, - "y1": models.ToolArgProps{ - Type: "string", - Description: "starting Y coordinate", - }, - "x2": models.ToolArgProps{ - Type: "string", - Description: "ending X coordinate", - }, - "y2": models.ToolArgProps{ - Type: "string", - Description: "ending Y coordinate", - }, - }, - }, - }, - }, - ) - toolSysMsg += browserToolSysMsg - } } diff --git a/tools_playwright.go b/tools_playwright.go index a4917a9..74a8e41 100644 --- a/tools_playwright.go +++ b/tools_playwright.go @@ -74,23 +74,30 @@ Additional browser automation tools (Playwright): ` var ( - pw *playwright.Playwright - browser playwright.Browser - browserStarted bool - browserStartMu sync.Mutex - page playwright.Page - browserAvailable bool + pw *playwright.Playwright + browser playwright.Browser + browserStarted bool + browserStartMu sync.Mutex + page playwright.Page ) -func checkPlaywright() { +func installPW() error { + err := playwright.Install(&playwright.RunOptions{Verbose: false}) + if err != nil { + logger.Warn("playwright not available", "error", err) + return err + } + return nil +} + +func checkPlaywright() error { var err error pw, err = playwright.Run() if err != nil { logger.Warn("playwright not available", "error", err) - return + return err } - browserAvailable = true - logger.Info("playwright tools available") + return nil } func pwStart(args map[string]string) []byte { @@ -420,7 +427,3 @@ func pwDrag(args map[string]string) []byte { } return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2)) } - -func init() { - go checkPlaywright() -} diff --git a/tui.go b/tui.go index a06a45b..bdf1c9b 100644 --- a/tui.go +++ b/tui.go @@ -860,7 +860,7 @@ func init() { if event.Key() == tcell.KeyCtrlK { // add message from tools cfg.ToolUse = !cfg.ToolUse - UpdateToolCapabilities() + updateToolCapabilities() updateStatusLine() return nil }