diff --git a/config.example.toml b/config.example.toml
index 8893345..7df35d9 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -56,3 +56,6 @@ StripThinkingFromAPI = true # Strip blocks from messages before sending
 # Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled)
 # Models that support reasoning will include thinking content wrapped in tags
 ReasoningEffort = "medium"
+# Playwright browser tools
+NoPlaywright = false    # set to true to avoid pw_* tool use
+PlaywrightDebug = false # set to true to launch the browser non-headless
diff --git a/config/config.go b/config/config.go
index 217c32b..e812fb8 100644
--- a/config/config.go
+++ b/config/config.go
@@ -71,8 +71,8 @@ type Config struct {
 	CharSpecificContextTag string `toml:"CharSpecificContextTag"`
 	AutoTurn               bool   `toml:"AutoTurn"`
 	// playwright browser
-	NoPlaywright       bool `toml:"NoPlaywright"` // when we want to avoid pw tool use
-	PlaywrightHeadless bool `toml:"PlaywrightHeadless"`
+	NoPlaywright    bool `toml:"NoPlaywright"`    // when we want to avoid pw tool use
+	PlaywrightDebug bool `toml:"PlaywrightDebug"` // launch the browser non-headless
 }
 
 func LoadConfig(fn string) (*Config, error) {
diff --git a/tools.go b/tools.go
index 87956ef..5683a90 100644
--- a/tools.go
+++ b/tools.go
@@ -1493,6 +1493,9 @@ func registerPlaywrightTools() {
 	fnMap["pw_screenshot_and_view"] = pwScreenshotAndView
 	fnMap["pw_wait_for_selector"] = pwWaitForSelector
 	fnMap["pw_drag"] = pwDrag
+	fnMap["pw_get_html"] = pwGetHTML
+	fnMap["pw_get_dom"] = pwGetDOM
+	fnMap["pw_search_elements"] = pwSearchElements
 	playwrightTools := []models.Tool{
 		{
 			Type: "function",
@@ -1702,6 +1705,61 @@ func registerPlaywrightTools() {
 				},
 			},
 		},
+		{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "pw_get_html",
+				Description: "Get the inner HTML of the page body or of the first element matching a selector.",
+				Parameters: models.ToolFuncParams{
+					Type:     "object",
+					Required: []string{},
+					Properties: map[string]models.ToolArgProps{
+						"selector": models.ToolArgProps{
+							Type:        "string",
+							Description: "optional CSS selector (default: body)",
+						},
+					},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "pw_get_dom",
+				Description: "Get a structured DOM representation of an element with tag, attributes, text, innerHTML, and children.",
+				Parameters: models.ToolFuncParams{
+					Type:     "object",
+					Required: []string{},
+					Properties: map[string]models.ToolArgProps{
+						"selector": models.ToolArgProps{
+							Type:        "string",
+							Description: "optional CSS selector (default: body)",
+						},
+					},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: models.ToolFunc{
+				Name:        "pw_search_elements",
+				Description: "Search for elements by text content or CSS selector (at least one is required). Returns matching elements with their tags, text, and HTML.",
+				Parameters: models.ToolFuncParams{
+					Type:     "object",
+					Required: []string{},
+					Properties: map[string]models.ToolArgProps{
+						"text": models.ToolArgProps{
+							Type:        "string",
+							Description: "text to search for in elements (takes precedence over selector)",
+						},
+						"selector": models.ToolArgProps{
+							Type:        "string",
+							Description: "CSS selector to search for (used when text is empty)",
+						},
+					},
+				},
+			},
+		},
 	}
 	baseTools = append(baseTools, playwrightTools...)
 	toolSysMsg += browserToolSysMsg
diff --git a/tools_playwright.go b/tools_playwright.go
index 74a8e41..cd36b60 100644
--- a/tools_playwright.go
+++ b/tools_playwright.go
@@ -69,6 +69,21 @@ Additional browser automation tools (Playwright):
     "name": "pw_drag",
     "args": ["x1", "y1", "x2", "y2"],
     "when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)"
+},
+{
+    "name": "pw_get_html",
+    "args": ["selector"],
+    "when_to_use": "get the inner HTML of the page body or of a specific element. Use when you need to understand page structure or extract HTML."
+},
+{
+    "name": "pw_get_dom",
+    "args": ["selector"],
+    "when_to_use": "get a structured DOM representation with tag, attributes, text, and children. Use when you need a readable tree view of page elements."
+},
+{
+    "name": "pw_search_elements",
+    "args": ["text", "selector"],
+    "when_to_use": "search for elements by text content or CSS selector (text wins when both are given). Returns matching elements with their tags, text, and HTML."
 }
 ]
 `
@@ -106,7 +121,8 @@ func pwStart(args map[string]string) []byte {
 	if browserStarted {
 		return []byte(`{"error": "Browser already started"}`)
 	}
-	headless := cfg == nil || cfg.PlaywrightHeadless
+	// Headless unless PlaywrightDebug is set; a nil cfg keeps the headless default.
+	headless := cfg == nil || !cfg.PlaywrightDebug
 	var err error
 	browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
 		Headless: playwright.Bool(headless),
@@ -427,3 +443,206 @@ func pwDrag(args map[string]string) []byte {
 	}
 	return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2))
 }
+
+// pwErrorJSON builds a {"error": "<msg>: <err>"} tool response. The text is
+// JSON-encoded so quotes or newlines in err cannot produce invalid JSON.
+func pwErrorJSON(msg string, err error) []byte {
+	return []byte(fmt.Sprintf(`{"error": %s}`, jsonString(msg+": "+err.Error())))
+}
+
+// pwGetHTML returns {"html": "..."} holding the inner HTML of the first element
+// matching args["selector"]; the selector defaults to "body".
+func pwGetHTML(args map[string]string) []byte {
+	selector := args["selector"]
+	if selector == "" {
+		selector = "body"
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return pwErrorJSON("failed to find elements", err)
+	}
+	if count == 0 {
+		return []byte(`{"error": "No elements found"}`)
+	}
+	html, err := locator.First().InnerHTML()
+	if err != nil {
+		return pwErrorJSON("failed to get HTML", err)
+	}
+	return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html)))
+}
+
+// DOMElement is one node of the tree returned by pw_get_dom.
+type DOMElement struct {
+	Tag        string            `json:"tag,omitempty"`
+	Attributes map[string]string `json:"attributes,omitempty"`
+	Text       string            `json:"text,omitempty"`
+	Children   []DOMElement      `json:"children,omitempty"`
+	Selector   string            `json:"selector,omitempty"`
+	InnerHTML  string            `json:"innerHTML,omitempty"`
+}
+
+// buildDOMTree converts every element matched by locator into a DOMElement.
+// Elements whose conversion fails are skipped.
+func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) {
+	count, err := locator.Count()
+	if err != nil {
+		return nil, err
+	}
+	results := make([]DOMElement, 0, count)
+	for i := 0; i < count; i++ {
+		dom, err := elementToDOM(locator.Nth(i))
+		if err != nil {
+			continue // best effort: keep the elements that can still be read
+		}
+		results = append(results, dom)
+	}
+	return results, nil
+}
+
+// elementToDOM reads the tag, attributes, text content and inner HTML of the
+// element el resolves to, then recurses into its direct child elements.
+// Only a failure to read the node name is returned as an error; the remaining
+// fields are best effort and stay empty when they cannot be read.
+func elementToDOM(el playwright.Locator) (DOMElement, error) {
+	tag, err := el.Evaluate(`el => el.nodeName`, nil)
+	if err != nil {
+		return DOMElement{}, fmt.Errorf("reading node name: %w", err)
+	}
+	dom := DOMElement{Tag: strings.ToLower(fmt.Sprintf("%v", tag))}
+
+	attrs, err := el.Evaluate(`el => {
+		let attrs = {};
+		for (let i = 0; i < el.attributes.length; i++) {
+			let attr = el.attributes[i];
+			attrs[attr.name] = attr.value;
+		}
+		return attrs;
+	}`, nil)
+	if err == nil {
+		if amap, ok := attrs.(map[string]any); ok && len(amap) > 0 {
+			dom.Attributes = make(map[string]string, len(amap))
+			for k, v := range amap {
+				if vs, ok := v.(string); ok {
+					dom.Attributes[k] = vs
+				}
+			}
+		}
+	}
+
+	if text, err := el.TextContent(); err == nil {
+		dom.Text = text
+	}
+	if innerHTML, err := el.InnerHTML(); err == nil {
+		dom.InnerHTML = innerHTML
+	}
+
+	// "xpath=./*" selects direct children only. A plain "*" matches every
+	// descendant, so nested nodes would be repeated under each ancestor.
+	children, err := buildDOMTree(el.Locator("xpath=./*"))
+	if err == nil && len(children) > 0 {
+		dom.Children = children
+	}
+	return dom, nil
+}
+
+// pwGetDOM returns {"dom": {...}} describing the first element matching
+// args["selector"] and its descendants; the selector defaults to "body".
+func pwGetDOM(args map[string]string) []byte {
+	selector := args["selector"]
+	if selector == "" {
+		selector = "body"
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return pwErrorJSON("failed to find elements", err)
+	}
+	if count == 0 {
+		return []byte(`{"error": "No elements found"}`)
+	}
+	dom, err := elementToDOM(locator.First())
+	if err != nil {
+		return pwErrorJSON("failed to get DOM", err)
+	}
+	data, err := pwMarshal(dom)
+	if err != nil {
+		return pwErrorJSON("failed to marshal DOM", err)
+	}
+	return []byte(fmt.Sprintf(`{"dom": %s}`, data))
+}
+
+// pwSearchElements lists the elements matching args["text"] (by text content)
+// or, when text is empty, args["selector"] (CSS selector). The response is
+// {"elements": [...]} where every entry has "index", "tag", "text" and "html".
+func pwSearchElements(args map[string]string) []byte {
+	text := args["text"]
+	selector := args["selector"]
+	if text == "" && selector == "" {
+		return []byte(`{"error": "text or selector not provided"}`)
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	// text takes precedence when both arguments are supplied.
+	var locator playwright.Locator
+	if text != "" {
+		locator = page.GetByText(text)
+	} else {
+		locator = page.Locator(selector)
+	}
+	count, err := locator.Count()
+	if err != nil {
+		return pwErrorJSON("failed to search elements", err)
+	}
+	// Non-nil even when empty, so zero matches encode as [] rather than null.
+	results := make([]map[string]string, 0, count)
+	for i := 0; i < count; i++ {
+		el := locator.Nth(i)
+		// Field reads are best effort: a failed read leaves that field empty
+		// instead of dropping the match.
+		tagName := ""
+		if tag, err := el.Evaluate(`el => el.nodeName`, nil); err == nil {
+			tagName = strings.ToLower(fmt.Sprintf("%v", tag))
+		}
+		elText, _ := el.TextContent()
+		elHTML, _ := el.InnerHTML()
+		results = append(results, map[string]string{
+			"index": fmt.Sprintf("%d", i),
+			"tag":   tagName,
+			"text":  elText,
+			"html":  elHTML,
+		})
+	}
+	data, err := pwMarshal(results)
+	if err != nil {
+		return pwErrorJSON("failed to marshal results", err)
+	}
+	return []byte(fmt.Sprintf(`{"elements": %s}`, data))
+}
+
+// pwMarshal encodes v as JSON without escaping <, > and &, which keeps HTML
+// payloads readable; the output is still valid JSON.
+func pwMarshal(v any) ([]byte, error) {
+	var sb strings.Builder
+	enc := json.NewEncoder(&sb)
+	enc.SetEscapeHTML(false)
+	if err := enc.Encode(v); err != nil {
+		return nil, err
+	}
+	// Encode appends a newline that json.Marshal would not produce.
+	return []byte(strings.TrimSuffix(sb.String(), "\n")), nil
+}
+
+// jsonString returns s encoded as a JSON string literal, quotes included.
+func jsonString(s string) string {
+	// Encoding a plain string cannot fail, so the error is ignored.
+	b, _ := pwMarshal(s)
+	return string(b)
+}