Enha: palywright dom and elements fetching

This commit is contained in:
Grail Finder
2026-03-03 09:27:05 +03:00
parent 0c9c590d8f
commit 93ecfc8a34
4 changed files with 254 additions and 4 deletions

View File

@@ -56,3 +56,6 @@ StripThinkingFromAPI = true # Strip <think> blocks from messages before sending
# Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled) # Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled)
# Models that support reasoning will include thinking content wrapped in <think> tags # Models that support reasoning will include thinking content wrapped in <think> tags
ReasoningEffort = "medium" ReasoningEffort = "medium"
# playwright
NoPlaywright = false
PlaywrightDebug = false

View File

@@ -71,8 +71,8 @@ type Config struct {
CharSpecificContextTag string `toml:"CharSpecificContextTag"` CharSpecificContextTag string `toml:"CharSpecificContextTag"`
AutoTurn bool `toml:"AutoTurn"` AutoTurn bool `toml:"AutoTurn"`
// playwright browser // playwright browser
NoPlaywright bool `toml:"NoPlaywright"` // when we want to avoid pw tool use NoPlaywright bool `toml:"NoPlaywright"` // when we want to avoid pw tool use
PlaywrightHeadless bool `toml:"PlaywrightHeadless"` PlaywrightDebug bool `toml:"PlaywrightDebug"` // !headless
} }
func LoadConfig(fn string) (*Config, error) { func LoadConfig(fn string) (*Config, error) {

View File

@@ -1493,6 +1493,9 @@ func registerPlaywrightTools() {
fnMap["pw_screenshot_and_view"] = pwScreenshotAndView fnMap["pw_screenshot_and_view"] = pwScreenshotAndView
fnMap["pw_wait_for_selector"] = pwWaitForSelector fnMap["pw_wait_for_selector"] = pwWaitForSelector
fnMap["pw_drag"] = pwDrag fnMap["pw_drag"] = pwDrag
fnMap["pw_get_html"] = pwGetHTML
fnMap["pw_get_dom"] = pwGetDOM
fnMap["pw_search_elements"] = pwSearchElements
playwrightTools := []models.Tool{ playwrightTools := []models.Tool{
{ {
Type: "function", Type: "function",
@@ -1702,6 +1705,61 @@ func registerPlaywrightTools() {
}, },
}, },
}, },
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_get_html",
Description: "Get the HTML content of the page or a specific element.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "optional CSS selector (default: body)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_get_dom",
Description: "Get a structured DOM representation of an element with tag, attributes, text, and children.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{
"selector": models.ToolArgProps{
Type: "string",
Description: "optional CSS selector (default: body)",
},
},
},
},
},
{
Type: "function",
Function: models.ToolFunc{
Name: "pw_search_elements",
Description: "Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{},
Properties: map[string]models.ToolArgProps{
"text": models.ToolArgProps{
Type: "string",
Description: "text to search for in elements",
},
"selector": models.ToolArgProps{
Type: "string",
Description: "CSS selector to search for",
},
},
},
},
},
} }
baseTools = append(baseTools, playwrightTools...) baseTools = append(baseTools, playwrightTools...)
toolSysMsg += browserToolSysMsg toolSysMsg += browserToolSysMsg

View File

@@ -69,6 +69,21 @@ Additional browser automation tools (Playwright):
"name": "pw_drag", "name": "pw_drag",
"args": ["x1", "y1", "x2", "y2"], "args": ["x1", "y1", "x2", "y2"],
"when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)" "when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)"
},
{
"name": "pw_get_html",
"args": ["selector"],
"when_to_use": "get the HTML content of the page or a specific element. Use when you need to understand page structure or extract HTML."
},
{
"name": "pw_get_dom",
"args": ["selector"],
"when_to_use": "get a structured DOM representation with tag, attributes, text, and children. Use when you need a readable tree view of page elements."
},
{
"name": "pw_search_elements",
"args": ["text", "selector"],
"when_to_use": "search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML."
} }
] ]
` `
@@ -106,10 +121,9 @@ func pwStart(args map[string]string) []byte {
if browserStarted { if browserStarted {
return []byte(`{"error": "Browser already started"}`) return []byte(`{"error": "Browser already started"}`)
} }
headless := cfg == nil || cfg.PlaywrightHeadless
var err error var err error
browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{ browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
Headless: playwright.Bool(headless), Headless: playwright.Bool(!cfg.PlaywrightDebug),
}) })
if err != nil { if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to launch browser: %s"}`, err.Error())) return []byte(fmt.Sprintf(`{"error": "failed to launch browser: %s"}`, err.Error()))
@@ -427,3 +441,178 @@ func pwDrag(args map[string]string) []byte {
} }
return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2)) return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2))
} }
func pwGetHTML(args map[string]string) []byte {
selector := args["selector"]
if selector == "" {
selector = "body"
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
locator := page.Locator(selector)
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
}
if count == 0 {
return []byte(`{"error": "No elements found"}`)
}
html, err := locator.First().InnerHTML()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to get HTML: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html)))
}
type DOMElement struct {
Tag string `json:"tag,omitempty"`
Attributes map[string]string `json:"attributes,omitempty"`
Text string `json:"text,omitempty"`
Children []DOMElement `json:"children,omitempty"`
Selector string `json:"selector,omitempty"`
InnerHTML string `json:"innerHTML,omitempty"`
}
func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) {
var results []DOMElement
count, err := locator.Count()
if err != nil {
return nil, err
}
for i := 0; i < count; i++ {
el := locator.Nth(i)
dom, err := elementToDOM(el)
if err != nil {
continue
}
results = append(results, dom)
}
return results, nil
}
func elementToDOM(el playwright.Locator) (DOMElement, error) {
dom := DOMElement{}
tag, err := el.Evaluate(`el => el.nodeName`, nil)
if err == nil {
dom.Tag = strings.ToLower(fmt.Sprintf("%v", tag))
}
attributes := make(map[string]string)
attrs, err := el.Evaluate(`el => {
let attrs = {};
for (let i = 0; i < el.attributes.length; i++) {
let attr = el.attributes[i];
attrs[attr.name] = attr.value;
}
return attrs;
}`, nil)
if err == nil {
if amap, ok := attrs.(map[string]any); ok {
for k, v := range amap {
if vs, ok := v.(string); ok {
attributes[k] = vs
}
}
}
}
if len(attributes) > 0 {
dom.Attributes = attributes
}
text, err := el.TextContent()
if err == nil && text != "" {
dom.Text = text
}
innerHTML, err := el.InnerHTML()
if err == nil && innerHTML != "" {
dom.InnerHTML = innerHTML
}
childCount, _ := el.Count()
if childCount > 0 {
childrenLocator := el.Locator("*")
children, err := buildDOMTree(childrenLocator)
if err == nil && len(children) > 0 {
dom.Children = children
}
}
return dom, nil
}
func pwGetDOM(args map[string]string) []byte {
selector := args["selector"]
if selector == "" {
selector = "body"
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
locator := page.Locator(selector)
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
}
if count == 0 {
return []byte(`{"error": "No elements found"}`)
}
dom, err := elementToDOM(locator.First())
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to get DOM: %s"}`, err.Error()))
}
data, err := json.Marshal(dom)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to marshal DOM: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"dom": %s}`, string(data)))
}
func pwSearchElements(args map[string]string) []byte {
text := args["text"]
selector := args["selector"]
if text == "" && selector == "" {
return []byte(`{"error": "text or selector not provided"}`)
}
if !browserStarted || page == nil {
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
}
var locator playwright.Locator
if text != "" {
locator = page.GetByText(text)
} else {
locator = page.Locator(selector)
}
count, err := locator.Count()
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to search elements: %s"}`, err.Error()))
}
if count == 0 {
return []byte(`{"elements": []}`)
}
var results []map[string]string
for i := 0; i < count; i++ {
el := locator.Nth(i)
tag, _ := el.Evaluate(`el => el.nodeName`, nil)
text, _ := el.TextContent()
html, _ := el.InnerHTML()
results = append(results, map[string]string{
"index": fmt.Sprintf("%d", i),
"tag": strings.ToLower(fmt.Sprintf("%v", tag)),
"text": text,
"html": html,
})
}
data, err := json.Marshal(results)
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to marshal results: %s"}`, err.Error()))
}
return []byte(fmt.Sprintf(`{"elements": %s}`, string(data)))
}
func jsonString(s string) string {
b, _ := json.Marshal(s)
return string(b)
}