613 lines
17 KiB
Go
613 lines
17 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"gf-lt/models"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/playwright-community/playwright-go"
|
|
)
|
|
|
|
// browserToolSysMsg is the system-prompt fragment that advertises the
// Playwright tools to the model. After the heading line it contains a JSON
// array with one object per tool: its name, the argument names it accepts, and
// a hint on when to use it.
var browserToolSysMsg = `
Additional browser automation tools (Playwright):
[
{
"name": "pw_start",
"args": [],
"when_to_use": "start a browser instance before doing any browser automation. Must be called first."
},
{
"name": "pw_stop",
"args": [],
"when_to_use": "stop the browser instance when done with automation."
},
{
"name": "pw_is_running",
"args": [],
"when_to_use": "check if browser is currently running."
},
{
"name": "pw_navigate",
"args": ["url"],
"when_to_use": "open a specific URL in the web browser."
},
{
"name": "pw_click",
"args": ["selector", "index"],
"when_to_use": "click on an element on the current webpage. Use 'index' for multiple matches (default 0)."
},
{
"name": "pw_fill",
"args": ["selector", "text", "index"],
"when_to_use": "type text into an input field. Use 'index' for multiple matches (default 0)."
},
{
"name": "pw_extract_text",
"args": ["selector"],
"when_to_use": "extract text content from the page or specific elements. Use selector 'body' for all page text."
},
{
"name": "pw_screenshot",
"args": ["selector", "full_page"],
"when_to_use": "take a screenshot of the page or a specific element. Returns a file path to the image. Use to verify actions or inspect visual state."
},
{
"name": "pw_screenshot_and_view",
"args": ["selector", "full_page"],
"when_to_use": "take a screenshot and return the image for viewing. Use to visually verify page state."
},
{
"name": "pw_wait_for_selector",
"args": ["selector", "timeout"],
"when_to_use": "wait for an element to appear on the page before proceeding with further actions."
},
{
"name": "pw_drag",
"args": ["x1", "y1", "x2", "y2"],
"when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)."
},
{
"name": "pw_get_html",
"args": ["selector"],
"when_to_use": "get the HTML content of the page or a specific element. Use to understand page structure or extract raw HTML."
},
{
"name": "pw_get_dom",
"args": ["selector"],
"when_to_use": "get a structured DOM representation with tag, attributes, text, and children. Use to inspect element hierarchy and properties."
},
{
"name": "pw_search_elements",
"args": ["text", "selector"],
"when_to_use": "search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML."
}
]
`
|
|
|
|
var (
|
|
pw *playwright.Playwright
|
|
browser playwright.Browser
|
|
browserStarted bool
|
|
browserStartMu sync.Mutex
|
|
page playwright.Page
|
|
)
|
|
|
|
func installPW() error {
|
|
err := playwright.Install(&playwright.RunOptions{Verbose: false})
|
|
if err != nil {
|
|
logger.Warn("playwright not available", "error", err)
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func checkPlaywright() error {
|
|
var err error
|
|
pw, err = playwright.Run()
|
|
if err != nil {
|
|
logger.Warn("playwright not available", "error", err)
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func pwStart(args map[string]string) []byte {
|
|
browserStartMu.Lock()
|
|
defer browserStartMu.Unlock()
|
|
if browserStarted {
|
|
return []byte(`{"error": "Browser already started"}`)
|
|
}
|
|
var err error
|
|
browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
|
|
Headless: playwright.Bool(!cfg.PlaywrightDebug),
|
|
})
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to launch browser: %s"}`, err.Error()))
|
|
}
|
|
page, err = browser.NewPage()
|
|
if err != nil {
|
|
browser.Close()
|
|
return []byte(fmt.Sprintf(`{"error": "failed to create page: %s"}`, err.Error()))
|
|
}
|
|
browserStarted = true
|
|
return []byte(`{"success": true, "message": "Browser started"}`)
|
|
}
|
|
|
|
func pwStop(args map[string]string) []byte {
|
|
browserStartMu.Lock()
|
|
defer browserStartMu.Unlock()
|
|
if !browserStarted {
|
|
return []byte(`{"success": true, "message": "Browser was not running"}`)
|
|
}
|
|
if page != nil {
|
|
page.Close()
|
|
page = nil
|
|
}
|
|
if browser != nil {
|
|
browser.Close()
|
|
browser = nil
|
|
}
|
|
browserStarted = false
|
|
return []byte(`{"success": true, "message": "Browser stopped"}`)
|
|
}
|
|
|
|
func pwIsRunning(args map[string]string) []byte {
|
|
if browserStarted {
|
|
return []byte(`{"running": true, "message": "Browser is running"}`)
|
|
}
|
|
return []byte(`{"running": false, "message": "Browser is not running"}`)
|
|
}
|
|
|
|
func pwNavigate(args map[string]string) []byte {
|
|
url, ok := args["url"]
|
|
if !ok || url == "" {
|
|
return []byte(`{"error": "url not provided"}`)
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
_, err := page.Goto(url)
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to navigate: %s"}`, err.Error()))
|
|
}
|
|
title, _ := page.Title()
|
|
pageURL := page.URL()
|
|
return []byte(fmt.Sprintf(`{"success": true, "title": "%s", "url": "%s"}`, title, pageURL))
|
|
}
|
|
|
|
func pwClick(args map[string]string) []byte {
|
|
selector, ok := args["selector"]
|
|
if !ok || selector == "" {
|
|
return []byte(`{"error": "selector not provided"}`)
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
index := 0
|
|
if args["index"] != "" {
|
|
if i, err := strconv.Atoi(args["index"]); err != nil {
|
|
logger.Warn("failed to parse index", "value", args["index"], "error", err)
|
|
} else {
|
|
index = i
|
|
}
|
|
}
|
|
locator := page.Locator(selector)
|
|
count, err := locator.Count()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
|
|
}
|
|
if index >= count {
|
|
return []byte(fmt.Sprintf(`{"error": "Element not found at index %d (found %d elements)"}`, index, count))
|
|
}
|
|
err = locator.Nth(index).Click()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to click: %s"}`, err.Error()))
|
|
}
|
|
return []byte(`{"success": true, "message": "Clicked element"}`)
|
|
}
|
|
|
|
func pwFill(args map[string]string) []byte {
|
|
selector, ok := args["selector"]
|
|
if !ok || selector == "" {
|
|
return []byte(`{"error": "selector not provided"}`)
|
|
}
|
|
text := args["text"]
|
|
if text == "" {
|
|
text = ""
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
index := 0
|
|
if args["index"] != "" {
|
|
if i, err := strconv.Atoi(args["index"]); err != nil {
|
|
logger.Warn("failed to parse index", "value", args["index"], "error", err)
|
|
} else {
|
|
index = i
|
|
}
|
|
}
|
|
locator := page.Locator(selector)
|
|
count, err := locator.Count()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
|
|
}
|
|
if index >= count {
|
|
return []byte(fmt.Sprintf(`{"error": "Element not found at index %d"}`, index))
|
|
}
|
|
err = locator.Nth(index).Fill(text)
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to fill: %s"}`, err.Error()))
|
|
}
|
|
return []byte(`{"success": true, "message": "Filled input"}`)
|
|
}
|
|
|
|
func pwExtractText(args map[string]string) []byte {
|
|
selector := args["selector"]
|
|
if selector == "" {
|
|
selector = "body"
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
locator := page.Locator(selector)
|
|
count, err := locator.Count()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
|
|
}
|
|
if count == 0 {
|
|
return []byte(`{"error": "No elements found"}`)
|
|
}
|
|
if selector == "body" {
|
|
text, err := page.Locator("body").TextContent()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to get text: %s"}`, err.Error()))
|
|
}
|
|
return []byte(fmt.Sprintf(`{"text": "%s"}`, text))
|
|
}
|
|
var texts []string
|
|
for i := 0; i < count; i++ {
|
|
text, err := locator.Nth(i).TextContent()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
texts = append(texts, text)
|
|
}
|
|
return []byte(fmt.Sprintf(`{"text": "%s"}`, joinLines(texts)))
|
|
}
|
|
|
|
// joinLines concatenates lines with a single "\n" between consecutive
// elements. A nil or empty slice yields the empty string.
func joinLines(lines []string) string {
	return strings.Join(lines, "\n")
}
|
|
|
|
func pwScreenshot(args map[string]string) []byte {
|
|
selector := args["selector"]
|
|
fullPage := args["full_page"] == "true"
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
|
|
var err error
|
|
if selector != "" && selector != "body" {
|
|
locator := page.Locator(selector)
|
|
_, err = locator.Screenshot(playwright.LocatorScreenshotOptions{
|
|
Path: playwright.String(path),
|
|
})
|
|
} else {
|
|
_, err = page.Screenshot(playwright.PageScreenshotOptions{
|
|
Path: playwright.String(path),
|
|
FullPage: playwright.Bool(fullPage),
|
|
})
|
|
}
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to take screenshot: %s"}`, err.Error()))
|
|
}
|
|
return []byte(fmt.Sprintf(`{"path": "%s"}`, path))
|
|
}
|
|
|
|
func pwScreenshotAndView(args map[string]string) []byte {
|
|
selector := args["selector"]
|
|
fullPage := args["full_page"] == "true"
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
|
|
var err error
|
|
if selector != "" && selector != "body" {
|
|
locator := page.Locator(selector)
|
|
_, err = locator.Screenshot(playwright.LocatorScreenshotOptions{
|
|
Path: playwright.String(path),
|
|
})
|
|
} else {
|
|
_, err = page.Screenshot(playwright.PageScreenshotOptions{
|
|
Path: playwright.String(path),
|
|
FullPage: playwright.Bool(fullPage),
|
|
})
|
|
}
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to take screenshot: %s"}`, err.Error()))
|
|
}
|
|
dataURL, err := models.CreateImageURLFromPath(path)
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to create image URL: %s"}`, err.Error()))
|
|
}
|
|
resp := models.MultimodalToolResp{
|
|
Type: "multimodal_content",
|
|
Parts: []map[string]string{
|
|
{"type": "text", "text": "Screenshot saved: " + path},
|
|
{"type": "image_url", "url": dataURL},
|
|
},
|
|
}
|
|
jsonResult, err := json.Marshal(resp)
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to marshal result: %s"}`, err.Error()))
|
|
}
|
|
return jsonResult
|
|
}
|
|
|
|
func pwWaitForSelector(args map[string]string) []byte {
|
|
selector, ok := args["selector"]
|
|
if !ok || selector == "" {
|
|
return []byte(`{"error": "selector not provided"}`)
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
timeout := 30000
|
|
if args["timeout"] != "" {
|
|
if t, err := strconv.Atoi(args["timeout"]); err != nil {
|
|
logger.Warn("failed to parse timeout", "value", args["timeout"], "error", err)
|
|
} else {
|
|
timeout = t
|
|
}
|
|
}
|
|
locator := page.Locator(selector)
|
|
err := locator.WaitFor(playwright.LocatorWaitForOptions{
|
|
Timeout: playwright.Float(float64(timeout)),
|
|
})
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "element not found: %s"}`, err.Error()))
|
|
}
|
|
return []byte(`{"success": true, "message": "Element found"}`)
|
|
}
|
|
|
|
func pwDrag(args map[string]string) []byte {
|
|
x1, ok := args["x1"]
|
|
if !ok {
|
|
return []byte(`{"error": "x1 not provided"}`)
|
|
}
|
|
y1, ok := args["y1"]
|
|
if !ok {
|
|
return []byte(`{"error": "y1 not provided"}`)
|
|
}
|
|
x2, ok := args["x2"]
|
|
if !ok {
|
|
return []byte(`{"error": "x2 not provided"}`)
|
|
}
|
|
y2, ok := args["y2"]
|
|
if !ok {
|
|
return []byte(`{"error": "y2 not provided"}`)
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
var fx1, fy1, fx2, fy2 float64
|
|
if parsedX1, err := strconv.ParseFloat(x1, 64); err != nil {
|
|
logger.Warn("failed to parse x1", "value", x1, "error", err)
|
|
} else {
|
|
fx1 = parsedX1
|
|
}
|
|
if parsedY1, err := strconv.ParseFloat(y1, 64); err != nil {
|
|
logger.Warn("failed to parse y1", "value", y1, "error", err)
|
|
} else {
|
|
fy1 = parsedY1
|
|
}
|
|
if parsedX2, err := strconv.ParseFloat(x2, 64); err != nil {
|
|
logger.Warn("failed to parse x2", "value", x2, "error", err)
|
|
} else {
|
|
fx2 = parsedX2
|
|
}
|
|
if parsedY2, err := strconv.ParseFloat(y2, 64); err != nil {
|
|
logger.Warn("failed to parse y2", "value", y2, "error", err)
|
|
} else {
|
|
fy2 = parsedY2
|
|
}
|
|
mouse := page.Mouse()
|
|
err := mouse.Move(fx1, fy1)
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error()))
|
|
}
|
|
err = mouse.Down()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to mouse down: %s"}`, err.Error()))
|
|
}
|
|
err = mouse.Move(fx2, fy2)
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error()))
|
|
}
|
|
err = mouse.Up()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to mouse up: %s"}`, err.Error()))
|
|
}
|
|
return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2))
|
|
}
|
|
|
|
func pwGetHTML(args map[string]string) []byte {
|
|
selector := args["selector"]
|
|
if selector == "" {
|
|
selector = "body"
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
locator := page.Locator(selector)
|
|
count, err := locator.Count()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
|
|
}
|
|
if count == 0 {
|
|
return []byte(`{"error": "No elements found"}`)
|
|
}
|
|
html, err := locator.First().InnerHTML()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to get HTML: %s"}`, err.Error()))
|
|
}
|
|
return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html)))
|
|
}
|
|
|
|
// DOMElement is the JSON-serialisable description of a single DOM node. Every
// field is omitted from the encoded output when it holds its zero value.
type DOMElement struct {
	// Tag is the node name of the element.
	Tag string `json:"tag,omitempty"`
	// Attributes maps attribute names to their values.
	Attributes map[string]string `json:"attributes,omitempty"`
	// Text is the text content of the element.
	Text string `json:"text,omitempty"`
	// Children holds the nested elements.
	Children []DOMElement `json:"children,omitempty"`
	// Selector is not populated by the code visible in this file.
	// NOTE(review): presumably a selector that identifies the element — confirm.
	Selector string `json:"selector,omitempty"`
	// InnerHTML is the raw inner markup of the element.
	InnerHTML string `json:"innerHTML,omitempty"`
}
|
|
|
|
func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) {
|
|
var results []DOMElement
|
|
count, err := locator.Count()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for i := 0; i < count; i++ {
|
|
el := locator.Nth(i)
|
|
dom, err := elementToDOM(el)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
results = append(results, dom)
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
func elementToDOM(el playwright.Locator) (DOMElement, error) {
|
|
dom := DOMElement{}
|
|
tag, err := el.Evaluate(`el => el.nodeName`, nil)
|
|
if err == nil {
|
|
dom.Tag = strings.ToLower(fmt.Sprintf("%v", tag))
|
|
}
|
|
attributes := make(map[string]string)
|
|
attrs, err := el.Evaluate(`el => {
|
|
let attrs = {};
|
|
for (let i = 0; i < el.attributes.length; i++) {
|
|
let attr = el.attributes[i];
|
|
attrs[attr.name] = attr.value;
|
|
}
|
|
return attrs;
|
|
}`, nil)
|
|
if err == nil {
|
|
if amap, ok := attrs.(map[string]any); ok {
|
|
for k, v := range amap {
|
|
if vs, ok := v.(string); ok {
|
|
attributes[k] = vs
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if len(attributes) > 0 {
|
|
dom.Attributes = attributes
|
|
}
|
|
text, err := el.TextContent()
|
|
if err == nil && text != "" {
|
|
dom.Text = text
|
|
}
|
|
innerHTML, err := el.InnerHTML()
|
|
if err == nil && innerHTML != "" {
|
|
dom.InnerHTML = innerHTML
|
|
}
|
|
childCount, _ := el.Count()
|
|
if childCount > 0 {
|
|
childrenLocator := el.Locator("*")
|
|
children, err := buildDOMTree(childrenLocator)
|
|
if err == nil && len(children) > 0 {
|
|
dom.Children = children
|
|
}
|
|
}
|
|
return dom, nil
|
|
}
|
|
|
|
func pwGetDOM(args map[string]string) []byte {
|
|
selector := args["selector"]
|
|
if selector == "" {
|
|
selector = "body"
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
locator := page.Locator(selector)
|
|
count, err := locator.Count()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
|
|
}
|
|
if count == 0 {
|
|
return []byte(`{"error": "No elements found"}`)
|
|
}
|
|
dom, err := elementToDOM(locator.First())
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to get DOM: %s"}`, err.Error()))
|
|
}
|
|
data, err := json.Marshal(dom)
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to marshal DOM: %s"}`, err.Error()))
|
|
}
|
|
return []byte(fmt.Sprintf(`{"dom": %s}`, string(data)))
|
|
}
|
|
|
|
func pwSearchElements(args map[string]string) []byte {
|
|
text := args["text"]
|
|
selector := args["selector"]
|
|
if text == "" && selector == "" {
|
|
return []byte(`{"error": "text or selector not provided"}`)
|
|
}
|
|
if !browserStarted || page == nil {
|
|
return []byte(`{"error": "Browser not started. Call pw_start first."}`)
|
|
}
|
|
var locator playwright.Locator
|
|
if text != "" {
|
|
locator = page.GetByText(text)
|
|
} else {
|
|
locator = page.Locator(selector)
|
|
}
|
|
count, err := locator.Count()
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to search elements: %s"}`, err.Error()))
|
|
}
|
|
if count == 0 {
|
|
return []byte(`{"elements": []}`)
|
|
}
|
|
var results []map[string]string
|
|
for i := 0; i < count; i++ {
|
|
el := locator.Nth(i)
|
|
tag, _ := el.Evaluate(`el => el.nodeName`, nil)
|
|
text, _ := el.TextContent()
|
|
html, _ := el.InnerHTML()
|
|
results = append(results, map[string]string{
|
|
"index": strconv.Itoa(i),
|
|
"tag": strings.ToLower(fmt.Sprintf("%v", tag)),
|
|
"text": text,
|
|
"html": html,
|
|
})
|
|
}
|
|
data, err := json.Marshal(results)
|
|
if err != nil {
|
|
return []byte(fmt.Sprintf(`{"error": "failed to marshal results: %s"}`, err.Error()))
|
|
}
|
|
return []byte(fmt.Sprintf(`{"elements": %s}`, string(data)))
|
|
}
|
|
|
|
// jsonString encodes s as a JSON string literal, including the surrounding
// double quotes, using json.Marshal. If marshalling were to fail, the empty
// string is returned.
func jsonString(s string) string {
	if encoded, err := json.Marshal(s); err == nil {
		return string(encoded)
	}
	return ""
}
|