From cad1bd46c191f811729ab1cd5f044931c7d10269 Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Mon, 2 Mar 2026 19:20:54 +0300
Subject: [PATCH] Feat: playwright tools

---
 browser.go          | 451 ++++++++++++++++++++++++++++++++++++++++++++
 config/config.go    |   2 +
 go.mod              |   4 +
 go.sum              |  14 ++
 main.go             |   2 +-
 tools.go            | 225 +++++++++++++++++++++++++
 tools_playwright.go |  62 +++++++
 7 files changed, 759 insertions(+), 1 deletion(-)
 create mode 100644 browser.go
 create mode 100644 tools_playwright.go

diff --git a/browser.go b/browser.go
new file mode 100644
index 0000000..1ff0408
--- /dev/null
+++ b/browser.go
@@ -0,0 +1,451 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/playwright-community/playwright-go"
+
+	"gf-lt/models"
+)
+
+// Playwright state shared by every pw_* tool. browserStartMu guards browser,
+// page and browserStarted.
+var (
+	browserLogger    *slog.Logger
+	pw               *playwright.Playwright
+	browser          playwright.Browser
+	browserStarted   bool
+	browserStartMu   sync.Mutex
+	page             playwright.Page
+	browserAvailable bool
+)
+
+// pwNotStartedMsg is returned by tools that need a running browser.
+const pwNotStartedMsg = `{"error": "Browser not started. Call pw_start first."}`
+
+// pwQuote encodes s as a JSON string literal (quotes included) so that page
+// titles, page text and error messages cannot break the surrounding JSON.
+func pwQuote(s string) string {
+	var buf strings.Builder
+	enc := json.NewEncoder(&buf)
+	// Keep <, > and & readable; the output is not embedded in HTML.
+	enc.SetEscapeHTML(false)
+	if err := enc.Encode(s); err != nil {
+		return `""`
+	}
+	// Encode appends a newline after the value.
+	return strings.TrimSuffix(buf.String(), "\n")
+}
+
+// pwErrorf builds a {"error": "..."} response with an escaped message.
+func pwErrorf(format string, a ...any) []byte {
+	return []byte(`{"error": ` + pwQuote(fmt.Sprintf(format, a...)) + `}`)
+}
+
+// pwActivePage returns the current page, or nil when no browser is running.
+// Callers use the returned value so that a concurrent pw_stop cannot swap the
+// global to nil between their check and their use of it.
+func pwActivePage() playwright.Page {
+	browserStartMu.Lock()
+	defer browserStartMu.Unlock()
+	if !browserStarted {
+		return nil
+	}
+	return page
+}
+
+// pwIndexArg parses the optional "index" argument; it defaults to 0.
+func pwIndexArg(args map[string]string) (int, error) {
+	raw := args["index"]
+	if raw == "" {
+		return 0, nil
+	}
+	index, err := strconv.Atoi(strings.TrimSpace(raw))
+	if err != nil || index < 0 {
+		return 0, fmt.Errorf("index must be a non-negative integer, got %q", raw)
+	}
+	return index, nil
+}
+
+// checkPlaywright starts the Playwright driver and records whether the
+// browser tools can be offered; a failure is logged and leaves them disabled.
+func checkPlaywright() {
+	driver, err := playwright.Run()
+	if err != nil {
+		if browserLogger != nil {
+			browserLogger.Warn("playwright not available", "error", err)
+		}
+		return
+	}
+	pw = driver
+	browserAvailable = true
+	if browserLogger != nil {
+		browserLogger.Info("playwright tools available")
+	}
+}
+
+// pwStart launches Chromium and opens one page. The browser is headless
+// unless the config sets PlaywrightHeadless to false.
+func pwStart(args map[string]string) []byte {
+	browserStartMu.Lock()
+	defer browserStartMu.Unlock()
+
+	if browserStarted {
+		return []byte(`{"error": "Browser already started"}`)
+	}
+	// pw_start stays in fnMap even when the driver could not be started.
+	if pw == nil {
+		return []byte(`{"error": "Playwright is not available"}`)
+	}
+
+	// NOTE(review): a config that omits PlaywrightHeadless decodes to false
+	// (visible browser) unless LoadConfig applies a default; confirm.
+	headless := cfg == nil || cfg.PlaywrightHeadless
+
+	b, err := pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
+		Headless: playwright.Bool(headless),
+	})
+	if err != nil {
+		return pwErrorf("failed to launch browser: %s", err)
+	}
+
+	p, err := b.NewPage()
+	if err != nil {
+		// Best effort: the launch is being abandoned anyway.
+		_ = b.Close()
+		return pwErrorf("failed to create page: %s", err)
+	}
+
+	browser, page, browserStarted = b, p, true
+	return []byte(`{"success": true, "message": "Browser started"}`)
+}
+
+// pwStop closes the page and browser; it also succeeds when nothing is running.
+func pwStop(args map[string]string) []byte {
+	browserStartMu.Lock()
+	defer browserStartMu.Unlock()
+
+	if !browserStarted {
+		return []byte(`{"success": true, "message": "Browser was not running"}`)
+	}
+
+	// Close errors are ignored on purpose: the state is reset either way.
+	if page != nil {
+		_ = page.Close()
+		page = nil
+	}
+	if browser != nil {
+		_ = browser.Close()
+		browser = nil
+	}
+
+	browserStarted = false
+	return []byte(`{"success": true, "message": "Browser stopped"}`)
+}
+
+// pwIsRunning reports whether a browser has been started with pw_start.
+func pwIsRunning(args map[string]string) []byte {
+	browserStartMu.Lock()
+	running := browserStarted
+	browserStartMu.Unlock()
+
+	if running {
+		return []byte(`{"running": true, "message": "Browser is running"}`)
+	}
+	return []byte(`{"running": false, "message": "Browser is not running"}`)
+}
+
+// pwNavigate opens args["url"] and returns the resulting title and URL.
+func pwNavigate(args map[string]string) []byte {
+	url, ok := args["url"]
+	if !ok || url == "" {
+		return []byte(`{"error": "url not provided"}`)
+	}
+
+	p := pwActivePage()
+	if p == nil {
+		return []byte(pwNotStartedMsg)
+	}
+
+	if _, err := p.Goto(url); err != nil {
+		return pwErrorf("failed to navigate: %s", err)
+	}
+
+	// A missing title is not worth failing the navigation over.
+	title, _ := p.Title()
+	return []byte(`{"success": true, "title": ` + pwQuote(title) + `, "url": ` + pwQuote(p.URL()) + `}`)
+}
+
+// pwClick clicks the index-th element matching args["selector"].
+func pwClick(args map[string]string) []byte {
+	selector, ok := args["selector"]
+	if !ok || selector == "" {
+		return []byte(`{"error": "selector not provided"}`)
+	}
+
+	p := pwActivePage()
+	if p == nil {
+		return []byte(pwNotStartedMsg)
+	}
+
+	index, err := pwIndexArg(args)
+	if err != nil {
+		return pwErrorf("%s", err)
+	}
+
+	locator := p.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return pwErrorf("failed to find elements: %s", err)
+	}
+
+	if index >= count {
+		return pwErrorf("Element not found at index %d (found %d elements)", index, count)
+	}
+
+	if err := locator.Nth(index).Click(); err != nil {
+		return pwErrorf("failed to click: %s", err)
+	}
+
+	return []byte(`{"success": true, "message": "Clicked element"}`)
+}
+
+// pwFill fills the index-th input matching args["selector"] with args["text"].
+func pwFill(args map[string]string) []byte {
+	selector, ok := args["selector"]
+	if !ok || selector == "" {
+		return []byte(`{"error": "selector not provided"}`)
+	}
+
+	p := pwActivePage()
+	if p == nil {
+		return []byte(pwNotStartedMsg)
+	}
+
+	index, err := pwIndexArg(args)
+	if err != nil {
+		return pwErrorf("%s", err)
+	}
+
+	locator := p.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return pwErrorf("failed to find elements: %s", err)
+	}
+
+	if index >= count {
+		return pwErrorf("Element not found at index %d", index)
+	}
+
+	// An absent "text" argument fills the empty string.
+	if err := locator.Nth(index).Fill(args["text"]); err != nil {
+		return pwErrorf("failed to fill: %s", err)
+	}
+
+	return []byte(`{"success": true, "message": "Filled input"}`)
+}
+
+// pwExtractText returns the text content of every element matching
+// args["selector"] (default "body"), joined with newlines.
+func pwExtractText(args map[string]string) []byte {
+	selector := args["selector"]
+	if selector == "" {
+		selector = "body"
+	}
+
+	p := pwActivePage()
+	if p == nil {
+		return []byte(pwNotStartedMsg)
+	}
+
+	locator := p.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return pwErrorf("failed to find elements: %s", err)
+	}
+
+	if count == 0 {
+		return []byte(`{"error": "No elements found"}`)
+	}
+
+	texts := make([]string, 0, count)
+	var lastErr error
+	for i := 0; i < count; i++ {
+		text, err := locator.Nth(i).TextContent()
+		if err != nil {
+			// Skip unreadable elements; report only if nothing was read.
+			lastErr = err
+			continue
+		}
+		texts = append(texts, text)
+	}
+	if len(texts) == 0 && lastErr != nil {
+		return pwErrorf("failed to get text: %s", lastErr)
+	}
+
+	return []byte(`{"text": ` + pwQuote(joinLines(texts)) + `}`)
+}
+
+// joinLines concatenates lines with "\n" between them.
+func joinLines(lines []string) string {
+	return strings.Join(lines, "\n")
+}
+
+// pwCapture screenshots the element matching args["selector"], or the page
+// when the selector is empty or "body", and returns the PNG's file path.
+func pwCapture(p playwright.Page, args map[string]string) (string, error) {
+	// The timestamp keeps a later screenshot from overwriting a path that
+	// was already handed back to the model.
+	name := fmt.Sprintf("pw_screenshot_%d_%d.png", os.Getpid(), time.Now().UnixNano())
+	path := filepath.Join(os.TempDir(), name)
+
+	var err error
+	if selector := args["selector"]; selector != "" && selector != "body" {
+		_, err = p.Locator(selector).Screenshot(playwright.LocatorScreenshotOptions{
+			Path: playwright.String(path),
+		})
+	} else {
+		_, err = p.Screenshot(playwright.PageScreenshotOptions{
+			Path:     playwright.String(path),
+			FullPage: playwright.Bool(args["full_page"] == "true"),
+		})
+	}
+	if err != nil {
+		return "", err
+	}
+	return path, nil
+}
+
+// pwScreenshot saves a screenshot and returns {"path": "..."}.
+func pwScreenshot(args map[string]string) []byte {
+	p := pwActivePage()
+	if p == nil {
+		return []byte(pwNotStartedMsg)
+	}
+
+	path, err := pwCapture(p, args)
+	if err != nil {
+		return pwErrorf("failed to take screenshot: %s", err)
+	}
+
+	return []byte(`{"path": ` + pwQuote(path) + `}`)
+}
+
+// pwScreenshotAndView saves a screenshot and returns it as a multimodal
+// response: a text part naming the file plus an image_url part.
+func pwScreenshotAndView(args map[string]string) []byte {
+	p := pwActivePage()
+	if p == nil {
+		return []byte(pwNotStartedMsg)
+	}
+
+	path, err := pwCapture(p, args)
+	if err != nil {
+		return pwErrorf("failed to take screenshot: %s", err)
+	}
+
+	dataURL, err := models.CreateImageURLFromPath(path)
+	if err != nil {
+		return pwErrorf("failed to create image URL: %s", err)
+	}
+
+	resp := models.MultimodalToolResp{
+		Type: "multimodal_content",
+		Parts: []map[string]string{
+			{"type": "text", "text": "Screenshot saved: " + path},
+			{"type": "image_url", "url": dataURL},
+		},
+	}
+	jsonResult, err := json.Marshal(resp)
+	if err != nil {
+		return pwErrorf("failed to marshal result: %s", err)
+	}
+	return jsonResult
+}
+
+// pwWaitForSelector waits for args["selector"]; args["timeout"] is the limit
+// in milliseconds and defaults to 30000.
+func pwWaitForSelector(args map[string]string) []byte {
+	selector, ok := args["selector"]
+	if !ok || selector == "" {
+		return []byte(`{"error": "selector not provided"}`)
+	}
+
+	p := pwActivePage()
+	if p == nil {
+		return []byte(pwNotStartedMsg)
+	}
+
+	timeout := 30000
+	if raw := args["timeout"]; raw != "" {
+		ms, err := strconv.Atoi(strings.TrimSpace(raw))
+		if err != nil || ms < 0 {
+			return pwErrorf("timeout must be a non-negative integer (ms), got %q", raw)
+		}
+		timeout = ms
+	}
+
+	_, err := p.WaitForSelector(selector, playwright.PageWaitForSelectorOptions{
+		Timeout: playwright.Float(float64(timeout)),
+	})
+	if err != nil {
+		return pwErrorf("element not found: %s", err)
+	}
+
+	return []byte(`{"success": true, "message": "Element found"}`)
+}
+
+// pwDrag presses the mouse at (x1,y1), moves to (x2,y2) and releases it.
+func pwDrag(args map[string]string) []byte {
+	var coords [4]float64
+	for i, key := range []string{"x1", "y1", "x2", "y2"} {
+		raw, ok := args[key]
+		if !ok {
+			return pwErrorf("%s not provided", key)
+		}
+		v, err := strconv.ParseFloat(strings.TrimSpace(raw), 64)
+		if err != nil {
+			return pwErrorf("%s must be a number, got %q", key, raw)
+		}
+		coords[i] = v
+	}
+
+	p := pwActivePage()
+	if p == nil {
+		return []byte(pwNotStartedMsg)
+	}
+
+	mouse := p.Mouse()
+	if err := mouse.Move(coords[0], coords[1]); err != nil {
+		return pwErrorf("failed to move mouse: %s", err)
+	}
+	if err := mouse.Down(); err != nil {
+		return pwErrorf("failed to mouse down: %s", err)
+	}
+	if err := mouse.Move(coords[2], coords[3]); err != nil {
+		// Do not leave the button held down after an aborted drag.
+		_ = mouse.Up()
+		return pwErrorf("failed to move mouse: %s", err)
+	}
+	if err := mouse.Up(); err != nil {
+		return pwErrorf("failed to mouse up: %s", err)
+	}
+
+	msg := fmt.Sprintf("Dragged from (%s,%s) to (%s,%s)", args["x1"], args["y1"], args["x2"], args["y2"])
+	return []byte(`{"success": true, "message": ` + pwQuote(msg) + `}`)
+}
+
+// init sets up the component logger and probes for Playwright; tools.go
+// reads browserAvailable to decide whether to advertise the pw_* tools.
+func init() {
+	browserLogger = logger.With("component", "browser")
+	checkPlaywright()
+}
diff --git a/config/config.go b/config/config.go
index 427edd5..7aed1c7 100644
--- a/config/config.go
+++ b/config/config.go
@@ -70,6
+70,8 @@ type Config struct { CharSpecificContextEnabled bool `toml:"CharSpecificContextEnabled"` CharSpecificContextTag string `toml:"CharSpecificContextTag"` AutoTurn bool `toml:"AutoTurn"` + // playwright browser + PlaywrightHeadless bool `toml:"PlaywrightHeadless"` } func LoadConfig(fn string) (*Config, error) { diff --git a/go.mod b/go.mod index 8753052..0ad3405 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/GrailFinder/google-translate-tts v0.1.3 github.com/GrailFinder/searchagent v0.2.0 github.com/PuerkitoBio/goquery v1.11.0 + github.com/deckarep/golang-set/v2 v2.8.0 github.com/gdamore/tcell/v2 v2.13.2 github.com/glebarez/go-sqlite v1.22.0 github.com/gopxl/beep/v2 v2.1.1 @@ -14,6 +15,7 @@ require ( github.com/jmoiron/sqlx v1.4.0 github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 github.com/neurosnap/sentences v1.1.2 + github.com/playwright-community/playwright-go v0.5700.1 github.com/rivo/tview v0.42.0 github.com/yuin/goldmark v1.4.13 ) @@ -24,6 +26,8 @@ require ( github.com/ebitengine/oto/v3 v3.4.0 // indirect github.com/ebitengine/purego v0.9.1 // indirect github.com/gdamore/encoding v1.0.1 // indirect + github.com/go-jose/go-jose/v3 v3.0.4 // indirect + github.com/go-stack/stack v1.8.1 // indirect github.com/google/uuid v1.6.0 // indirect github.com/hajimehoshi/go-mp3 v0.3.4 // indirect github.com/hajimehoshi/oto/v2 v2.3.1 // indirect diff --git a/go.sum b/go.sum index 2e32cfc..24ca3bd 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,11 @@ github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43 github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/deckarep/golang-set/v2 v2.8.0 h1:swm0rlPCmdWn9mESxKOjWk8hXSqoxOp+ZlfuyaAdFlQ= +github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/bQ= @@ -24,8 +27,13 @@ github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx github.com/gdamore/tcell/v2 v2.13.2/go.mod h1:+Wfe208WDdB7INEtCsNrAN6O2m+wsTPk1RAovjaILlo= github.com/glebarez/go-sqlite v1.22.0 h1:uAcMJhaA6r3LHMTFgP0SifzgXg46yJkgxqyuyec+ruQ= github.com/glebarez/go-sqlite v1.22.0/go.mod h1:PlBIdHe0+aUEFn+r2/uthrWq4FxbzugL0L8Li6yQJbc= +github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY= +github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ= github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw= +github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= @@ -59,6 +67,8 @@ github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7 
github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U= +github.com/playwright-community/playwright-go v0.5700.1/go.mod h1:MlSn1dZrx8rszbCxY6x3qK89ZesJUYVx21B2JnkoNF0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= @@ -67,6 +77,8 @@ github.com/rivo/tview v0.42.0 h1:b/ftp+RxtDsHSaynXTbJb+/n/BxDEi+W3UfF5jILK6c= github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= @@ -152,6 +164,8 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis= diff --git a/main.go b/main.go index 62a7cef..5459089 100644 --- a/main.go +++ b/main.go @@ -17,7 +17,7 @@ var ( shellHistoryPos int = -1 thinkingCollapsed = false toolCollapsed = true - statusLineTempl = "help (F12) | chat: [orange:-:b]%s[-:-:-] (F1) | [%s:-:b]tool use[-:-:-] (ctrl+k) | model: [%s:-:b]%s[-:-:-] (ctrl+l) | [%s:-:b]skip LLM resp[-:-:-] (F10)\nAPI: [orange:-:b]%s[-:-:-] (ctrl+v) | writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | bot will write as [orange:-:b]%s[-:-:-] (ctrl+x)" + statusLineTempl = "help (F12) | chat: [orange:-:b]%s[-:-:-] (F1) | [%s:-:b]tool use[-:-:-] (ctrl+k) | model: [%s:-:b]%s[-:-:-] (ctrl+l) | [%s:-:b]skip LLM resp[-:-:-] (F10) | API: [orange:-:b]%s[-:-:-] (ctrl+v)\nwriting as: [orange:-:b]%s[-:-:-] (ctrl+q) | bot will write as [orange:-:b]%s[-:-:-] (ctrl+x)" focusSwitcher = map[tview.Primitive]tview.Primitive{} ) diff --git a/tools.go b/tools.go index a9bb9c0..715701b 100644 --- a/tools.go +++ b/tools.go @@ -1362,6 +1362,18 @@ var fnMap = map[string]fnSig{ "todo_update": todoUpdate, "todo_delete": todoDelete, "summarize_chat": summarizeChat, + // playwright tools + "pw_start": pwStart, + "pw_stop": pwStop, + "pw_is_running": pwIsRunning, + "pw_navigate": pwNavigate, + "pw_click": pwClick, + "pw_fill": pwFill, + "pw_extract_text": pwExtractText, + "pw_screenshot": pwScreenshot, + "pw_screenshot_and_view": pwScreenshotAndView, + "pw_wait_for_selector": pwWaitForSelector, + "pw_drag": pwDrag, } func registerWindowTools() { @@ -1958,4 +1970,217 @@ func init() { }, ) } + if browserAvailable { + baseTools = append(baseTools, + models.Tool{ + Type: "function", + 
Function: models.ToolFunc{ + Name: "pw_start", + Description: "Start a Playwright browser instance. Call this first before using other pw_ tools. Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{}, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_stop", + Description: "Stop the Playwright browser instance. Call when done with browser automation.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{}, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_is_running", + Description: "Check if Playwright browser is currently running.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{}, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_navigate", + Description: "Navigate to a URL in the browser.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"url"}, + Properties: map[string]models.ToolArgProps{ + "url": models.ToolArgProps{ + Type: "string", + Description: "URL to navigate to", + }, + }, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_click", + Description: "Click on an element using CSS selector. 
Use 'index' for multiple matches (default 0).", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"selector"}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "CSS selector for the element to click", + }, + "index": models.ToolArgProps{ + Type: "string", + Description: "optional index for multiple matches (default 0)", + }, + }, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_fill", + Description: "Fill an input field with text using CSS selector.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"selector", "text"}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "CSS selector for the input element", + }, + "text": models.ToolArgProps{ + Type: "string", + Description: "text to fill into the input", + }, + "index": models.ToolArgProps{ + Type: "string", + Description: "optional index for multiple matches (default 0)", + }, + }, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_extract_text", + Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"selector"}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "CSS selector (use 'body' for all page text)", + }, + }, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_screenshot", + Description: "Take a screenshot of the page or a specific element. 
Returns file path to saved image.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "optional CSS selector for element to screenshot", + }, + "full_page": models.ToolArgProps{ + Type: "string", + Description: "optional: 'true' to capture full page (default false)", + }, + }, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_screenshot_and_view", + Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "optional CSS selector for element to screenshot", + }, + "full_page": models.ToolArgProps{ + Type: "string", + Description: "optional: 'true' to capture full page (default false)", + }, + }, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_wait_for_selector", + Description: "Wait for an element to appear on the page.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"selector"}, + Properties: map[string]models.ToolArgProps{ + "selector": models.ToolArgProps{ + Type: "string", + Description: "CSS selector to wait for", + }, + "timeout": models.ToolArgProps{ + Type: "string", + Description: "optional timeout in ms (default 30000)", + }, + }, + }, + }, + }, + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "pw_drag", + Description: "Drag the mouse from one point to another.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"x1", "y1", "x2", "y2"}, + Properties: map[string]models.ToolArgProps{ + "x1": models.ToolArgProps{ + Type: "string", + Description: "starting X coordinate", + }, + "y1": models.ToolArgProps{ + Type: 
"string", + Description: "starting Y coordinate", + }, + "x2": models.ToolArgProps{ + Type: "string", + Description: "ending X coordinate", + }, + "y2": models.ToolArgProps{ + Type: "string", + Description: "ending Y coordinate", + }, + }, + }, + }, + }, + ) + toolSysMsg += browserToolSysMsg + } } diff --git a/tools_playwright.go b/tools_playwright.go new file mode 100644 index 0000000..e8b3db5 --- /dev/null +++ b/tools_playwright.go @@ -0,0 +1,62 @@ +package main + +var browserToolSysMsg = ` +Additional browser automation tools (Playwright): +[ +{ + "name": "pw_start", + "args": [], + "when_to_use": "start a browser instance before doing any browser automation. Must be called first." +}, +{ + "name": "pw_stop", + "args": [], + "when_to_use": "stop the browser instance when done with automation." +}, +{ + "name": "pw_is_running", + "args": [], + "when_to_use": "check if browser is currently running." +}, +{ + "name": "pw_navigate", + "args": ["url"], + "when_to_use": "when asked to open a specific URL in a web browser." +}, +{ + "name": "pw_click", + "args": ["selector", "index"], + "when_to_use": "when asked to click on an element on the current webpage. 'index' is optional (default 0) to handle multiple matches." +}, +{ + "name": "pw_fill", + "args": ["selector", "text", "index"], + "when_to_use": "when asked to type text into an input field. 'index' is optional." +}, +{ + "name": "pw_extract_text", + "args": ["selector"], + "when_to_use": "when asked to get text content from the page or specific elements. Use selector 'body' for all page text." +}, +{ + "name": "pw_screenshot", + "args": ["selector", "full_page"], + "when_to_use": "when asked to take a screenshot of the page or a specific element. Returns a file path to the image." +}, +{ + "name": "pw_screenshot_and_view", + "args": ["selector", "full_page"], + "when_to_use": "when asked to take a screenshot and show it to the model. Returns image for viewing." 
+}, +{ + "name": "pw_wait_for_selector", + "args": ["selector", "timeout"], + "when_to_use": "when asked to wait for an element to appear on the page before proceeding." +}, +{ + "name": "pw_drag", + "args": ["x1", "y1", "x2", "y2"], + "when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)" +} +] +`