feat: add browser toolkit (#179)

Co-authored-by: Michael Neale <michael.neale@gmail.com>
block · Nov 6, 2024 · f54fd3a · f54fd3a
1 parent aab356b
commit f54fd3a
Show file tree

Hide file tree

Showing 6 changed files with 520 additions and 0 deletions.
diff --git a/.github/workflows/scripts/check_licenses.py b/.github/workflows/scripts/check_licenses.py
@@ -47,6 +47,7 @@ class LicenseConfig:
             "MIT",
             "BSD-3-Clause",
             "Apache-2.0",
+            "Apache License 2",
             "Apache Software License",
             "Python Software Foundation License",
             "BSD License",

diff --git a/pyproject.toml b/pyproject.toml
@@ -15,6 +15,9 @@ dependencies = [
     "langfuse>=2.38.2",
     "goose-plugins-block",
     "goose-context-management",
+    "selenium>=4.0.0",
+    "beautifulsoup4>=4.9.3",
+    "pyshadow<=0.0.5"
 ]
 author = [{ name = "Block", email = "ai-oss-tools@block.xyz" }]
 packages = [{ include = "goose", from = "src" }]
@@ -33,6 +36,7 @@ screen = "goose.toolkit.screen:Screen"
 reasoner = "goose.toolkit.reasoner:Reasoner"
 repo_context = "goose.toolkit.repo_context.repo_context:RepoContext"
 synopsis = "goose.synopsis.toolkit:SynopsisDeveloper"
+browser = "goose.toolkit.web_browser:BrowserToolkit"
 
 [project.entry-points."goose.profile"]
 default = "goose.profile:default_profile"

diff --git a/src/goose/cli/session_notifier.py b/src/goose/cli/session_notifier.py
@@ -11,6 +11,9 @@ def __init__(self, status_indicator: Status) -> None:
         self.status_indicator = status_indicator
         self.live = Live(self.status_indicator, refresh_per_second=8, transient=True)
 
+    def notify(self, message: str) -> None:
+        print(f"Notification: {message}")
+
     def log(self, content: RenderableType) -> None:
         print(content)
 

diff --git a/src/goose/toolkit/prompts/browser.jinja b/src/goose/toolkit/prompts/browser.jinja
@@ -0,0 +1,41 @@
+BrowserToolkit is a selenium-based toolset for automated web interactions. 
+This is useful when the best way to load content, run a search, perform an action as a user on a page, test a page, fill out a form, etc. requires a real browser to render the page, run JavaScript, and so on.
+
+You should keep the browser open if needed, as the user may be able to log in and interact to help out if you ask.
+
+Requests could include: 
+* searching for an item using a website's search feature
+* filling out a form
+* reading content 
+* testing a page or viewing a page
+* accessing social media (in which case you should check that the user can log in)
+* performing a web search
+
+
+You will use combinations of these tools to take the relevant actions to satisfy the user's requests:
+
+- **navigate_to(url: str)**: Load and navigate to the specified URL in the web driver. The tool ensures the page has fully loaded before proceeding.
+
+- **get_html_content()**: Extract the HTML content of the current page and store it in a cached file. Use this to retrieve the latest cache file for offline HTML analysis.
+
+- **type_into_input(selector: str, text: str, click_enter: bool = False, click_tab: bool = False)**: Type specified text into an input element located by a CSS selector. Simulates human typing for natural input.
+
+- **click_element(selector: str)**: Click an element (button/link) identified by a CSS selector. Use this to interact with webpage elements directly.
+
+- **find_element_by_text_soup(text: str, filename: str)**: Search for an element containing specific text using BeautifulSoup, sourcing from the cached HTML file. Useful for text-based element queries.
+
+- **take_browser_screenshot(filename: str)**: Capture a screenshot of the current browser window and save it to a file. Use this for visual verification.
+
+- **find_elements_of_type(tag_type: str, filename: str)**: Find all elements of a specific HTML tag type using BeautifulSoup, sourcing from the cached HTML file. Useful for retrieving multiple elements of the same type.
+
+### Important Note on Element Selection:
+
+When using tools that require CSS selectors or text identification, ensure that:
+
+1. **Precision**: Selectors must be accurate and precise. The specificity of CSS selectors should match the target element precisely to avoid selection errors.
+
+2. **DOM Considerations**: Some elements may reside within shadow DOMs, requiring special handling using tools like PyShadow, or may not be visible in the default DOM structure.
+
+3. **Element Types**: Elements may not always be of the expected type or have attributes you're searching for. Consider the tree structure and hierarchy when querying elements.
+
+This toolkit facilitates browser automation by scripting user interactions and processing web content efficiently.