diff --git a/redel/tools/browsing/impl.py b/redel/tools/browsing/impl.py index 834d22240..979220aa8 100644 --- a/redel/tools/browsing/impl.py +++ b/redel/tools/browsing/impl.py @@ -1,5 +1,4 @@ import asyncio -import contextlib import logging import tempfile from typing import Optional, TYPE_CHECKING @@ -12,12 +11,6 @@ import httpx import pymupdf import pymupdf4llm - from playwright.async_api import ( - BrowserContext, - TimeoutError as PlaywrightTimeoutError, - async_playwright, - Error as PlaywrightError, - ) except ImportError: raise ImportError( "You are missing required dependencies to use the bundled tools. Please install ReDel using `pip install" @@ -40,11 +33,6 @@ class Browsing(ToolBase): Renders webpages in Markdown and has basic support for reading PDFs. """ - # app-global browser instance - playwright = None - browser = None - browser_context = None - def __init__( self, *args, @@ -78,51 +66,6 @@ def __init__( "text/": self.html_content, } - # === resources + app lifecycle === - # noinspection PyMethodMayBeStatic - async def get_browser(self, **kwargs) -> BrowserContext: - """Get the current active browser context, or launch it on the first call.""" - if Browsing.playwright is None: - Browsing.playwright = await async_playwright().start() - if Browsing.browser is None: - Browsing.browser = await Browsing.playwright.chromium.launch(**kwargs) - if Browsing.browser_context is None: - Browsing.browser_context = await Browsing.browser.new_context() - return Browsing.browser_context - - async def get_page(self, create=True) -> Optional["Page"]: - """Get the current page. - - Returns None if the browser is not on a page unless `create` is True, in which case it creates a new page. - """ - if self.page is None and create: - context = await self.get_browser() - if self.page_concurrency_sem: - await self.page_concurrency_sem.acquire() - self.page = await context.new_page() - return self.page - - async def cleanup(self): - await super().cleanup() - if self.page is not None: - await self.page.close() - if self.page_concurrency_sem: - self.page_concurrency_sem.release() - self.page = None - - async def close(self): - await super().close() - try: - if (browser := Browsing.browser) is not None: - Browsing.browser = None - await browser.close() - if (pw := Browsing.playwright) is not None: - Browsing.playwright = None - await pw.stop() - except PlaywrightError: - # sometimes playwright doesn't like closing in parallel - pass - # ==== functions ==== @ai_function() async def search(self, query: str): @@ -190,21 +133,14 @@ async def json_content(self, href: str) -> str: async def html_content(self, href: str) -> str: """Default handler for all other content types.""" - page = await self.get_page() - await page.goto(href) - with contextlib.suppress(PlaywrightTimeoutError): - await page.wait_for_load_state("networkidle", timeout=10_000) - # header - title = await page.title() - header = f"{title}\n{'=' * len(title)}\n{page.url}\n\n" - - content_html = await page.content() - content = web_markdownify(content_html) + resp = await self.http.get(href) + resp.raise_for_status() + await resp.aread() + content = web_markdownify(resp.text) # summarization content = await self.maybe_summarize(content) # result - result = header + content - return result + return content # ==== helpers ==== async def maybe_summarize(self, content, max_len=None): diff --git a/server.py b/server.py index e825c2d08..43a64402d 100644 --- a/server.py +++ b/server.py @@ -13,7 +13,6 @@ - long engine: claude-3-opus (for summarizing long webpages, if ANTHROPIC_API_KEY is set) """ -import asyncio import logging import os from pathlib import Path @@ -38,8 +37,6 @@ else: long_engine = None -# only allow 3 chrome tabs at once to save my server -web_concurrency_sem = asyncio.Semaphore(3) # Define the configuration for each interactive session ai = ReDel( @@ -49,7 +46,7 @@ tool_configs={ Browsing: { "always_include": True, - "kwargs": {"long_engine": long_engine, "page_concurrency_sem": web_concurrency_sem}, + "kwargs": {"long_engine": long_engine}, }, }, max_delegation_depth=4,