use a contextual config prefix to resolve a method resolution order conflict
nwaughachukwuma committed Nov 15, 2024
1 parent ecd7251 commit 719c1bd
Showing 3 changed files with 27 additions and 44 deletions.
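For context: before this commit, `GoogleSearch` and `KnowledgeSearch` each stored their settings on the same `self.config` attribute. `WebSearch` inherits from both and calls each parent's `__init__`, so the second call silently rebound `self.config` and discarded the first parent's settings. A minimal sketch of that failure mode, with hypothetical placeholder dicts standing in for the repo's real config classes:

```python
# Sketch of the collision this commit fixes. Placeholder dicts stand in
# for GoogleSearchConfig / KnowledgeSearchConfig (hypothetical values).

class GoogleSearch:
    def __init__(self, config=None):
        self.config = config or {"max_results": 5}  # Google settings


class KnowledgeSearch:
    def __init__(self, config=None):
        self.config = config or {"max_sources": 3}  # knowledge settings


class WebSearch(GoogleSearch, KnowledgeSearch):
    def __init__(self):
        GoogleSearch.__init__(self, {"max_results": 10})
        # Rebinds self.config, discarding the Google settings above.
        KnowledgeSearch.__init__(self, {"max_sources": 3})


ws = WebSearch()
print(ws.config)  # {'max_sources': 3} -- the Google settings are gone
```

Renaming the attribute to `google_config` in one parent and `knowledge_config` in the other gives each class its own slot, so both configurations coexist on the combined instance.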
43 changes: 14 additions & 29 deletions src/web_search/google.py
@@ -11,10 +11,10 @@


 class GoogleSearch:
-    config: GoogleSearchConfig
+    google_config: GoogleSearchConfig

-    def __init__(self, config: GoogleSearchConfig | None = None):
-        self.config = config if config else GoogleSearchConfig()
+    def __init__(self, google_config: GoogleSearchConfig | None = None):
+        self.google_config = google_config if google_config else GoogleSearchConfig()

     async def _compile_google_search(self, query: str):
         results = await self._google_search(query)
@@ -26,28 +26,24 @@ async def _google_search(self, query: str, **kwargs):
         """
         params = {
             "q": unquote(query),
-            "key": self.config.api_key,
-            "cx": self.config.cse_id,
+            "key": self.google_config.api_key,
+            "cx": self.google_config.cse_id,
             "num": 5,
         }
         params.update(kwargs)
-        headers = {"Referer": self.config.app_domain}
+        headers = {"Referer": self.google_config.app_domain or ""}

         async with httpx.AsyncClient() as client:
-            response = await client.get(
-                GOOGLE_SEARCH_URL, params=params, headers=headers
-            )
+            response = await client.get(GOOGLE_SEARCH_URL, params=params, headers=headers)
             response.raise_for_status()

         json_data = response.json()

-        items = json_data.get("items", [])[: self.config.max_results]
+        items = json_data.get("items", [])[: self.google_config.max_results]
         result = await self.extract_relevant_items(items)
         return result

-    async def extract_relevant_items(
-        self, search_results: List[Dict[str, Any]]
-    ) -> List[SearchResult]:
+    async def extract_relevant_items(self, search_results: List[Dict[str, Any]]) -> List[SearchResult]:
         """
         Extract relevant items from the search results
         """
@@ -77,22 +73,15 @@ def _is_valid_url(self, url: str) -> bool:
             ".rar",
         )
         invalid_domains = ("youtube.com", "vimeo.com", "facebook.com", "twitter.com")
-        return not (
-            url.endswith(invalid_extensions)
-            or any(domain in url for domain in invalid_domains)
-        )
+        return not (url.endswith(invalid_extensions) or any(domain in url for domain in invalid_domains))

-    async def _process_search_item(
-        self, url: str, item: Dict, char_limit=2000
-    ) -> SearchResult | None:
+    async def _process_search_item(self, url: str, item: Dict, char_limit=2000) -> SearchResult | None:
         """
         Process and fetch the result of a single search item url
         """
         try:
             content = await self._scrape_page_content(url)
-            return SearchResult(
-                url=url, title=item.get("title", ""), preview=content[:char_limit]
-            )
+            return SearchResult(url=url, title=item.get("title", ""), preview=content[:char_limit])
         except Exception:
             return None

@@ -107,9 +96,7 @@ async def _scrape_page_content(self, url: str) -> str:

         soup = BeautifulSoup(response.text, "lxml")
         # Remove unwanted elements
-        for element in soup.find_all(
-            ["script", "style", "nav", "header", "footer", "ads"]
-        ):
+        for element in soup.find_all(["script", "style", "nav", "header", "footer", "ads"]):
             element.decompose()

         content_elements = soup.find_all(
@@ -123,9 +110,7 @@

         # Extract text from found elements
         content = "\n".join(
-            element.get_text(strip=True)
-            for element in content_elements
-            if element.get_text(strip=True)
+            element.get_text(strip=True) for element in content_elements if element.get_text(strip=True)
         )

         # If still no content, try getting all text
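Besides the renames, the second hunk in this file also hardens the Referer header: `app_domain` can apparently be unset, and `app_domain or ""` falls back to an empty string rather than passing `None`, which httpx does not accept as a header value.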
18 changes: 8 additions & 10 deletions src/web_search/knowledge.py
@@ -7,10 +7,10 @@


 class KnowledgeSearch:
-    config: KnowledgeSearchConfig
+    knowledge_config: KnowledgeSearchConfig

-    def __init__(self, config: KnowledgeSearchConfig | None = None):
-        self.config = config if config else KnowledgeSearchConfig()
+    def __init__(self, knowledge_config: KnowledgeSearchConfig | None = None):
+        self.knowledge_config = knowledge_config if knowledge_config else KnowledgeSearchConfig()

     async def fetch_knowledge(self, query: str):
         """
@@ -31,7 +31,7 @@ async def fetch_knowledge(self, query: str):
             if isinstance(result, list):
                 sources.extend(result)

-        sources = sources[: self.config.max_sources]
+        sources = sources[: self.knowledge_config.max_sources]
         return "\n\n".join(str(source) for source in sources if source.preview)

     async def _compile_wikipedia(self, query: str) -> str:
@@ -48,7 +48,7 @@ async def _search_wikipedia(self, query: str) -> list[SearchResult]:
         """
         try:
             sources: list[SearchResult] = []
-            search_results = wikipedia.search(query, results=self.config.max_results)
+            search_results = wikipedia.search(query, results=self.knowledge_config.max_results)

             for title in search_results:
                 try:
@@ -60,9 +60,7 @@ async def _search_wikipedia(self, query: str) -> list[SearchResult]:
                     if not preview:
                         continue

-                    sources.append(
-                        SearchResult(url=page.url, title=page.title, preview=preview)
-                    )
+                    sources.append(SearchResult(url=page.url, title=page.title, preview=preview))
                 except wikipedia.exceptions.DisambiguationError:
                     continue
                 except wikipedia.exceptions.PageError:
@@ -81,7 +79,7 @@ async def _search_arxiv_papers(self, query: str) -> list[SearchResult]:
         params = {
             "search_query": f"all:{query}",
             "start": 0,
-            "max_results": self.config.max_results,
+            "max_results": self.knowledge_config.max_results,
             "sortBy": "relevance",
             "sortOrder": "descending",
         }
@@ -129,7 +127,7 @@ def _extract_relevant_wiki_sections(self, content: str) -> str:

         result = ""
         for p in cleaned_paragraphs:
-            if len(result + p) <= self.config.max_preview_chars:
+            if len(result + p) <= self.knowledge_config.max_preview_chars:
                 result += p + "\n\n"
             else:
                 break
10 changes: 5 additions & 5 deletions src/web_search/search.py
@@ -1,18 +1,18 @@
 import asyncio
 from typing import Any, Coroutine, List

+from .config import WebSearchConfig
 from .google import GoogleSearch
 from .knowledge import KnowledgeSearch
-from .config import WebSearchConfig


 class WebSearch(GoogleSearch, KnowledgeSearch):
     def __init__(self, config: WebSearchConfig | None = None):
-        self.config = config if config else WebSearchConfig()
+        ws_config = config if config else WebSearchConfig()

-        self.sources = self.config.sources
-        GoogleSearch.__init__(self, self.config.google_config)
-        KnowledgeSearch.__init__(self, config=self.config.knowledge_config)
+        GoogleSearch.__init__(self, google_config=ws_config.google_config)
+        KnowledgeSearch.__init__(self, knowledge_config=ws_config.knowledge_config)
+        self.sources = ws_config.sources

     async def search(self, query: str):
         """
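With each parent now owning a distinct attribute, both configurations survive on one `WebSearch` instance. A hypothetical smoke test, assuming the default config objects construct without arguments (as the fallbacks in each `__init__` suggest):

```python
# Hypothetical check: after the rename, each parent keeps its own settings.
ws = WebSearch()
print(ws.google_config.max_results)     # GoogleSearch settings, intact
print(ws.knowledge_config.max_sources)  # KnowledgeSearch settings, intact
print(ws.sources)                       # enabled sources from WebSearchConfig
```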
