From 8e1e648f592423a0c35b84880ffcf4535b994db5 Mon Sep 17 00:00:00 2001
From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com>
Date: Mon, 15 Apr 2024 13:40:42 -0400
Subject: [PATCH] Move html_to_markdown to utils/misc.

- Centralize this util.

---
 core/utils/misc.py        | 42 ++++++++++++++++++++++++++++++++++++++-
 exts/fandom_wiki.py       | 15 +++++----------
 exts/ff_metadata/utils.py | 39 +--------------------------------------
 requirements.txt          |  2 +-
 4 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/core/utils/misc.py b/core/utils/misc.py
index abd9e2d..0994d4c 100644
--- a/core/utils/misc.py
+++ b/core/utils/misc.py
@@ -7,8 +7,10 @@
 import logging
 import time
+import lxml.html
 
 
-__all__ = ("catchtime",)
+
+__all__ = ("catchtime", "html_to_markdown")
 
 
 class catchtime:
@@ -33,3 +35,41 @@ def __exit__(self, *exc: object) -> None:
         self.total_time = time.perf_counter() - self.total_time
         if self.logger:
             self.logger.info("Time: %.3f seconds", self.total_time)
+
+
+def html_to_markdown(node: lxml.html.HtmlElement, *, include_spans: bool = False, base_url: str | None = None) -> str:
+    # Modified from RoboDanny code:
+    # https://github.com/Rapptz/RoboDanny/blob/6e54be1985793ed29fca6b7c5259677904b8e1ad/cogs/dictionary.py#L532
+
+    text: list[str] = []
+    italics_marker: str = "_"
+
+    if base_url is not None:
+        node.make_links_absolute("".join(base_url.partition(".com/wiki/")[0:-1]), resolve_base_href=True)
+
+    for child in node.iter():
+        child_text = child.text.strip() if child.text else ""
+
+        if child.tag in {"i", "em"}:
+            text.append(f"{italics_marker}{child_text}{italics_marker}")
+            # Toggle the marker so adjacent italic runs don't merge.
+            italics_marker = "_" if italics_marker == "*" else "*"  # type: ignore
+        elif child.tag in {"b", "strong"}:
+            if text and text[-1].endswith("*"):
+                text.append("\u200b")
+            text.append(f"**{child_text.strip()}**")
+        elif child.tag == "a":
+            # No markup for links
+            if base_url is None:
+                text.append(child_text)
+            else:
+                text.append(f"[{child.text}]({child.attrib['href']})")
+        elif child.tag == "p":
+            text.append(f"\n{child_text}\n")
+        elif include_spans and child.tag == "span":
+            text.append(child_text)
+
+        if child.tail:
+            text.append(child.tail)
+
+    return "".join(text).strip()
diff --git a/exts/fandom_wiki.py b/exts/fandom_wiki.py
index 7153e83..70aadb3 100644
--- a/exts/fandom_wiki.py
+++ b/exts/fandom_wiki.py
@@ -8,22 +8,17 @@
 import asyncio
 import logging
 import textwrap
-from typing import TYPE_CHECKING, Any
+from typing import Any
 from urllib.parse import quote as uriquote, urljoin
 
+import aiohttp
 import discord
 from discord.app_commands import Choice
 from discord.ext import commands
 from lxml import etree, html
 
 import core
-from core.utils import EMOJI_URL
-
-from .ff_metadata.utils import html_to_markdown
-
-
-if TYPE_CHECKING:
-    from aiohttp import ClientSession
+from core.utils import EMOJI_URL, html_to_markdown
 
 
 LOGGER = logging.getLogger(__name__)
@@ -70,7 +65,7 @@ def __init__(
         )
 
 
-async def load_wiki_all_pages(session: ClientSession, wiki_url: str) -> dict[str, str]:
+async def load_wiki_all_pages(session: aiohttp.ClientSession, wiki_url: str) -> dict[str, str]:
     pages_dict: dict[str, str] = {}
     next_path: str = urljoin(wiki_url, "/wiki/Special:AllPages")
     while True:
@@ -137,7 +132,7 @@ def clean_fandom_page(element: etree._Element) -> etree._Element:  # type: ignor
     return element
 
 
-async def process_fandom_page(session: ClientSession, url: str) -> tuple[str | None, str | None]:
+async def process_fandom_page(session: aiohttp.ClientSession, url: str) -> tuple[str | None, str | None]:
     """Extract the summary and image from a Fandom page."""
 
     async with session.get(url) as response:
diff --git a/exts/ff_metadata/utils.py b/exts/ff_metadata/utils.py
index a8cc0a8..5e800d8 100644
--- a/exts/ff_metadata/utils.py
+++ b/exts/ff_metadata/utils.py
@@ -10,7 +10,7 @@
 import fichub_api
 import lxml.html
 
-from core.utils import PaginatedSelectView
+from core.utils import PaginatedSelectView, html_to_markdown
 
 
 __all__ = (
@@ -64,43 +64,6 @@ class StoryWebsite(NamedTuple):
 )
 
 
-def html_to_markdown(node: lxml.html.HtmlElement, *, include_spans: bool = False, base_url: str | None = None) -> str:
-    # Modified from RoboDanny code:
-    # https://github.com/Rapptz/RoboDanny/blob/6e54be1985793ed29fca6b7c5259677904b8e1ad/cogs/dictionary.py#L532
-
-    text: list[str] = []
-    italics_marker: str = "_"
-
-    if base_url is not None:
-        node.make_links_absolute("".join(base_url.partition(".com/wiki/")[0:-1]), resolve_base_href=True)
-
-    for child in node.iter():
-        child_text = child.text.strip() if child.text else ""
-
-        if child.tag in {"i", "em"}:
-            text.append(f"{italics_marker}{child_text}{italics_marker}")
-            italics_marker = "_" if italics_marker == "*" else "*"  # type: ignore
-        elif child.tag in {"b", "strong"}:
-            if text and text[-1].endswith("*"):
-                text.append("\u200b")
-            text.append(f"**{child_text.strip()}**")
-        elif child.tag == "a":
-            # No markup for links
-            if base_url is None:
-                text.append(child_text)
-            else:
-                text.append(f"[{child.text}]({child.attrib['href']})")
-        elif child.tag == "p":
-            text.append(f"\n{child_text}\n")
-        elif include_spans and child.tag == "span":
-            text.append(child_text)
-
-        if child.tail:
-            text.append(child.tail)
-
-    return "".join(text).strip()
-
-
 def create_ao3_work_embed(work: ao3.Work) -> discord.Embed:
     """Create an embed that holds all the relevant metadata for an Archive of Our Own work.
 
diff --git a/requirements.txt b/requirements.txt
index 67812c2..3c52d9e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,7 +14,7 @@ msgspec[toml]
 openpyxl
 Pillow>=10.0.0
 types-lxml
-wavelink>=3.0.0
+wavelink>=3.2.0
 
 # To be used later:
 # parsedatetime