Skip to content

Commit

Permalink
Merge pull request #62 from datarootsio/add-tryparse-for-html
Browse files Browse the repository at this point in the history
add exception for html tables and try-except when parsing outputs (fa…
  • Loading branch information
murilo-cunha committed Nov 27, 2022
2 parents e09fd9c + d3f5cf8 commit 3978126
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
14 changes: 11 additions & 3 deletions databooks/data_models/cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from rich.text import Text

from databooks.data_models.base import DatabooksBase
from databooks.data_models.rich_helpers import HtmlTable
from databooks.data_models.rich_helpers import HtmlTable, RichHtmlTableError
from databooks.logging import get_logger

logger = get_logger(__file__)
Expand Down Expand Up @@ -146,9 +146,17 @@ class CellDisplayDataOutput(DatabooksBase):
@property
def rich_output(self) -> Sequence[ConsoleRenderable]:
"""Dynamically compute the rich output - also in `CellExecuteResultOutput`."""

def _try_parse_html(s: str) -> Optional[ConsoleRenderable]:
    """Render `s` as a rich HTML table, falling back to `None` on parsing errors."""
    html = "".join(s)
    try:
        table = HtmlTable(html).rich()
    except RichHtmlTableError:
        # Best-effort rendering: an unparsable table is skipped, not fatal.
        logger.debug("Could not generate rich HTML table.")
        return None
    return table

mime_func: Dict[str, Callable[[str], Optional[ConsoleRenderable]]] = {
"image/png": lambda s: None,
"text/html": lambda s: HtmlTable("".join(s)).rich(),
"text/html": lambda s: _try_parse_html(s),
"text/plain": lambda s: Text("".join(s)),
}
_rich = {
Expand Down
16 changes: 13 additions & 3 deletions databooks/data_models/rich_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@
HtmlAttr = Tuple[str, Optional[str]]


class RichHtmlTableError(Exception):
    """Could not parse HTML table."""

    def __init__(self, msg: str = "", *args: Any):
        """
        Use class docstring as error 'prefix'.

        :param msg: Detail appended after the docstring prefix; when empty,
         the message is the prefix alone
        :param args: Extra positional arguments forwarded to `Exception`
        :raises ValueError: If the class has no docstring to use as prefix
        """
        if self.__doc__ is None:
            raise ValueError("Exception docstring required - used in error message.")
        # Drop empty parts so the default `msg` does not leave a trailing space.
        super().__init__(" ".join(filter(None, (self.__doc__, msg))), *args)


class HtmlTable(HTMLParser):
"""Rich table from HTML string."""

Expand All @@ -23,13 +33,13 @@ def __init__(self, html: str, *args: Any, **kwargs: Any) -> None:
def handle_starttag(self, tag: str, attrs: List[HtmlAttr]) -> None:
    """
    Active tags are indicated via instance boolean properties.

    Opening `tag` sets the instance attribute of the same name to `True`.

    :param tag: Name of the tag being opened
    :param attrs: Tag attributes (unused here)
    :raises RichHtmlTableError: If the attribute named `tag` is already truthy
    """
    # NOTE(review): `getattr` sees any attribute named like the tag, so a tag
    # sharing a name with an existing attribute or method would presumably be
    # reported as already open - confirm whether tag names should be restricted.
    if getattr(self, tag, None):
        raise RichHtmlTableError(f"Already in `{tag}`.")
    setattr(self, tag, True)

def handle_endtag(self, tag: str) -> None:
"""Write table properties when closing tags."""
if not getattr(self, tag):
raise ValueError(f"Cannot end unopened `{tag}`.")
raise RichHtmlTableError(f"Cannot end unopened `{tag}`.")

# If we are ending a row, either set a table header or row
if tag == "tr":
Expand All @@ -53,7 +63,7 @@ def rich(self, **tbl_kwargs: Any) -> Optional[Table]:
_ncols = len(self.rows[0])
_headers = [""] * (_ncols - len(self.headers)) + self.headers
if any(len(row) != _ncols for row in self.rows):
raise ValueError(f"Expected all rows to have {_ncols} columns.")
raise RichHtmlTableError(f"Expected all rows to have {_ncols} columns.")

_box = tbl_kwargs.pop("box", box.SIMPLE_HEAVY)
_row_styles = tbl_kwargs.pop("row_styles", ["on bright_black", ""])
Expand Down

0 comments on commit 3978126

Please sign in to comment.