From d2e6824d4796bc57ed951d0646ff6d2be2b5c12f Mon Sep 17 00:00:00 2001 From: Florian Scherf Date: Fri, 7 Jul 2023 14:20:50 +0200 Subject: [PATCH] html: add parse_html Signed-off-by: Florian Scherf --- doc/content/api-reference/html.rst | 34 +++++++++++++++++++++ lona/html/__init__.py | 2 +- lona/html/parsing.py | 49 ++++++++++++++++++++++++------ lona/html/widgets.py | 5 +-- 4 files changed, 78 insertions(+), 12 deletions(-) diff --git a/doc/content/api-reference/html.rst b/doc/content/api-reference/html.rst index 52523a84..2290b9d0 100644 --- a/doc/content/api-reference/html.rst +++ b/doc/content/api-reference/html.rst @@ -174,6 +174,40 @@ given selector. Using HTML Strings ~~~~~~~~~~~~~~~~~~ +To initialize an HTML tree you can use ``lona.html.parse_html``, which returns +a Lona HTML node or a list of Lona HTML nodes. + +``lona.html.parse_html`` uses high level nodes from the standard library like +``lona.html.TextInput`` which implement high level methods and properties. +To disable this and parse HTML into blank nodes you can set +``use_high_level_nodes=False``. + +When ``lona.html.parse_html`` parses an HTML string, that results in an HTML +tree with exactly one root node, and ``flat`` is set to ``True``, which is the +default, ``lona.html.parse_html`` will flatten the tree, by returning the root +node instead of the list. + +.. code-block:: python + + from lona.html import parse_html + + >>> parse_html('

Hello World

Lorem Ipsum

') + [

+ Hello World +

, +

+ Lorem Ipsum +

] + + >>> parse_html('

Hello World

') +

+ Hello World +

+ + +Using lona.html.HTML +++++++++++++++++++++ + .. note:: Added in 1.5: Support for high level nodes, the keyword diff --git a/lona/html/__init__.py b/lona/html/__init__.py index 50d34ffe..f2555955 100644 --- a/lona/html/__init__.py +++ b/lona/html/__init__.py @@ -125,13 +125,13 @@ from lona.html.nodes.scripting import NoScript, Script, Canvas from lona.html.nodes.forms.select2 import Select2, Option2 from lona.html.nodes.web_components import Template, Slot +from lona.html.parsing import NodeHTMLParser, parse_html from lona.html.nodes.forms.select import Select, Option from lona.html.nodes.demarcating_edits import Ins, Del from lona.html.nodes.svg_and_mathml import Math, SVG from lona.events.event_types import * # NOQA: F403 from lona.html.nodes.sectioning_root import Body from lona.html.nodes.raw_nodes import RawHTML -from lona.html.parsing import NodeHTMLParser from lona.html.widgets import HTML as HTML1 from lona.html.parsing import HTML as HTML2 from lona.compat import get_client_version diff --git a/lona/html/parsing.py b/lona/html/parsing.py index 5043b35e..1f13e4ed 100644 --- a/lona/html/parsing.py +++ b/lona/html/parsing.py @@ -5,7 +5,7 @@ tagfind_tolerant, HTMLParser, ) -from typing import List, Dict +from typing import List, Dict, cast from html import unescape import logging @@ -198,11 +198,33 @@ def handle_endtag(self, tag): self.set_current_node(self._node.parent) -def html_string_to_node_list(html_string, use_high_level_nodes=True, - node_classes=None): +def parse_html( + html_string: str, + use_high_level_nodes: bool = True, + node_classes: Dict[str, AbstractNode] | None = None, + flat: bool = True, +) -> AbstractNode | List[AbstractNode]: + + """ + Takes HTML as a string and returns a Lona HTML node or a list of Lona + HTML nodes. + + :use_high_level_nodes: When set to True, node classes from the standard + library get used. When set to False, + `lona.html.Node` will be used for all returned + nodes. 
+ + :node_classes: A dict that contains node classes that should be + used for the returned HTML nodes. - root_node = Node() - nodes = [] + :flat: If set to True and the parsed HTML tree has exactly + one root node, this root node gets returned instead + of a list of one node. + + """ + + root_node: Node = Node() + nodes: List[AbstractNode] = [] html_parser = NodeHTMLParser( use_high_level_nodes=use_high_level_nodes, @@ -221,6 +243,9 @@ def html_string_to_node_list(html_string, use_high_level_nodes=True, node.remove() nodes.append(node) + if flat and len(nodes) == 1: + return nodes[0] + return nodes @@ -230,6 +255,8 @@ def HTML( node_classes: Dict[str, AbstractNode] | None = None, ) -> AbstractNode: + # TODO: remove HTML parsing in 2.0 + _nodes: List[AbstractNode] = [] for node in nodes: @@ -243,10 +270,14 @@ def HTML( # html string elif '<' in node or '>' in node: - parsed_nodes = html_string_to_node_list( - html_string=node, - use_high_level_nodes=use_high_level_nodes, - node_classes=node_classes or {}, + parsed_nodes = cast( + list, + parse_html( + html_string=node, + use_high_level_nodes=use_high_level_nodes, + node_classes=node_classes or {}, + flat=False, + ), ) if len(nodes) > 1: diff --git a/lona/html/widgets.py b/lona/html/widgets.py index 8c12f29e..ec61d23c 100644 --- a/lona/html/widgets.py +++ b/lona/html/widgets.py @@ -1,5 +1,5 @@ -from lona.html.parsing import html_string_to_node_list from lona.html.text_node import TextNode +from lona.html.parsing import parse_html from lona.html.widget import Widget @@ -22,10 +22,11 @@ def __init__(self, *nodes, use_high_level_nodes=True, node_classes=None): self.nodes.append(HTML(node)) else: - self.nodes = html_string_to_node_list( + self.nodes = parse_html( html_string=node, use_high_level_nodes=use_high_level_nodes, node_classes=node_classes or {}, + flat=False, ) else: