Merge pull request #436 from lona-web-org/fscherf/add-parse-html

html: add parse_html
lona-web-org · Jul 14, 2023 · 363b389 · 363b389
2 parents 413569c + d2e6824
commit 363b389
Show file tree

Hide file tree

Showing 4 changed files with 78 additions and 12 deletions.
diff --git a/doc/content/api-reference/html.rst b/doc/content/api-reference/html.rst
@@ -174,6 +174,40 @@ given selector.
 Using HTML Strings
 ~~~~~~~~~~~~~~~~~~
 
+To initialize an HTML tree you can use ``lona.html.parse_html``, which returns
+a Lona HTML node or a list of Lona HTML nodes.
+
+By default, ``lona.html.parse_html`` uses high level nodes from the standard
+library, like ``lona.html.TextInput``, which implement high level methods and
+properties. To disable this and parse HTML into blank nodes, set
+``use_high_level_nodes=False``.
+
+When ``lona.html.parse_html`` parses an HTML string that results in an HTML
+tree with exactly one root node, and ``flat`` is set to ``True`` (the
+default), ``lona.html.parse_html`` flattens the result by returning the root
+node itself instead of a list containing that one node.
+
+.. code-block:: python
+
+ from lona.html import parse_html
+
+ >>> parse_html('<h1>Hello World</h1><p>Lorem Ipsum</p>')
+ [<h1 data-lona-node-id="9">
+ Hello World
+ </h1>,
+ <p data-lona-node-id="11">
+ Lorem Ipsum
+ </p>]
+
+ >>> parse_html('<h1>Hello World</h1>')
+ <h1 data-lona-node-id="14">
+ Hello World
+ </h1>
+
+
+Using lona.html.HTML
+++++++++++++++++++++
+
 .. note::
 
  Added in 1.5: Support for high level nodes, the keyword

diff --git a/lona/html/__init__.py b/lona/html/__init__.py
@@ -125,13 +125,13 @@
 from lona.html.nodes.scripting import NoScript, Script, Canvas
 from lona.html.nodes.forms.select2 import Select2, Option2
 from lona.html.nodes.web_components import Template, Slot
+from lona.html.parsing import NodeHTMLParser, parse_html
 from lona.html.nodes.forms.select import Select, Option
 from lona.html.nodes.demarcating_edits import Ins, Del
 from lona.html.nodes.svg_and_mathml import Math, SVG
 from lona.events.event_types import * # NOQA: F403
 from lona.html.nodes.sectioning_root import Body
 from lona.html.nodes.raw_nodes import RawHTML
-from lona.html.parsing import NodeHTMLParser
 from lona.html.widgets import HTML as HTML1
 from lona.html.parsing import HTML as HTML2
 from lona.compat import get_client_version

diff --git a/lona/html/parsing.py b/lona/html/parsing.py
@@ -5,7 +5,7 @@
  tagfind_tolerant,
  HTMLParser,
 )
-from typing import List, Dict
+from typing import List, Dict, cast
 from html import unescape
 import logging
 
@@ -198,11 +198,33 @@ def handle_endtag(self, tag):
  self.set_current_node(self._node.parent)
 
 
-def html_string_to_node_list(html_string, use_high_level_nodes=True,
- node_classes=None):
+def parse_html(
+ html_string: str,
+ use_high_level_nodes: bool = True,
+ node_classes: Dict[str, AbstractNode] | None = None,
+ flat: bool = True,
+) -> AbstractNode | List[AbstractNode]:
+
+ """
+ Takes HTML as a string and returns a Lona HTML node or a list of Lona
+ HTML nodes.
+
+ :use_high_level_nodes: When set to True, node classes from the standard
+ library get used. When set to False,
+ `lona.html.Node` will be used for all returned
+ nodes.
+
+ :node_classes: A dict that contains node classes that should be
+ used for the returned HTML nodes.
 
- root_node = Node()
- nodes = []
+ :flat: If set to True and the parsed HTML tree has exactly
+ one root node, this root node gets returned instead
+ of a list of one node.
+
+ """
+
+ root_node: Node = Node()
+ nodes: List[AbstractNode] = []
 
  html_parser = NodeHTMLParser(
  use_high_level_nodes=use_high_level_nodes,
@@ -221,6 +243,9 @@ def html_string_to_node_list(html_string, use_high_level_nodes=True,
  node.remove()
  nodes.append(node)
 
+ if flat and len(nodes) == 1:
+ return nodes[0]
+
  return nodes
 
 
@@ -230,6 +255,8 @@ def HTML(
  node_classes: Dict[str, AbstractNode] | None = None,
 ) -> AbstractNode:
 
+ # TODO: remove HTML parsing in 2.0
+
  _nodes: List[AbstractNode] = []
 
  for node in nodes:
@@ -243,10 +270,14 @@ def HTML(
 
  # html string
  elif '<' in node or '>' in node:
- parsed_nodes = html_string_to_node_list(
- html_string=node,
- use_high_level_nodes=use_high_level_nodes,
- node_classes=node_classes or {},
+ parsed_nodes = cast(
+ list,
+ parse_html(
+ html_string=node,
+ use_high_level_nodes=use_high_level_nodes,
+ node_classes=node_classes or {},
+ flat=False,
+ ),
  )
 
  if len(nodes) > 1:

diff --git a/lona/html/widgets.py b/lona/html/widgets.py
@@ -1,5 +1,5 @@
-from lona.html.parsing import html_string_to_node_list
 from lona.html.text_node import TextNode
+from lona.html.parsing import parse_html
 from lona.html.widget import Widget
 
 
@@ -22,10 +22,11 @@ def __init__(self, *nodes, use_high_level_nodes=True, node_classes=None):
  self.nodes.append(HTML(node))
 
  else:
- self.nodes = html_string_to_node_list(
+ self.nodes = parse_html(
  html_string=node,
  use_high_level_nodes=use_high_level_nodes,
  node_classes=node_classes or {},
+ flat=False,
  )
 
  else: