Skip to content

Commit

Permalink
Merge pull request #436 from lona-web-org/fscherf/add-parse-html
Browse files Browse the repository at this point in the history
html: add parse_html
  • Loading branch information
fscherf committed Jul 14, 2023
2 parents 413569c + d2e6824 commit 363b389
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 12 deletions.
34 changes: 34 additions & 0 deletions doc/content/api-reference/html.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,40 @@ given selector.
Using HTML Strings
~~~~~~~~~~~~~~~~~~
To initialize an HTML tree you can use ``lona.html.parse_html``, which returns
a Lona HTML node or a list of Lona HTML nodes.
``lona.html.parse_html`` uses high level nodes from the standard library like
``lona.html.TextInput`` which implement high level methods and properties.
To disable this and parse HTML into blank nodes you can set
``use_high_level_nodes=False``.
When ``lona.html.parse_html`` parses an HTML string that results in an HTML
tree with exactly one root node, and ``flat`` is set to ``True`` (the
default), ``lona.html.parse_html`` flattens the result by returning the root
node itself instead of a list containing that one node.
.. code-block:: python
from lona.html import parse_html
>>> parse_html('<h1>Hello World</h1><p>Lorem Ipsum</p>')
[<h1 data-lona-node-id="9">
Hello World
</h1>,
<p data-lona-node-id="11">
Lorem Ipsum
</p>]
>>> parse_html('<h1>Hello World</h1>')
<h1 data-lona-node-id="14">
Hello World
</h1>
Using lona.html.HTML
++++++++++++++++++++
.. note::
Added in 1.5: Support for high level nodes, the keyword
Expand Down
2 changes: 1 addition & 1 deletion lona/html/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@
from lona.html.nodes.scripting import NoScript, Script, Canvas
from lona.html.nodes.forms.select2 import Select2, Option2
from lona.html.nodes.web_components import Template, Slot
from lona.html.parsing import NodeHTMLParser, parse_html
from lona.html.nodes.forms.select import Select, Option
from lona.html.nodes.demarcating_edits import Ins, Del
from lona.html.nodes.svg_and_mathml import Math, SVG
from lona.events.event_types import * # NOQA: F403
from lona.html.nodes.sectioning_root import Body
from lona.html.nodes.raw_nodes import RawHTML
from lona.html.parsing import NodeHTMLParser
from lona.html.widgets import HTML as HTML1
from lona.html.parsing import HTML as HTML2
from lona.compat import get_client_version
Expand Down
49 changes: 40 additions & 9 deletions lona/html/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
tagfind_tolerant,
HTMLParser,
)
from typing import List, Dict
from typing import List, Dict, cast
from html import unescape
import logging

Expand Down Expand Up @@ -198,11 +198,33 @@ def handle_endtag(self, tag):
self.set_current_node(self._node.parent)


def html_string_to_node_list(html_string, use_high_level_nodes=True,
node_classes=None):
def parse_html(
html_string: str,
use_high_level_nodes: bool = True,
node_classes: Dict[str, AbstractNode] | None = None,
flat: bool = True,
) -> AbstractNode | List[AbstractNode]:

"""
Takes HTML as a string and returns a Lona HTML node or a list of Lona
HTML nodes.
:use_high_level_nodes: When set to True, node classes from the standard
library get used. When set to False,
`lona.html.Node` will be used for all returned
nodes.
:node_classes: A dict that contains node classes that should be
used for the returned HTML nodes.
root_node = Node()
nodes = []
:flat: If set to True and the parsed HTML tree has exactly
one root node, this root node gets returned instead
of a list of one node.
"""

root_node: Node = Node()
nodes: List[AbstractNode] = []

html_parser = NodeHTMLParser(
use_high_level_nodes=use_high_level_nodes,
Expand All @@ -221,6 +243,9 @@ def html_string_to_node_list(html_string, use_high_level_nodes=True,
node.remove()
nodes.append(node)

if flat and len(nodes) == 1:
return nodes[0]

return nodes


Expand All @@ -230,6 +255,8 @@ def HTML(
node_classes: Dict[str, AbstractNode] | None = None,
) -> AbstractNode:

# TODO: remove HTML parsing in 2.0

_nodes: List[AbstractNode] = []

for node in nodes:
Expand All @@ -243,10 +270,14 @@ def HTML(

# html string
elif '<' in node or '>' in node:
parsed_nodes = html_string_to_node_list(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
parsed_nodes = cast(
list,
parse_html(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
flat=False,
),
)

if len(nodes) > 1:
Expand Down
5 changes: 3 additions & 2 deletions lona/html/widgets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from lona.html.parsing import html_string_to_node_list
from lona.html.text_node import TextNode
from lona.html.parsing import parse_html
from lona.html.widget import Widget


Expand All @@ -22,10 +22,11 @@ def __init__(self, *nodes, use_high_level_nodes=True, node_classes=None):
self.nodes.append(HTML(node))

else:
self.nodes = html_string_to_node_list(
self.nodes = parse_html(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
flat=False,
)

else:
Expand Down

0 comments on commit 363b389

Please sign in to comment.