Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

html: add parse_html #436

Merged
merged 1 commit into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions doc/content/api-reference/html.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,40 @@ given selector.
Using HTML Strings
~~~~~~~~~~~~~~~~~~

To initialize an HTML tree you can use ``lona.html.parse_html``, which returns
a Lona HTML node or a list of Lona HTML nodes.

By default, ``lona.html.parse_html`` uses high-level nodes from the standard
library, like ``lona.html.TextInput``, which implement high-level methods and
properties. To disable this and parse HTML into blank nodes, you can set
``use_high_level_nodes=False``.

When ``lona.html.parse_html`` parses an HTML string that results in an HTML
tree with exactly one root node, and ``flat`` is set to ``True`` (the
default), ``lona.html.parse_html`` flattens the tree by returning the root
node instead of a list.

.. code-block:: python

from lona.html import parse_html

>>> parse_html('<h1>Hello World</h1><p>Lorem Ipsum</p>')
[<h1 data-lona-node-id="9">
Hello World
</h1>,
<p data-lona-node-id="11">
Lorem Ipsum
</p>]

>>> parse_html('<h1>Hello World</h1>')
<h1 data-lona-node-id="14">
Hello World
</h1>


Using lona.html.HTML
++++++++++++++++++++

.. note::

Added in 1.5: Support for high level nodes, the keyword
Expand Down
2 changes: 1 addition & 1 deletion lona/html/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@
from lona.html.nodes.scripting import NoScript, Script, Canvas
from lona.html.nodes.forms.select2 import Select2, Option2
from lona.html.nodes.web_components import Template, Slot
from lona.html.parsing import NodeHTMLParser, parse_html
from lona.html.nodes.forms.select import Select, Option
from lona.html.nodes.demarcating_edits import Ins, Del
from lona.html.nodes.svg_and_mathml import Math, SVG
from lona.events.event_types import * # NOQA: F403
from lona.html.nodes.sectioning_root import Body
from lona.html.nodes.raw_nodes import RawHTML
from lona.html.parsing import NodeHTMLParser
from lona.html.widgets import HTML as HTML1
from lona.html.parsing import HTML as HTML2
from lona.compat import get_client_version
Expand Down
49 changes: 40 additions & 9 deletions lona/html/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
tagfind_tolerant,
HTMLParser,
)
from typing import List, Dict
from typing import List, Dict, cast
from html import unescape
import logging

Expand Down Expand Up @@ -198,11 +198,33 @@ def handle_endtag(self, tag):
self.set_current_node(self._node.parent)


def html_string_to_node_list(html_string, use_high_level_nodes=True,
node_classes=None):
def parse_html(
html_string: str,
use_high_level_nodes: bool = True,
node_classes: Dict[str, AbstractNode] | None = None,
flat: bool = True,
) -> AbstractNode | List[AbstractNode]:

"""
Takes HTML as a string and returns a Lona HTML node or a list of Lona
HTML nodes.

:use_high_level_nodes: When set to True, node classes from the standard
library get used. When set to False,
`lona.html.Node` will be used for all returned
nodes.

:node_classes: A dict that contains node classes that should be
used for the returned HTML nodes.

root_node = Node()
nodes = []
:flat: If set to True and the parsed HTML tree has exactly
one root node, this root node gets returned instead
of a list of one node.

"""

root_node: Node = Node()
nodes: List[AbstractNode] = []

html_parser = NodeHTMLParser(
use_high_level_nodes=use_high_level_nodes,
Expand All @@ -221,6 +243,9 @@ def html_string_to_node_list(html_string, use_high_level_nodes=True,
node.remove()
nodes.append(node)

if flat and len(nodes) == 1:
return nodes[0]

return nodes


Expand All @@ -230,6 +255,8 @@ def HTML(
node_classes: Dict[str, AbstractNode] | None = None,
) -> AbstractNode:

# TODO: remove HTML parsing in 2.0

_nodes: List[AbstractNode] = []

for node in nodes:
Expand All @@ -243,10 +270,14 @@ def HTML(

# html string
elif '<' in node or '>' in node:
parsed_nodes = html_string_to_node_list(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
parsed_nodes = cast(
list,
parse_html(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
flat=False,
),
)

if len(nodes) > 1:
Expand Down
5 changes: 3 additions & 2 deletions lona/html/widgets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from lona.html.parsing import html_string_to_node_list
from lona.html.text_node import TextNode
from lona.html.parsing import parse_html
from lona.html.widget import Widget


Expand All @@ -22,10 +22,11 @@ def __init__(self, *nodes, use_high_level_nodes=True, node_classes=None):
self.nodes.append(HTML(node))

else:
self.nodes = html_string_to_node_list(
self.nodes = parse_html(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
flat=False,
)

else:
Expand Down