From 3ecb27b2dfb700d415ea487f374f8667831e0d5a Mon Sep 17 00:00:00 2001 From: KevinL10 Date: Fri, 25 Aug 2023 15:12:38 -0700 Subject: [PATCH] Add block-based layout --- browser/graphics.py | 27 ++-- browser/layout.py | 229 +++++++++++++++++++++++++++----- browser/main.py | 5 +- browser/{lexer.py => parser.py} | 34 ++--- cpp_browser/test.cpp | 88 ++++++++++++ cpp_browser/tmp.cpp | 97 ++++++++++++++ tests/index.html | 12 +- tests/nested-p.html | 12 ++ 8 files changed, 446 insertions(+), 58 deletions(-) rename browser/{lexer.py => parser.py} (91%) create mode 100644 cpp_browser/test.cpp create mode 100644 cpp_browser/tmp.cpp create mode 100644 tests/nested-p.html diff --git a/browser/graphics.py b/browser/graphics.py index 64db911..6fe1205 100644 --- a/browser/graphics.py +++ b/browser/graphics.py @@ -1,7 +1,7 @@ -from browser.lexer import HTMLParser +from browser.parser import HTMLParser from browser.request import request from browser.constants import VSTEP, HEIGHT, WIDTH, SCROLL_STEP -from browser.layout import Layout +from browser.layout import DocumentLayout import tkinter import tkinter.font @@ -25,7 +25,9 @@ def scrollup(self, e): self.draw() def scrolldown(self, e): - self.scroll += SCROLL_STEP + # Prevent scrolling past bottom of the page + max_y = max(self.document.height - HEIGHT, 0) + self.scroll = min(self.scroll + SCROLL_STEP, max_y) self.draw() @@ -34,18 +36,25 @@ def scrolldown(self, e): # (i.e. how far the user is down the page) def draw(self): self.canvas.delete("all") - for x, y, c, font in self.display_list: - if y > self.scroll + HEIGHT: + for cmd in self.display_list: + if cmd.top > self.scroll + HEIGHT: continue - if y + VSTEP < self.scroll: + if cmd.bottom + VSTEP < self.scroll: continue - self.canvas.create_text(x, y - self.scroll, text=c, anchor='nw', font=font) + + cmd.execute(self.scroll, self.canvas) + # Renders the contents of the url to the canvas def load(self, url): headers, body = request(url) - self.node = HTMLParser(body).parse() - self.display_list = Layout(self.node).display_list + self.nodes = HTMLParser(body).parse() + self.document = DocumentLayout(self.nodes) + self.document.layout() + + # The display_list consists of commands like DrawText and DrawRect + self.display_list = [] + self.document.paint(self.display_list) self.draw() diff --git a/browser/layout.py b/browser/layout.py index cda0abc..950f0a2 100644 --- a/browser/layout.py +++ b/browser/layout.py @@ -1,5 +1,5 @@ from browser.constants import HSTEP, VSTEP, WIDTH -from browser.lexer import Text +from browser.parser import Text, Element import tkinter.font FONTS = {} @@ -16,29 +16,178 @@ def get_font(size, weight, slant): ) return FONTS[key] -# Represents the layout of the web page (including font, position, size, etc.) -class Layout: - def __init__(self, node): + +# DrawText represents a display_list command to draw text to screen +class DrawText: + def __init__(self, x1, y1, text, font): + self.top = y1 + self.left = x1 + self.text = text + self.font = font + self.bottom = y1 + font.metrics("linespace") + + # Draws text to the given canvas + def execute(self, scroll, canvas): + canvas.create_text( + self.left, + self.top - scroll, + text=self.text, + anchor="nw", + font=self.font, + ) + + +# DrawRect represents a display_list command to draw rectangles to screen +class DrawRect: + def __init__(self, x1, y1, x2, y2, color): + self.top = y1 + self.left = x1 + self.bottom = y2 + self.right = x2 + self.color = color + + # Draws rectangle to the given canvas + def execute(self, scroll, canvas: tkinter.Canvas): + canvas.create_rectangle( + self.left, self.top - scroll, + self.right, self.bottom - scroll, + width=0, # border width + fill=self.color, + ) + + +# Represents the layout of a block element +class BlockLayout: + BLOCK_ELEMENTS = [ + "html", + "body", + "article", + "section", + "nav", + "aside", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "hgroup", + "header", + "footer", + "address", + "p", + "hr", + "pre", + "blockquote", + "ol", + "ul", + "menu", + "li", + "dl", + "dt", + "dd", + "figure", + "figcaption", + "main", + "div", + "table", + "form", + "fieldset", + "legend", + "details", + "summary", + ] + + def __init__(self, node: Element, parent: Element, previous: "BlockLayout"): + self.node = node + self.parent = parent + self.previous = previous + self.children = [] self.display_list = [] - self.cursor_x = HSTEP - self.cursor_y = VSTEP - self.weight = "normal" - self.style = "roman" - self.size = 16 - - # A list of (x, word, font) tuples representing the current line - # The final display_list is computed by aligning the words along the - # bottom of the line - self.line = [] - self.recurse(node) + def paint(self, display_list): + if isinstance(self.node, Element) and self.node.tag == "pre": + x2, y2 = self.x + self.width, self.y + self.height + display_list.append(DrawRect(self.x, self.y, x2, y2, "gray")) + + for child in self.children: + child.paint(display_list) + + for x, y, word, font in self.display_list: + display_list.append(DrawText(x, y, word, font)) + + """ + Returns the type of layout of the given HTML nodes + """ + + @staticmethod + def layout_mode(node): + if isinstance(node, Text): + return "inline" + elif node.children: + if any( + [ + isinstance(child, Element) + and child.tag in BlockLayout.BLOCK_ELEMENTS + for child in node.children + ] + ): + return "block" + else: + return "inline" + else: + return "block" + + def layout(self): + # Compute x, y, and width from parent/previous sibling element + self.width = self.parent.width + self.x = self.parent.x + + if self.previous: + self.y = self.previous.y + self.previous.height + else: + self.y = self.parent.y + + mode = BlockLayout.layout_mode(self.node) + if mode == "block": + previous = None + # Create a BlockLayout for every child in the HTML tree of the current node + for child in self.node.children: + next = BlockLayout(child, self, previous) + self.children.append(next) + previous = next + else: + self.display_list = [] + self.cursor_x = 0 + self.cursor_y = 0 + self.weight = "normal" + self.style = "roman" + self.size = 16 + + # A list of (x, word, font) tuples representing the current line + # The final display_list is computed by aligning the words along the + # bottom of the line + self.line = [] - # Flushy any remaining layout elements - self.flush() + self.recurse(self.node) - ''' + # Flushy any remaining layout elements + self.flush() + + # Recursively layout each block child + for child in self.children: + child.layout() + self.display_list.extend(child.display_list) + + if mode == "block": + self.height = sum([child.height for child in self.children]) + else: + self.height = self.cursor_y + + """ Recursively layout the parsed HTML tree - ''' + """ + def recurse(self, node): if isinstance(node, Text): self.add_text(node) @@ -47,10 +196,11 @@ def recurse(self, node): for child in node.children: self.recurse(child) self.close_tag(node.tag) - - ''' + + """ Updates the current weight/style/size based on the given open tag - ''' + """ + def open_tag(self, tag): if tag == "i": self.style = "italic" @@ -65,9 +215,10 @@ def open_tag(self, tag): elif tag == "br": self.flush() - ''' + """ Updates the current weight/style/size based on the given close tag - ''' + """ + def close_tag(self, tag): if tag == "i": self.style = "roman" @@ -87,10 +238,8 @@ def add_text(self, token): font = get_font(self.size, self.weight, self.style) for word in token.text.split(): w = font.measure(word) - - if self.cursor_x + w >= WIDTH - HSTEP: + if self.cursor_x + w > self.width: self.flush() - self.line.append((self.cursor_x, word, font)) self.cursor_x += w + font.measure(" ") @@ -107,11 +256,31 @@ def flush(self): max_descent = max([metric["descent"] for metric in metrics]) baseline = self.cursor_y + max_ascent - for x, word, font in self.line: - y = baseline - font.metrics("ascent") + for rel_x, word, font in self.line: + x = self.x + rel_x + y = self.y + baseline - font.metrics("ascent") self.display_list.append((x, y, word, font)) self.cursor_y = baseline + 1.25 * max_descent - self.cursor_x = HSTEP + self.cursor_x = 0 self.line = [] + + +class DocumentLayout: + def __init__(self, node): + self.node = node + self.parent = None + self.children = [] + + def paint(self, display_list): + self.children[0].paint(display_list) + + def layout(self): + child = BlockLayout(self.node, self, None) + self.children.append(child) + self.x = HSTEP + self.y = VSTEP + self.width = WIDTH - 2 * HSTEP + child.layout() + self.height = child.height + 2 * VSTEP diff --git a/browser/main.py b/browser/main.py index c41a55e..3427bbf 100644 --- a/browser/main.py +++ b/browser/main.py @@ -8,6 +8,9 @@ if len(sys.argv) == 2: browser.load(sys.argv[1]) else: - browser.load("https://example.org") + browser.load("file:///home/vever/cs/browser/browser/tests/index.html") + browser.load("https://browser.engineering/layout.html") + + # browser.load("https://www.w3.org/Style/CSS/Test/CSS1/current/test5526c.htm") tkinter.mainloop() \ No newline at end of file diff --git a/browser/lexer.py b/browser/parser.py similarity index 91% rename from browser/lexer.py rename to browser/parser.py index 3f1808e..df05d7c 100644 --- a/browser/lexer.py +++ b/browser/parser.py @@ -153,20 +153,20 @@ def print_tree(node, indent=0): print_tree(child, indent + 2) -k = HTMLParser( - """ - - - - - - Document - - - Hi! this is a test - bold text - -""" -) - -print_tree(k.parse()) +# k = HTMLParser( +# """ +# +# +# +# +# +# Document +# +# +# Hi! this is a test +# bold text +# +# """ +# ) + +# print_tree(k.parse()) diff --git a/cpp_browser/test.cpp b/cpp_browser/test.cpp new file mode 100644 index 0000000..42e1b3c --- /dev/null +++ b/cpp_browser/test.cpp @@ -0,0 +1,88 @@ +// #include "request.h" +// #include +// using namespace std; + +// int main(int argc, char **argv) { +// if (argc > 2) { +// cout << "Usage: ./browser \n"; +// return 0; +// } + +// string url = (argc == 1) ? "https://example.org" : argv[1]; +// HttpResponse httpResponse = sendGetRequest(url); + +// httpResponse.print(); + + +// gtk_init(&argc, &argv); + +// } + + +#include +#include +#include "request.h" +class MyWindow : public Gtk::Window +{ +public: + MyWindow(); + +protected: + Gtk::Button m_button; + Gtk::TextView m_TextView; + void on_button_clicked(); +}; + +MyWindow::MyWindow() +:m_button("Hello World") +{ + set_title("Basic application"); + set_default_size(600, 400); + // Sets the margin around the button. + m_button.set_margin(10); + + // When the button receives the "clicked" signal, it will call the + // on_button_clicked() method defined below. + m_button.signal_clicked().connect(sigc::mem_fun(*this, + &MyWindow::on_button_clicked)); + + // This packs the button into the Window (a container). + // set_child(m_button); + + + auto refTagMatch = Gtk::TextBuffer::Tag::create(); + refTagMatch->property_background() = "orange"; + auto refTagTable = Gtk::TextBuffer::TagTable::create(); + refTagTable->add(refTagMatch); + //Hopefully a future version of gtkmm will have a set_tag_table() method, + //for use after creation of the buffer. + auto refBuffer = Gtk::TextBuffer::create(refTagTable); + refBuffer->insert(refBuffer->get_iter_at_offset(0), "abcd"); + + auto regTextBuffer = Gtk::TextBuffer::create(); + regTextBuffer->set_text("this is some text"); + m_TextView.set_buffer(refBuffer); + + m_TextView.set_editable(false); + m_TextView.set_cursor_visible(false); + set_child(m_TextView); + // auto refBuffer = m_View1.get_buffer(); + // Glib::RefPtr refTag = refBuffer->create_tag("heading"); + // refTag->property_weight() = Pango::Weight::BOLD; + // refTag->property_size() = 15 * Pango::SCALE; + // auto iter = refBuffer->get_iter_at_offset(0); + // refBuffer->insert(iter, "The text widget can display text with all kinds of nifty attributes. It also supports multiple views of the same buffer; this demo is showing the same buffer in two places.\n\n"); +} + +void MyWindow::on_button_clicked(){ + std::cout<<"Hello World\n"; + HttpResponse httpResponse = sendGetRequest("https://example.org/index.html"); + httpResponse.print(); +} + +int main(int argc, char* argv[]) +{ + auto app = Gtk::Application::create("org.gtkmm.examples.base"); + + return app->make_window_and_run(argc, argv); +} \ No newline at end of file diff --git a/cpp_browser/tmp.cpp b/cpp_browser/tmp.cpp new file mode 100644 index 0000000..a353036 --- /dev/null +++ b/cpp_browser/tmp.cpp @@ -0,0 +1,97 @@ +#ifndef LEXER_H +#define LEXER_H + +#include +#include +using namespace std; + +enum TokenType { TEXT = 0, TAG = 1 }; + +struct Token { + string text; + TokenType type; + + Token(string t, TokenType ty); +}; + +// Tokenizes the given HTTP body into tags ("

", "

") and text ("hello") +vector lex(string body); + +/** + * Composite design pattern; element represents a node in the parsed HTML tree + */ +class Element { + public: + string text; + Element* parent; + virtual string toString(); +}; + +class TextElement : public Element { + TextElement(string t, Element* p); +}; + +class TagElement : public Element { + vector children; + map attributes; + TagElement(string t, map attr, Element* p); +}; + +/** + * An non-leaf element node (open and close tag) that contains zero or multiple + * children elements + */ +struct Element { + string tag; + vector children; + map attributes; + Element* parent; + + Element(string t, map attr, Element* p); + string toString(); +}; + +/** + * A leaf-node text element (e.g. the "abcd" in

"abcd"

) + */ +struct Text { + string text; + Element* parent; + + Text(string t, Element* p); + string toString(); +}; + +class HTMLParser { + public: + // Reads the tokens of the body and updates the unfinished list of tags + // Returns the root element of the HTML page + Element* parse(); + HTMLParser(string body); + + private: + vector unfinished; + + // Adds the current text or tag Element to the tree + void add_text(string text); + void add_tag(string tag); + + // Convert the incomplete tree (

unfinished) to a complete tree by + // closing off the open tags. Returns the root element of the HTML page + Element* finish(); + + // Returns the pair {tag, attributes} for a tag, e.g. + static pair> getAttributes(string text); + + // The raw HTML string to parse + string body; +}; + +void print_tree(Element* root, int indent = 0); + +const vector selfClosingTags = { + "area", "base", "br", "col", "embed", "hr", "img", + "input", "link", "meta", "param", "source", "track", "wbr", +}; + +#endif \ No newline at end of file diff --git a/tests/index.html b/tests/index.html index c453c7e..10f1d87 100644 --- a/tests/index.html +++ b/tests/index.html @@ -16,8 +16,15 @@

title

Some text in a paragraph

+
+ some text in a div + more text in dev +
+ +

+ this is a big text! +

- this is a big text! bold again ! @@ -28,6 +35,9 @@

title

some text tm +
+        this is some code
+    
CSS (Cascading Style Sheets) diff --git a/tests/nested-p.html b/tests/nested-p.html new file mode 100644 index 0000000..75f6f40 --- /dev/null +++ b/tests/nested-p.html @@ -0,0 +1,12 @@ +abcd +bcde +
+ abcd +
+abcd +
+

another div

+
+ + +

hello

\ No newline at end of file