Python module refactor (#158)

* Switch to Python 3.10
* Change Python API names:
  - Enum `Format.ST = "syntax-tree"`
  - Class data member `USFMParser.syntax_tree`
  - Class data member `USFMParser.USFM_bytes`
  - Class member function `USFMParser.to_syntax_tree()`
  - Class member function `USFMParser.to_dict()`
  - Class member function `USFMParser.to_list()`
  - Class member function `USFMParser.to_markdown()`
  - Class member function `USFMParser.to_usx()`
* Use match-case in place of if-else when useful
* Update the API guide Jupyter notebook with the new names
* Use the lxml library instead of xml
* Keep class members all in lowercase: `usfm`, `usfm_bytes`
Bridgeconn · Aug 10, 2022 · 990f8b1 · 990f8b1
1 parent 897de5c
commit 990f8b1
Show file tree

Hide file tree

Showing 4 changed files with 222 additions and 300 deletions.
diff --git a/.gitignore b/.gitignore
@@ -35,6 +35,7 @@ build/Release
 # Dependency directories
 */node_modules/
 */ENV/
+*/ENV*/
 jspm_packages/
 
 # Typescript v1 declaration files

diff --git a/python-usfm-parser/API guide for python usfm_grammar.ipynb b/python-usfm-parser/API guide for python usfm_grammar.ipynb
@@ -8,7 +8,7 @@
    "outputs": [],
    "source": [
     "import sys\n",
-    "sys.path.append('/home/kavitha/Documents/PEG JS and USFM/usfm-grammar-v3/usfm-grammar/python-usfm-parser/ENV/lib/python3.8/site-packages')\n"
+    "sys.path.append('/home/kavitha/Documents/PEG JS and USFM/usfm-grammar-v3/usfm-grammar/python-usfm-parser/ENV3.10/lib/python3.10/site-packages')\n"
    ]
   },
   {
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "b3d034a2",
    "metadata": {},
    "outputs": [],
@@ -89,7 +89,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "my_parser.toDict()"
+    "my_parser.to_dict()"
    ]
   },
   {
@@ -99,7 +99,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "my_parser.toDict(Filter.ALL.value)"
+    "my_parser.to_dict(Filter.ALL.value)"
    ]
   },
   {
@@ -109,7 +109,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "my_parser.toDict(Filter.NOTES.value)"
+    "my_parser.to_dict(Filter.NOTES.value)"
    ]
   },
   {
@@ -118,7 +118,9 @@
    "id": "e4f49981",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "my_parser.to_dict(Filter.NOTES_TEXT.value)"
+   ]
   },
   {
    "cell_type": "code",
@@ -127,7 +129,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "table_output = my_parser.toTable()\n",
+    "table_output = my_parser.to_list()\n",
     "table_output\n"
    ]
   },
@@ -156,7 +158,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "table_output = my_parser.toTable(Filter.NOTES.value)\n",
+    "table_output = my_parser.to_list(Filter.NOTES.value)\n",
     "print(\"\\n\".join([\"\\t\".join(row) for row in table_output]))\n"
    ]
   },
@@ -175,7 +177,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "my_parser.toDict(Filter.SCRIPTURE_PARAGRAPHS.value)"
+    "my_parser.to_dict(Filter.SCRIPTURE_PARAGRAPHS.value)"
    ]
   },
   {
@@ -185,7 +187,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "table_output = my_parser.toTable(Filter.SCRIPTURE_PARAGRAPHS.value)\n",
+    "table_output = my_parser.to_list(Filter.SCRIPTURE_PARAGRAPHS.value)\n",
     "print(\"\\n\".join([\"\\t\".join(row) for row in table_output]))\n"
    ]
   },
@@ -206,7 +208,7 @@
    "source": [
     "import xml.etree.ElementTree as ET\n",
     "\n",
-    "usx_elem = my_parser.toUSX()\n",
+    "usx_elem = my_parser.to_usx()\n",
     "usx_str = ET.tostring(usx_elem, encoding=\"unicode\")"
    ]
   },
@@ -226,40 +228,25 @@
    "id": "295dae47",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "!pip install lxml"
-   ]
+   "source": []
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "583efddc",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "!pip install rnc2rng"
-   ]
+   "source": []
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "2bd40ba2",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'<usx version=\"3.0\"><book code=\"GEN\" style=\"id\" /><chapter number=\"1\" style=\"c\" sid=\"GEN 1\" /><para style=\"p\" /><chapter eid=\"GEN 1\" /></usx>'"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import sys\n",
-    "sys.path.append('/home/kavitha/Documents/PEG JS and USFM/usfm-grammar-v3/usfm-grammar/python-usfm-parser/ENV/lib/python3.8/site-packages')\n",
+    "sys.path.append('/home/kavitha/Documents/PEG JS and USFM/usfm-grammar-v3/usfm-grammar/python-usfm-parser/ENV3.10/lib/python3.10/site-packages')\n",
     "\n",
     "\n",
     "from usfm_grammar import USFMParser, Filter\n",
@@ -268,15 +255,15 @@
     "input_usfm_str = open(\"origin.usfm\",\"r\", encoding='utf8').read()\n",
     "my_parser = USFMParser(input_usfm_str)\n",
     "\n",
-    "usx_elem = my_parser.toUSX()\n",
+    "usx_elem = my_parser.to_usx()\n",
     "usx_str = ET.tostring(usx_elem, encoding=\"unicode\")\n",
     "\n",
     "usx_str"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": null,
    "id": "a680a0b6",
    "metadata": {},
    "outputs": [],
@@ -289,18 +276,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": null,
    "id": "0fac8a56",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "valid\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "\n",
@@ -323,40 +302,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": null,
    "id": "1ea6bb28",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "valid\n"
-     ]
-    }
-   ],
-   "source": [
-    "origin_usx_str = '''<usx version=\"3.0\">\n",
-    "  <book code=\"GEN\" style=\"id\" />\n",
-    "  <para style=\"mt1\">MARK</para>\n",
-    "  <chapter number=\"1\" style=\"c\" sid=\"GEN 1\" />\n",
-    "  <para style=\"p\">\n",
-    "    <verse number=\"1\" style=\"v\" sid=\"GEN 1:1\" />\n",
-    "    verse one \n",
-    "    <verse eid=\"GEN 1:1\" />\n",
-    "    <verse number=\"2\" style=\"v\" sid=\"GEN 1:2\" />\n",
-    "    verse two\n",
-    "    <verse eid=\"GEN 1:2\" />\n",
-    "  </para>\n",
-    "  <chapter eid=\"GEN 1\" />\n",
-    "</usx>'''\n",
-    "usx_f = StringIO(origin_usx_str)\n",
-    "doc = etree.parse(usx_f)\n",
-    "if relaxng.validate(doc):\n",
-    "    print(\"valid\")\n",
-    "else:\n",
-    "    relaxng.assertValid(doc)"
-   ]
+   "outputs": [],
+   "source": []
   },
   {
    "cell_type": "code",
@@ -368,56 +318,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": null,
    "id": "8d12593b",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "valid\n"
-     ]
-    }
-   ],
-   "source": [
-    "empty_usx_str = '''<usx version=\"3.0.0\">\n",
-    "  <book code=\"GEN\" style=\"id\" />\n",
-    "  <chapter number=\"1\" style=\"c\" sid=\"GEN 1\" />\n",
-    "  <para style=\"p\">\n",
-    "    <verse number=\"1\" style=\"v\" altnumber=\"2\" pubnumber=\"B\" sid=\"GEN 1:22\" />\n",
-    "    verse one\n",
-    "  </para>\n",
-    "  <chapter eid=\"GEN 1\" />\n",
-    "\n",
-    "</usx>'''\n",
-    "usx_f = StringIO(empty_usx_str)\n",
-    "doc = etree.parse(usx_f)\n",
-    "if relaxng.validate(doc):\n",
-    "    print(\"valid\")\n",
-    "else:\n",
-    "    relaxng.assertValid(doc)"
-   ]
+   "outputs": [],
+   "source": []
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "818e36d9",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'(File (book (id (bookcode) (description))) (mtBlock (mt (numberedLevelMax4) (text))) (chapter (c (chapterNumber)) (paragraph (p))))'"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "my_parser.toAST()"
+    "my_parser.to_syntax_tree()"
    ]
   },
   {
@@ -445,7 +359,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.10"
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,

diff --git a/python-usfm-parser/requirements.txt b/python-usfm-parser/requirements.txt
@@ -1,2 +1,4 @@
 tree-sitter==0.20.0
-jupyterlab==3.3.2
+jupyterlab==3.4.4
+rnc2rng==2.6.6
+lxml==4.9.1