Skip to content

Commit

Permalink
Python module refactor (#158)
Browse files (browse the repository at this point in the history)
* switch to python3.10

* change Python API names
 - Enum Format.ST = "syntax-tree"
 - Class data member USFMParser.syntax_tree
 - Class data member USFMParser.USFM_bytes
 - Class member function USFMParser.to_syntax_tree()
 - Class member function USFMParser.to_dict()
 - Class member function USFMParser.to_list()
 - Class member function USFMParser.to_markdown()
 - Class member function USFMParser.to_usx()

* use match-case in place of if-elif-else chains where useful

* update the API guide jupyter notebook with new names

* use the lxml library instead of the standard-library xml module

* keep class members all in lowercase: usfm, usfm_bytes
  • Loading branch information
kavitharaju authored Aug 10, 2022
1 parent 897de5c commit 990f8b1
Show file tree
Hide file tree
Showing 4 changed files with 222 additions and 300 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ build/Release
# Dependency directories
*/node_modules/
*/ENV/
*/ENV*/
jspm_packages/

# Typescript v1 declaration files
Expand Down
150 changes: 32 additions & 118 deletions python-usfm-parser/API guide for python usfm_grammar.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('/home/kavitha/Documents/PEG JS and USFM/usfm-grammar-v3/usfm-grammar/python-usfm-parser/ENV/lib/python3.8/site-packages')\n"
"sys.path.append('/home/kavitha/Documents/PEG JS and USFM/usfm-grammar-v3/usfm-grammar/python-usfm-parser/ENV3.10/lib/python3.10/site-packages')\n"
]
},
{
Expand All @@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "b3d034a2",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -89,7 +89,7 @@
"metadata": {},
"outputs": [],
"source": [
"my_parser.toDict()"
"my_parser.to_dict()"
]
},
{
Expand All @@ -99,7 +99,7 @@
"metadata": {},
"outputs": [],
"source": [
"my_parser.toDict(Filter.ALL.value)"
"my_parser.to_dict(Filter.ALL.value)"
]
},
{
Expand All @@ -109,7 +109,7 @@
"metadata": {},
"outputs": [],
"source": [
"my_parser.toDict(Filter.NOTES.value)"
"my_parser.to_dict(Filter.NOTES.value)"
]
},
{
Expand All @@ -118,7 +118,9 @@
"id": "e4f49981",
"metadata": {},
"outputs": [],
"source": []
"source": [
"my_parser.to_dict(Filter.NOTES_TEXT.value)"
]
},
{
"cell_type": "code",
Expand All @@ -127,7 +129,7 @@
"metadata": {},
"outputs": [],
"source": [
"table_output = my_parser.toTable()\n",
"table_output = my_parser.to_list()\n",
"table_output\n"
]
},
Expand Down Expand Up @@ -156,7 +158,7 @@
"metadata": {},
"outputs": [],
"source": [
"table_output = my_parser.toTable(Filter.NOTES.value)\n",
"table_output = my_parser.to_list(Filter.NOTES.value)\n",
"print(\"\\n\".join([\"\\t\".join(row) for row in table_output]))\n"
]
},
Expand All @@ -175,7 +177,7 @@
"metadata": {},
"outputs": [],
"source": [
"my_parser.toDict(Filter.SCRIPTURE_PARAGRAPHS.value)"
"my_parser.to_dict(Filter.SCRIPTURE_PARAGRAPHS.value)"
]
},
{
Expand All @@ -185,7 +187,7 @@
"metadata": {},
"outputs": [],
"source": [
"table_output = my_parser.toTable(Filter.SCRIPTURE_PARAGRAPHS.value)\n",
"table_output = my_parser.to_list(Filter.SCRIPTURE_PARAGRAPHS.value)\n",
"print(\"\\n\".join([\"\\t\".join(row) for row in table_output]))\n"
]
},
Expand All @@ -206,7 +208,7 @@
"source": [
"import xml.etree.ElementTree as ET\n",
"\n",
"usx_elem = my_parser.toUSX()\n",
"usx_elem = my_parser.to_usx()\n",
"usx_str = ET.tostring(usx_elem, encoding=\"unicode\")"
]
},
Expand All @@ -226,40 +228,25 @@
"id": "295dae47",
"metadata": {},
"outputs": [],
"source": [
"!pip install lxml"
]
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "583efddc",
"metadata": {},
"outputs": [],
"source": [
"!pip install rnc2rng"
]
"source": []
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "2bd40ba2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'<usx version=\"3.0\"><book code=\"GEN\" style=\"id\" /><chapter number=\"1\" style=\"c\" sid=\"GEN 1\" /><para style=\"p\" /><chapter eid=\"GEN 1\" /></usx>'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('/home/kavitha/Documents/PEG JS and USFM/usfm-grammar-v3/usfm-grammar/python-usfm-parser/ENV/lib/python3.8/site-packages')\n",
"sys.path.append('/home/kavitha/Documents/PEG JS and USFM/usfm-grammar-v3/usfm-grammar/python-usfm-parser/ENV3.10/lib/python3.10/site-packages')\n",
"\n",
"\n",
"from usfm_grammar import USFMParser, Filter\n",
Expand All @@ -268,15 +255,15 @@
"input_usfm_str = open(\"origin.usfm\",\"r\", encoding='utf8').read()\n",
"my_parser = USFMParser(input_usfm_str)\n",
"\n",
"usx_elem = my_parser.toUSX()\n",
"usx_elem = my_parser.to_usx()\n",
"usx_str = ET.tostring(usx_elem, encoding=\"unicode\")\n",
"\n",
"usx_str"
]
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": null,
"id": "a680a0b6",
"metadata": {},
"outputs": [],
Expand All @@ -289,18 +276,10 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": null,
"id": "0fac8a56",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"valid\n"
]
}
],
"outputs": [],
"source": [
"\n",
"\n",
Expand All @@ -323,40 +302,11 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": null,
"id": "1ea6bb28",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"valid\n"
]
}
],
"source": [
"origin_usx_str = '''<usx version=\"3.0\">\n",
" <book code=\"GEN\" style=\"id\" />\n",
" <para style=\"mt1\">MARK</para>\n",
" <chapter number=\"1\" style=\"c\" sid=\"GEN 1\" />\n",
" <para style=\"p\">\n",
" <verse number=\"1\" style=\"v\" sid=\"GEN 1:1\" />\n",
" verse one \n",
" <verse eid=\"GEN 1:1\" />\n",
" <verse number=\"2\" style=\"v\" sid=\"GEN 1:2\" />\n",
" verse two\n",
" <verse eid=\"GEN 1:2\" />\n",
" </para>\n",
" <chapter eid=\"GEN 1\" />\n",
"</usx>'''\n",
"usx_f = StringIO(origin_usx_str)\n",
"doc = etree.parse(usx_f)\n",
"if relaxng.validate(doc):\n",
" print(\"valid\")\n",
"else:\n",
" relaxng.assertValid(doc)"
]
"outputs": [],
"source": []
},
{
"cell_type": "code",
Expand All @@ -368,56 +318,20 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": null,
"id": "8d12593b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"valid\n"
]
}
],
"source": [
"empty_usx_str = '''<usx version=\"3.0.0\">\n",
" <book code=\"GEN\" style=\"id\" />\n",
" <chapter number=\"1\" style=\"c\" sid=\"GEN 1\" />\n",
" <para style=\"p\">\n",
" <verse number=\"1\" style=\"v\" altnumber=\"2\" pubnumber=\"B\" sid=\"GEN 1:22\" />\n",
" verse one\n",
" </para>\n",
" <chapter eid=\"GEN 1\" />\n",
"\n",
"</usx>'''\n",
"usx_f = StringIO(empty_usx_str)\n",
"doc = etree.parse(usx_f)\n",
"if relaxng.validate(doc):\n",
" print(\"valid\")\n",
"else:\n",
" relaxng.assertValid(doc)"
]
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "818e36d9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'(File (book (id (bookcode) (description))) (mtBlock (mt (numberedLevelMax4) (text))) (chapter (c (chapterNumber)) (paragraph (p))))'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"my_parser.toAST()"
"my_parser.to_syntax_tree()"
]
},
{
Expand Down Expand Up @@ -445,7 +359,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.10.6"
}
},
"nbformat": 4,
Expand Down
4 changes: 3 additions & 1 deletion python-usfm-parser/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
tree-sitter==0.20.0
jupyterlab==3.3.2
jupyterlab==3.4.4
rnc2rng==2.6.6
lxml==4.9.1
Loading

0 comments on commit 990f8b1

Please sign in to comment.