From 5fd2aa89c2f5bc9a7279f996ac2a1e670a633bff Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 01:55:28 +0300 Subject: [PATCH 01/16] perform some refactorings --- simple_ddl_parser/ddl_parser.py | 5 +-- simple_ddl_parser/exception.py | 8 +++++ simple_ddl_parser/output/core.py | 2 +- simple_ddl_parser/output/table_data.py | 10 +++--- simple_ddl_parser/parser.py | 8 ++--- simple_ddl_parser/tokens.py | 20 ++++++------ simple_ddl_parser/utils.py | 43 +++++++++++++------------- tests/test_utils.py | 42 +++++++++++++++++++++++++ 8 files changed, 91 insertions(+), 47 deletions(-) create mode 100644 simple_ddl_parser/exception.py create mode 100644 tests/test_utils.py diff --git a/simple_ddl_parser/ddl_parser.py b/simple_ddl_parser/ddl_parser.py index a3b2980..2162ccb 100755 --- a/simple_ddl_parser/ddl_parser.py +++ b/simple_ddl_parser/ddl_parser.py @@ -2,6 +2,7 @@ from ply.lex import LexToken +from simple_ddl_parser.exception import DDLParserError from simple_ddl_parser import tokens as tok from simple_ddl_parser.dialects import ( HQL, @@ -19,10 +20,6 @@ from simple_ddl_parser.parser import Parser -class DDLParserError(Exception): - pass - - class Dialects( SparkSQL, Snowflake, diff --git a/simple_ddl_parser/exception.py b/simple_ddl_parser/exception.py new file mode 100644 index 0000000..98e17bc --- /dev/null +++ b/simple_ddl_parser/exception.py @@ -0,0 +1,8 @@ +__all__ = [ + "DDLParserError", +] + + +class DDLParserError(Exception): + """ Base exception in simple ddl parser library """ + pass diff --git a/simple_ddl_parser/output/core.py b/simple_ddl_parser/output/core.py index f25330c..e21969d 100644 --- a/simple_ddl_parser/output/core.py +++ b/simple_ddl_parser/output/core.py @@ -123,7 +123,7 @@ def group_by_type_result(self) -> None: else: _type.extend(item["comments"]) break - if result_as_dict["comments"] == []: + if not result_as_dict["comments"]: del result_as_dict["comments"] self.final_result = result_as_dict diff --git 
a/simple_ddl_parser/output/table_data.py b/simple_ddl_parser/output/table_data.py index df07b10..207ddf5 100644 --- a/simple_ddl_parser/output/table_data.py +++ b/simple_ddl_parser/output/table_data.py @@ -13,7 +13,7 @@ def get_dialect_class(cls, kwargs: dict): if output_mode and output_mode != "sql": main_cls = dialect_by_name.get(output_mode) - cls = dataclass( + cls_ = dataclass( type( f"{main_cls.__name__}{cls.cls_prefix}", (main_cls, CommonDialectsFieldsMixin), @@ -21,12 +21,12 @@ def get_dialect_class(cls, kwargs: dict): ) ) else: - cls = BaseData + cls_ = BaseData - return cls + return cls_ @staticmethod - def pre_process_kwargs(kwargs: dict, aliased_fields: dict) -> dict: + def pre_process_kwargs(kwargs: dict, aliased_fields: dict) -> None: for alias, field_name in aliased_fields.items(): if alias in kwargs: kwargs[field_name] = kwargs[alias] @@ -40,7 +40,7 @@ def pre_process_kwargs(kwargs: dict, aliased_fields: dict) -> dict: kwargs["fields_terminated_by"] = "','" @classmethod - def pre_load_mods(cls, main_cls, kwargs): + def pre_load_mods(cls, main_cls, kwargs) -> dict: if kwargs.get("output_mode") == "bigquery": if kwargs.get("schema"): kwargs["dataset"] = kwargs["schema"] diff --git a/simple_ddl_parser/parser.py b/simple_ddl_parser/parser.py index ae60d33..78d11c1 100755 --- a/simple_ddl_parser/parser.py +++ b/simple_ddl_parser/parser.py @@ -6,12 +6,10 @@ from ply import lex, yacc +from simple_ddl_parser.exception import DDLParserError from simple_ddl_parser.output.core import Output, dump_data_to_file from simple_ddl_parser.output.dialects import dialect_by_name -from simple_ddl_parser.utils import ( - SimpleDDLParserException, - find_first_unpair_closed_par, -) +from simple_ddl_parser.utils import find_first_unpair_closed_par # open comment OP_COM = "/*" @@ -348,7 +346,7 @@ def run( Dict == one entity from ddl - one table or sequence or type. 
""" if output_mode not in dialect_by_name: - raise SimpleDDLParserException( + raise DDLParserError( f"Output mode can be one of possible variants: {dialect_by_name.keys()}" ) self.tables = self.parse_data() diff --git a/simple_ddl_parser/tokens.py b/simple_ddl_parser/tokens.py index acecbbd..6cfde35 100644 --- a/simple_ddl_parser/tokens.py +++ b/simple_ddl_parser/tokens.py @@ -150,8 +150,8 @@ tokens = tuple( - set( - [ + { + *[ "ID", "DOT", "STRING_BASE", @@ -161,14 +161,14 @@ "LT", "RT", "COMMAT", - ] - + list(definition_statements.values()) - + list(common_statements.values()) - + list(columns_definition.values()) - + list(sequence_reserved.values()) - + list(after_columns_tokens.values()) - + list(alter_tokens.values()) - ) + ], + *definition_statements.values(), + *common_statements.values(), + *columns_definition.values(), + *sequence_reserved.values(), + *after_columns_tokens.values(), + *alter_tokens.values(), + } ) symbol_tokens = { diff --git a/simple_ddl_parser/utils.py b/simple_ddl_parser/utils.py index e60fed3..74db81a 100644 --- a/simple_ddl_parser/utils.py +++ b/simple_ddl_parser/utils.py @@ -1,12 +1,17 @@ import re -from typing import List +from typing import List, Tuple, Optional def remove_par(p_list: List[str]) -> List[str]: - remove_list = ["(", ")"] - for symbol in remove_list: - while symbol in p_list: - p_list.remove(symbol) + remove_set = {"(", ")"} + i = j = 0 + while i < len(p_list): + if p_list[i] not in remove_set: + p_list[j] = p_list[i] + j += 1 + i += 1 + while j < len(p_list): + p_list.pop() return p_list @@ -30,18 +35,16 @@ def check_spec(value: str) -> str: return replace_value -def find_first_unpair_closed_par(str_: str) -> int: - stack = [] - n = -1 - for i in str_: - n += 1 - if i == ")": - if not stack: - return n - else: - stack.pop(-1) - elif i == "(": - stack.append(i) +def find_first_unpair_closed_par(str_: str) -> Optional[int]: + count_open = 0 + for i, char in enumerate(str_): + if char == '(': + count_open += 1 + if 
char == ')': + count_open -= 1 + if count_open < 0: + return i + return None def normalize_name(name: str) -> str: @@ -50,12 +53,8 @@ def normalize_name(name: str) -> str: return re.sub(clean_up_re, "", name).lower() -def get_table_id(schema_name: str, table_name: str): +def get_table_id(schema_name: str, table_name: str) -> Tuple[str, str]: table_name = normalize_name(table_name) if schema_name: schema_name = normalize_name(schema_name) return (table_name, schema_name) - - -class SimpleDDLParserException(Exception): - pass diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..3ee4da9 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,42 @@ +import pytest + +import utils + + +@pytest.mark.parametrize( + "expression, expected_result", + [ + (")", 0), + (")()", 0), + ("())", 2), + ("()())", 4), + ("", None), + ("text", None), + ("()", None), + ("(balanced) (brackets)", None), + ("(not)) (balanced) (brackets", 5) + ] +) +def test_find_first_unpair_closed_par(expression, expected_result): + assert utils.find_first_unpair_closed_par(expression) == expected_result + + +@pytest.mark.parametrize( + "expression, expected_result", + [ + ([], []), + (["("], []), + ([")"], []), + (["(", ")"], []), + (["(", ")"], []), + (["(", "A", ")"], ["A"]), + (["A", ")", ")"], ["A"]), + (["(", "(", "A"], ["A"]), + (["A", "(", "(", "B", "C", "("], ["A", "B", "C"]), + (["A", ")", "B", ")", "(", "C"], ["A", "B", "C"]), + + ] +) +def test_remove_par(expression, expected_result): + assert utils.remove_par(expression) == expected_result + assert utils.old_version(expression) == expected_result From 1d4272d123a10a76048a8ba4d4f927708e8e5606 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 02:06:00 +0300 Subject: [PATCH 02/16] clean up debugging code --- tests/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 3ee4da9..4644b73 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ 
-39,4 +39,3 @@ def test_find_first_unpair_closed_par(expression, expected_result): ) def test_remove_par(expression, expected_result): assert utils.remove_par(expression) == expected_result - assert utils.old_version(expression) == expected_result From c1a6e0c6393976a5ad37df4b62579b59fb8b4659 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 02:17:20 +0300 Subject: [PATCH 03/16] refactor table_data.py --- simple_ddl_parser/output/table_data.py | 37 +++++++++++++------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/simple_ddl_parser/output/table_data.py b/simple_ddl_parser/output/table_data.py index 207ddf5..792d4cd 100644 --- a/simple_ddl_parser/output/table_data.py +++ b/simple_ddl_parser/output/table_data.py @@ -3,6 +3,21 @@ from simple_ddl_parser.output.base_data import BaseData from simple_ddl_parser.output.dialects import CommonDialectsFieldsMixin, dialect_by_name +__all__ = [ + "TableData", +] + + +def _pre_process_kwargs(kwargs: dict, aliased_fields: dict) -> None: + for alias, field_name in aliased_fields.items(): + if alias in kwargs: + kwargs[field_name] = kwargs[alias] + del kwargs[alias] + + # todo: need to figure out how workaround it normally + if kwargs.get("fields_terminated_by") == "_ddl_parser_comma_only_str": + kwargs["fields_terminated_by"] = "','" + class TableData: cls_prefix = "Dialect" @@ -13,31 +28,15 @@ def get_dialect_class(cls, kwargs: dict): if output_mode and output_mode != "sql": main_cls = dialect_by_name.get(output_mode) - cls_ = dataclass( + return dataclass( type( f"{main_cls.__name__}{cls.cls_prefix}", (main_cls, CommonDialectsFieldsMixin), {}, ) ) - else: - cls_ = BaseData - - return cls_ - - @staticmethod - def pre_process_kwargs(kwargs: dict, aliased_fields: dict) -> None: - for alias, field_name in aliased_fields.items(): - if alias in kwargs: - kwargs[field_name] = kwargs[alias] - del kwargs[alias] - # todo: need to figure out how workaround it normally - if ( - "fields_terminated_by" in 
kwargs - and "_ddl_parser_comma_only_str" == kwargs["fields_terminated_by"] - ): - kwargs["fields_terminated_by"] = "','" + return BaseData @classmethod def pre_load_mods(cls, main_cls, kwargs) -> dict: @@ -55,7 +54,7 @@ def pre_load_mods(cls, main_cls, kwargs) -> dict: for name, value in cls_fields.items() if value.metadata and "alias" in value.metadata } - cls.pre_process_kwargs(kwargs, aliased_fields) + _pre_process_kwargs(kwargs, aliased_fields) table_main_args = { k.lower(): v for k, v in kwargs.items() if k.lower() in cls_fields } From d32f7aaf9d4f9f17d4bd502624879cb9b810ea9a Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 02:19:45 +0300 Subject: [PATCH 04/16] improve test --- tests/test_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4644b73..6273bcd 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -29,12 +29,15 @@ def test_find_first_unpair_closed_par(expression, expected_result): ([")"], []), (["(", ")"], []), (["(", ")"], []), + (["(", "A"], ["A"]), + (["A", ")"], ["A"]), (["(", "A", ")"], ["A"]), (["A", ")", ")"], ["A"]), (["(", "(", "A"], ["A"]), + (["A", "B", "C"], ["A", "B", "C"]), (["A", "(", "(", "B", "C", "("], ["A", "B", "C"]), (["A", ")", "B", ")", "(", "C"], ["A", "B", "C"]), - + (["(", "A", ")", "B", "C", ")"], ["A", "B", "C"]), ] ) def test_remove_par(expression, expected_result): From 427cca6cf0ad4660239ee2d674d35db6e49ef3b2 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 02:45:23 +0300 Subject: [PATCH 05/16] refactor check_spec --- simple_ddl_parser/utils.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/simple_ddl_parser/utils.py b/simple_ddl_parser/utils.py index 74db81a..17ca186 100644 --- a/simple_ddl_parser/utils.py +++ b/simple_ddl_parser/utils.py @@ -1,12 +1,24 @@ import re from typing import List, Tuple, Optional +__all__ = [ + "remove_par", + 
"check_spec", + "find_first_unpair_closed_par", + "normalize_name", + "get_table_id", +] + +_parentheses = {'(', ')'} + def remove_par(p_list: List[str]) -> List[str]: - remove_set = {"(", ")"} + """ + Remove the parentheses from the given list + """ i = j = 0 while i < len(p_list): - if p_list[i] not in remove_set: + if p_list[i] not in _parentheses: p_list[j] = p_list[i] j += 1 i += 1 @@ -23,16 +35,15 @@ def remove_par(p_list: List[str]) -> List[str]: } +# TODO: Add tests def check_spec(value: str) -> str: replace_value = spec_mapper.get(value) - if not replace_value: - for item in spec_mapper: - if item in value: - replace_value = value.replace(item, spec_mapper[item]) - break - else: - replace_value = value - return replace_value + if replace_value: + return replace_value + for item in spec_mapper: + if item in value: + return value.replace(item, spec_mapper[item]) + return value def find_first_unpair_closed_par(str_: str) -> Optional[int]: @@ -48,7 +59,9 @@ def find_first_unpair_closed_par(str_: str) -> Optional[int]: def normalize_name(name: str) -> str: - # clean up [] and " symbols from names + """ + Clean up [] and " characters from the given name + """ clean_up_re = r'[\[\]"]' return re.sub(clean_up_re, "", name).lower() From afceb608506b917c6146cdf6212eee2549db8b15 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 13:40:57 +0300 Subject: [PATCH 06/16] fix tests (fix import) --- tests/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 6273bcd..aff9598 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,6 @@ import pytest -import utils +from simple_ddl_parser import utils @pytest.mark.parametrize( @@ -28,7 +28,7 @@ def test_find_first_unpair_closed_par(expression, expected_result): (["("], []), ([")"], []), (["(", ")"], []), - (["(", ")"], []), + ([")", "("], []), (["(", "A"], ["A"]), (["A", ")"], ["A"]), (["(", "A", ")"], ["A"]), From 
489ac0b8684690c19556f39e69291da4146c536f Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 15:31:54 +0300 Subject: [PATCH 07/16] fix edge case with unhashable types --- simple_ddl_parser/utils.py | 8 +++++--- tests/test_utils.py | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/simple_ddl_parser/utils.py b/simple_ddl_parser/utils.py index 17ca186..53b5f7b 100644 --- a/simple_ddl_parser/utils.py +++ b/simple_ddl_parser/utils.py @@ -1,5 +1,5 @@ import re -from typing import List, Tuple, Optional +from typing import List, Tuple, Optional, Union, Any __all__ = [ "remove_par", @@ -9,12 +9,14 @@ "get_table_id", ] -_parentheses = {'(', ')'} +_parentheses = ('(', ')') -def remove_par(p_list: List[str]) -> List[str]: +def remove_par(p_list: List[Union[str, Any]]) -> List[str]: """ Remove the parentheses from the given list + + Warn: p_list may contain unhashable types for some unexplored reasons """ i = j = 0 while i < len(p_list): diff --git a/tests/test_utils.py b/tests/test_utils.py index aff9598..3530c8c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -38,6 +38,7 @@ def test_find_first_unpair_closed_par(expression, expected_result): (["A", "(", "(", "B", "C", "("], ["A", "B", "C"]), (["A", ")", "B", ")", "(", "C"], ["A", "B", "C"]), (["(", "A", ")", "B", "C", ")"], ["A", "B", "C"]), + ([dict()], [dict()]), # Edge case (unhashable types) ] ) def test_remove_par(expression, expected_result): From 168eb00415aafaf3ea51bf6b8fe6264c56144617 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 15:32:22 +0300 Subject: [PATCH 08/16] fix typing --- simple_ddl_parser/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simple_ddl_parser/utils.py b/simple_ddl_parser/utils.py index 53b5f7b..4209781 100644 --- a/simple_ddl_parser/utils.py +++ b/simple_ddl_parser/utils.py @@ -12,7 +12,7 @@ _parentheses = ('(', ')') -def remove_par(p_list: List[Union[str, Any]]) -> List[str]: +def remove_par(p_list: 
List[Union[str, Any]]) -> List[Union[str, Any]]: """ Remove the parentheses from the given list From 211249f7bdc2bed66c992e51fdb04fc26b085077 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 17:17:26 +0300 Subject: [PATCH 09/16] Add tests --- simple_ddl_parser/utils.py | 25 +++++++++------- tests/test_utils.py | 60 ++++++++++++++++++++++++++------------ 2 files changed, 57 insertions(+), 28 deletions(-) diff --git a/simple_ddl_parser/utils.py b/simple_ddl_parser/utils.py index 4209781..e26f80b 100644 --- a/simple_ddl_parser/utils.py +++ b/simple_ddl_parser/utils.py @@ -29,7 +29,7 @@ def remove_par(p_list: List[Union[str, Any]]) -> List[Union[str, Any]]: return p_list -spec_mapper = { +_spec_mapper = { "'pars_m_t'": "'\t'", "'pars_m_n'": "'\n'", "'pars_m_dq'": '"', @@ -37,18 +37,23 @@ def remove_par(p_list: List[Union[str, Any]]) -> List[Union[str, Any]]: } -# TODO: Add tests -def check_spec(value: str) -> str: - replace_value = spec_mapper.get(value) - if replace_value: - return replace_value - for item in spec_mapper: - if item in value: - return value.replace(item, spec_mapper[item]) - return value +def check_spec(string: str) -> str: + """ + Replace escape tokens to their representation + """ + if string in _spec_mapper: + return _spec_mapper[string] + for replace_from, replace_to in _spec_mapper.items(): + if replace_from in string: + return string.replace(replace_from, replace_to) + return string def find_first_unpair_closed_par(str_: str) -> Optional[int]: + """ + Returns index of first unpair close parentheses. + Or returns None, if there is no one. 
+ """ count_open = 0 for i, char in enumerate(str_): if char == '(': diff --git a/tests/test_utils.py b/tests/test_utils.py index 3530c8c..38b2980 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,24 +3,6 @@ from simple_ddl_parser import utils -@pytest.mark.parametrize( - "expression, expected_result", - [ - (")", 0), - (")()", 0), - ("())", 2), - ("()())", 4), - ("", None), - ("text", None), - ("()", None), - ("(balanced) (brackets)", None), - ("(not)) (balanced) (brackets", 5) - ] -) -def test_find_first_unpair_closed_par(expression, expected_result): - assert utils.find_first_unpair_closed_par(expression) == expected_result - - @pytest.mark.parametrize( "expression, expected_result", [ @@ -43,3 +25,45 @@ def test_find_first_unpair_closed_par(expression, expected_result): ) def test_remove_par(expression, expected_result): assert utils.remove_par(expression) == expected_result + + +@pytest.mark.parametrize( + "expression, expected_result", + [ + ("", ""), + ("simple", "simple"), + + ("'pars_m_t'", "'\t'"), + ("'pars_m_n'", "'\n'"), + ("'pars_m_dq'", '"'), + ("pars_m_single", "'"), + + ("STRING_'pars_m_t'STRING", "STRING_'\t'STRING"), + ("STRING_'pars_m_n'STRING", "STRING_'\n'STRING"), + ("STRING_'pars_m_dq'STRING", "STRING_\"STRING"), + ("STRING_pars_m_singleSTRING", "STRING_'STRING"), + + ("pars_m_single pars_m_single", "' '"), + ("'pars_m_t''pars_m_n'", "'\t''pars_m_n'"), # determined by dict element order + ] +) +def test_check_spec(expression, expected_result): + assert utils.check_spec(expression) == expected_result + + +@pytest.mark.parametrize( + "expression, expected_result", + [ + (")", 0), + (")()", 0), + ("())", 2), + ("()())", 4), + ("", None), + ("text", None), + ("()", None), + ("(balanced) (brackets)", None), + ("(not)) (balanced) (brackets", 5) + ] +) +def test_find_first_unpair_closed_par(expression, expected_result): + assert utils.find_first_unpair_closed_par(expression) == expected_result From 
bf8be3180fb2001608071830a6b26394085116c5 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Fri, 14 Jun 2024 17:21:15 +0300 Subject: [PATCH 10/16] fix docstr --- simple_ddl_parser/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simple_ddl_parser/utils.py b/simple_ddl_parser/utils.py index e26f80b..8f5933b 100644 --- a/simple_ddl_parser/utils.py +++ b/simple_ddl_parser/utils.py @@ -16,7 +16,7 @@ def remove_par(p_list: List[Union[str, Any]]) -> List[Union[str, Any]]: """ Remove the parentheses from the given list - Warn: p_list may contain unhashable types for some unexplored reasons + Warn: p_list may contain unhashable types, such as 'dict'. """ i = j = 0 while i < len(p_list): From 768a42587a78893401617a9d0bcb7ccc0d359e26 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Mon, 29 Jul 2024 21:31:52 +0300 Subject: [PATCH 11/16] revert rename: DDLParserError -> SimpleDDLParserException --- docs/README.rst | 6 ++++-- simple_ddl_parser/__init__.py | 4 ++-- simple_ddl_parser/ddl_parser.py | 6 +++--- simple_ddl_parser/exception.py | 4 ++-- simple_ddl_parser/parser.py | 4 ++-- tests/non_statement_tests/test_common.py | 4 ++-- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/README.rst b/docs/README.rst index b7228f4..ddecbe1 100644 --- a/docs/README.rst +++ b/docs/README.rst @@ -96,8 +96,10 @@ How to use Extract additional information from HQL (& other dialects) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In some dialects like HQL there is a lot of additional information about table like, fore example, is it external table, STORED AS, location & etc. This property will be always empty in 'classic' SQL DB like PostgreSQL or MySQL and this is the reason, why by default this information are 'hidden'. 
-Also some fields hidden in HQL, because they are simple not exists in HIVE, for example 'deferrable_initially' +In some dialects like HQL there is a lot of additional information about table like, for example, is it external table, +STORED AS, location & etc. This property will be always empty in 'classic' SQL DB like PostgreSQL or MySQL +and this is the reason, why by default this information is 'hidden'. +Also some fields are hidden in HQL, because they simply do not exist in HIVE, for example 'deferrable_initially' To get this 'hql' specific details about table in output please use 'output_mode' argument in run() method. example: diff --git a/simple_ddl_parser/__init__.py b/simple_ddl_parser/__init__.py index b204c1c..1f6bddf 100644 --- a/simple_ddl_parser/__init__.py +++ b/simple_ddl_parser/__init__.py @@ -1,6 +1,6 @@ -from simple_ddl_parser.ddl_parser import DDLParser, DDLParserError, parse_from_file +from simple_ddl_parser.ddl_parser import DDLParser, SimpleDDLParserException, parse_from_file from simple_ddl_parser.output.dialects import dialect_by_name supported_dialects = dialect_by_name -__all__ = ["DDLParser", "parse_from_file", "DDLParserError", "supported_dialects"] +__all__ = ["DDLParser", "parse_from_file", "SimpleDDLParserException", "supported_dialects"] diff --git a/simple_ddl_parser/ddl_parser.py b/simple_ddl_parser/ddl_parser.py index 2162ccb..2ebdbb3 100755 --- a/simple_ddl_parser/ddl_parser.py +++ b/simple_ddl_parser/ddl_parser.py @@ -2,7 +2,7 @@ from ply.lex import LexToken -from simple_ddl_parser.exception import DDLParserError +from simple_ddl_parser.exception import SimpleDDLParserException from simple_ddl_parser import tokens as tok from simple_ddl_parser.dialects import ( HQL, @@ -256,11 +256,11 @@ def p_string(self, p): p[0] = "".join(list(p[1:])) def t_error(self, t: LexToken): - raise DDLParserError("Unknown symbol %r" % (t.value[0],)) + raise SimpleDDLParserException("Unknown symbol %r" % (t.value[0],)) def p_error(self, p): if 
not self.silent: - raise DDLParserError(f"Unknown statement at {p}") + raise SimpleDDLParserException(f"Unknown statement at {p}") def parse_from_file( diff --git a/simple_ddl_parser/exception.py b/simple_ddl_parser/exception.py index 98e17bc..bde2843 100644 --- a/simple_ddl_parser/exception.py +++ b/simple_ddl_parser/exception.py @@ -1,8 +1,8 @@ __all__ = [ - "DDLParserError", + "SimpleDDLParserException", ] -class DDLParserError(Exception): +class SimpleDDLParserException(Exception): """ Base exception in simple ddl parser library """ pass diff --git a/simple_ddl_parser/parser.py b/simple_ddl_parser/parser.py index 78d11c1..ba53245 100755 --- a/simple_ddl_parser/parser.py +++ b/simple_ddl_parser/parser.py @@ -6,7 +6,7 @@ from ply import lex, yacc -from simple_ddl_parser.exception import DDLParserError +from simple_ddl_parser.exception import SimpleDDLParserException from simple_ddl_parser.output.core import Output, dump_data_to_file from simple_ddl_parser.output.dialects import dialect_by_name from simple_ddl_parser.utils import find_first_unpair_closed_par @@ -346,7 +346,7 @@ def run( Dict == one entity from ddl - one table or sequence or type. 
""" if output_mode not in dialect_by_name: - raise DDLParserError( + raise SimpleDDLParserException( f"Output mode can be one of possible variants: {dialect_by_name.keys()}" ) self.tables = self.parse_data() diff --git a/tests/non_statement_tests/test_common.py b/tests/non_statement_tests/test_common.py index edab4f1..0353d05 100644 --- a/tests/non_statement_tests/test_common.py +++ b/tests/non_statement_tests/test_common.py @@ -1,6 +1,6 @@ import pytest -from simple_ddl_parser import DDLParser, DDLParserError +from simple_ddl_parser import DDLParser, SimpleDDLParserException from simple_ddl_parser.output.core import get_table_id @@ -29,7 +29,7 @@ def test_silent_false_flag(): created_timestamp TIMESTAMPTZ NOT NULL DEFAULT ALTER (now() at time zone 'utc') ); """ - with pytest.raises(DDLParserError) as e: + with pytest.raises(SimpleDDLParserException) as e: DDLParser(ddl, silent=False).run(group_by_type=True) assert "Unknown statement" in e.value[1] From 48446ae9298dea725aa7160629359a3bd13adbd6 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Mon, 29 Jul 2024 21:39:26 +0300 Subject: [PATCH 12/16] add backward compatibility import --- simple_ddl_parser/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/simple_ddl_parser/utils.py b/simple_ddl_parser/utils.py index 8f5933b..8fcbc11 100644 --- a/simple_ddl_parser/utils.py +++ b/simple_ddl_parser/utils.py @@ -1,12 +1,16 @@ import re from typing import List, Tuple, Optional, Union, Any +# Backward compatibility import +from simple_ddl_parser.exception import SimpleDDLParserException + __all__ = [ "remove_par", "check_spec", "find_first_unpair_closed_par", "normalize_name", "get_table_id", + "SimpleDDLParserException" ] _parentheses = ('(', ')') From e2871fe4035fc45e8e7bc2726d4ba558e36199f8 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Mon, 29 Jul 2024 21:52:42 +0300 Subject: [PATCH 13/16] fix DDLParserError for backward compatibility --- simple_ddl_parser/ddl_parser.py | 6 +++--- 
simple_ddl_parser/exception.py | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/simple_ddl_parser/ddl_parser.py b/simple_ddl_parser/ddl_parser.py index 2ebdbb3..2162ccb 100755 --- a/simple_ddl_parser/ddl_parser.py +++ b/simple_ddl_parser/ddl_parser.py @@ -2,7 +2,7 @@ from ply.lex import LexToken -from simple_ddl_parser.exception import SimpleDDLParserException +from simple_ddl_parser.exception import DDLParserError from simple_ddl_parser import tokens as tok from simple_ddl_parser.dialects import ( HQL, @@ -256,11 +256,11 @@ def p_string(self, p): p[0] = "".join(list(p[1:])) def t_error(self, t: LexToken): - raise SimpleDDLParserException("Unknown symbol %r" % (t.value[0],)) + raise DDLParserError("Unknown symbol %r" % (t.value[0],)) def p_error(self, p): if not self.silent: - raise SimpleDDLParserException(f"Unknown statement at {p}") + raise DDLParserError(f"Unknown statement at {p}") def parse_from_file( diff --git a/simple_ddl_parser/exception.py b/simple_ddl_parser/exception.py index bde2843..bb73bc2 100644 --- a/simple_ddl_parser/exception.py +++ b/simple_ddl_parser/exception.py @@ -6,3 +6,8 @@ class SimpleDDLParserException(Exception): """ Base exception in simple ddl parser library """ pass + + +class DDLParserError(SimpleDDLParserException): + """ An alias for backward compatibility """ + pass From 0bea915236d91dfd3a8b633ba4dfa427cfc12160 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Mon, 29 Jul 2024 21:54:57 +0300 Subject: [PATCH 14/16] fix DDLParserError for backward compatibility --- simple_ddl_parser/ddl_parser.py | 3 ++- simple_ddl_parser/exception.py | 4 ---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/simple_ddl_parser/ddl_parser.py b/simple_ddl_parser/ddl_parser.py index 2162ccb..425f828 100755 --- a/simple_ddl_parser/ddl_parser.py +++ b/simple_ddl_parser/ddl_parser.py @@ -2,7 +2,6 @@ from ply.lex import LexToken -from simple_ddl_parser.exception import DDLParserError from simple_ddl_parser import tokens 
as tok from simple_ddl_parser.dialects import ( HQL, @@ -17,6 +16,8 @@ Snowflake, SparkSQL, ) +# "DDLParserError" is an alias for backward compatibility +from simple_ddl_parser.exception import SimpleDDLParserException as DDLParserError from simple_ddl_parser.parser import Parser diff --git a/simple_ddl_parser/exception.py b/simple_ddl_parser/exception.py index bb73bc2..5a5070e 100644 --- a/simple_ddl_parser/exception.py +++ b/simple_ddl_parser/exception.py @@ -7,7 +7,3 @@ class SimpleDDLParserException(Exception): """ Base exception in simple ddl parser library """ pass - -class DDLParserError(SimpleDDLParserException): - """ An alias for backward compatibility """ - pass From 5ebab6fe7e4858ce8e9164f948a92a19b3a50d2d Mon Sep 17 00:00:00 2001 From: demitryfly Date: Mon, 29 Jul 2024 21:56:39 +0300 Subject: [PATCH 15/16] one more fix --- simple_ddl_parser/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simple_ddl_parser/__init__.py b/simple_ddl_parser/__init__.py index 1f6bddf..b204c1c 100644 --- a/simple_ddl_parser/__init__.py +++ b/simple_ddl_parser/__init__.py @@ -1,6 +1,6 @@ -from simple_ddl_parser.ddl_parser import DDLParser, SimpleDDLParserException, parse_from_file +from simple_ddl_parser.ddl_parser import DDLParser, DDLParserError, parse_from_file from simple_ddl_parser.output.dialects import dialect_by_name supported_dialects = dialect_by_name -__all__ = ["DDLParser", "parse_from_file", "SimpleDDLParserException", "supported_dialects"] +__all__ = ["DDLParser", "parse_from_file", "DDLParserError", "supported_dialects"] From 1fae5babdcb705753165068321c3c3c005d98728 Mon Sep 17 00:00:00 2001 From: demitryfly Date: Mon, 29 Jul 2024 22:19:27 +0300 Subject: [PATCH 16/16] refactor a bit --- simple_ddl_parser/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/simple_ddl_parser/utils.py b/simple_ddl_parser/utils.py index 8fcbc11..f8f87d8 100644 --- a/simple_ddl_parser/utils.py +++ 
b/simple_ddl_parser/utils.py @@ -22,12 +22,11 @@ def remove_par(p_list: List[Union[str, Any]]) -> List[Union[str, Any]]: Warn: p_list may contain unhashable types, such as 'dict'. """ - i = j = 0 - while i < len(p_list): + j = 0 + for i in range(len(p_list)): if p_list[i] not in _parentheses: p_list[j] = p_list[i] j += 1 - i += 1 while j < len(p_list): p_list.pop() return p_list