-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
11f1a3a
commit 142aa81
Showing
28 changed files
with
146 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,15 @@ | ||
from dedoc.structure_extractors.patterns.bracket_list_pattern import BracketListPattern | ||
from dedoc.structure_extractors.patterns.bracket_roman_list_pattern import BracketRomanListPattern | ||
from dedoc.structure_extractors.patterns.bullet_list_pattern import BulletListPattern | ||
from dedoc.structure_extractors.patterns.dotted_list_pattern import DottedListPattern | ||
from dedoc.structure_extractors.patterns.letter_list_pattern import LetterListPattern | ||
from dedoc.structure_extractors.patterns.regexp_pattern import RegexpPattern | ||
from dedoc.structure_extractors.patterns.roman_list_pattern import RomanListPattern | ||
from dedoc.structure_extractors.patterns.start_word_pattern import StartWordPattern | ||
from dedoc.structure_extractors.patterns.tag_header_pattern import TagHeaderPattern | ||
from dedoc.structure_extractors.patterns.tag_list_pattern import TagListPattern | ||
from dedoc.structure_extractors.patterns.tag_pattern import TagPattern | ||
from dedoc.structure_extractors.patterns.tag_type_pattern import TagTypePattern | ||
|
||
# Public API of the patterns package: every pattern class imported above is re-exported here.
__all__ = [
    "BracketListPattern",
    "BracketRomanListPattern",
    "BulletListPattern",
    "DottedListPattern",
    "LetterListPattern",
    "RegexpPattern",
    "RomanListPattern",
    "StartWordPattern",
    "TagHeaderPattern",
    "TagListPattern",
    "TagPattern",
    "TagTypePattern",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,23 @@ | ||
import re
from typing import Optional, Union
|
||
from dedoc.data_structures.hierarchy_level import HierarchyLevel | ||
from dedoc.data_structures.line_with_meta import LineWithMeta | ||
from dedoc.structure_extractors.patterns.abstract_pattern import AbstractPattern | ||
|
||
|
||
class RegexpPattern(AbstractPattern):
    """
    Pattern that accepts a line when its text matches a regular expression.

    Before matching, the line text is stripped of surrounding whitespace and lower-cased,
    and the expression is applied with ``re.Pattern.match`` (anchored at the start of the text).
    Consequently an expression containing upper-case literals cannot match
    unless it was compiled with case-insensitive handling.
    """
    # NOTE(review): the double leading underscore name-mangles this attribute to ``_RegexpPattern__name``;
    # confirm that the base class looks the name up in that form.
    __name = "regexp"

    def __init__(self,
                 regexp: Union[str, re.Pattern],
                 line_type: Optional[str] = None,
                 level_1: Optional[int] = None,
                 level_2: Optional[int] = None,
                 can_be_multiline: Optional[bool] = None) -> None:
        """
        :param regexp: regular expression as a string (compiled here) or an already compiled ``re.Pattern`` (stored as is).
        :param line_type: forwarded unchanged to ``AbstractPattern.__init__``.
        :param level_1: forwarded unchanged to ``AbstractPattern.__init__``.
        :param level_2: forwarded unchanged to ``AbstractPattern.__init__``.
        :param can_be_multiline: forwarded unchanged to ``AbstractPattern.__init__``.
        """
        super().__init__(line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
        # Compile only when a raw string is given, so that flags of a pre-compiled pattern are preserved.
        self._regexp = re.compile(regexp) if isinstance(regexp, str) else regexp

    def match(self, line: LineWithMeta) -> bool:
        """
        Check whether the stripped, lower-cased text of the line matches the expression from its beginning.

        :param line: line whose ``line`` attribute holds the text to check.
        :return: True if the expression matches, False otherwise.
        """
        text = line.line.strip().lower()
        return self._regexp.match(text) is not None

    def get_hierarchy_level(self, line: LineWithMeta) -> HierarchyLevel:
        """
        Build the hierarchy level for a line from the values stored on this pattern; the line itself is not inspected.

        :param line: unused by this implementation.
        :return: ``HierarchyLevel`` made of ``_line_type``, ``_level_1``, ``_level_2`` and ``_can_be_multiline``
            (presumably set by ``AbstractPattern.__init__`` from the constructor arguments - TODO confirm).
        """
        return HierarchyLevel(line_type=self._line_type, level_1=self._level_1, level_2=self._level_2, can_be_multiline=self._can_be_multiline)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,21 @@ | ||
from typing import Optional | ||
|
||
from dedoc.data_structures.hierarchy_level import HierarchyLevel | ||
from dedoc.data_structures.line_with_meta import LineWithMeta | ||
from dedoc.structure_extractors.patterns.abstract_pattern import AbstractPattern | ||
|
||
|
||
class StartWordPattern(AbstractPattern):
    """
    Pattern that accepts a line when its text begins with a given word (or any prefix string).

    Both the configured start word and the line text are stripped of surrounding whitespace
    and lower-cased, so the comparison is case-insensitive and ignores leading spaces.
    """
    # NOTE(review): the double leading underscore name-mangles this attribute to ``_StartWordPattern__name``;
    # confirm that the base class looks the name up in that form.
    __name = "start_word"

    def __init__(self,
                 start_word: str,
                 line_type: Optional[str] = None,
                 level_1: Optional[int] = None,
                 level_2: Optional[int] = None,
                 can_be_multiline: Optional[bool] = None) -> None:
        """
        :param start_word: prefix the line text must start with; stored stripped and lower-cased.
        :param line_type: forwarded unchanged to ``AbstractPattern.__init__``.
        :param level_1: forwarded unchanged to ``AbstractPattern.__init__``.
        :param level_2: forwarded unchanged to ``AbstractPattern.__init__``.
        :param can_be_multiline: forwarded unchanged to ``AbstractPattern.__init__``.
        """
        super().__init__(line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
        # Normalise once here so that match() only has to normalise the line text.
        self.__start_word = start_word.strip().lower()

    def match(self, line: LineWithMeta) -> bool:
        """
        Check whether the stripped, lower-cased text of the line starts with the stored start word.

        :param line: line whose ``line`` attribute holds the text to check.
        :return: True if the text starts with the start word, False otherwise
            (an empty start word is a prefix of every text, so it always matches).
        """
        text = line.line.strip().lower()
        return text.startswith(self.__start_word)

    def get_hierarchy_level(self, line: LineWithMeta) -> HierarchyLevel:
        """
        Build the hierarchy level for a line from the values stored on this pattern; the line itself is not inspected.

        :param line: unused by this implementation.
        :return: ``HierarchyLevel`` made of ``_line_type``, ``_level_1``, ``_level_2`` and ``_can_be_multiline``
            (presumably set by ``AbstractPattern.__init__`` from the constructor arguments - TODO confirm).
        """
        return HierarchyLevel(line_type=self._line_type, level_1=self._level_1, level_2=self._level_2, can_be_multiline=self._can_be_multiline)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from dedoc.data_structures.line_with_meta import LineWithMeta | ||
from dedoc.structure_extractors.patterns.abstract_pattern import AbstractPattern | ||
|
||
|
||
class TagPattern(AbstractPattern):
    """
    Pattern that accepts a line when its metadata carries a tag hierarchy level.
    """
    # NOTE(review): the double leading underscore name-mangles this attribute to ``_TagPattern__name``;
    # confirm that the base class looks the name up in that form.
    __name = "tag"

    def match(self, line: LineWithMeta) -> bool:
        """
        Check whether ``line.metadata.tag_hierarchy_level`` is set.

        :param line: line whose metadata is inspected; its text is not used.
        :return: True if ``tag_hierarchy_level`` is not None, False otherwise.
        """
        return line.metadata.tag_hierarchy_level is not None
Oops, something went wrong.