Skip to content

Commit

Permalink
move names into separate file
Browse files Browse the repository at this point in the history
  • Loading branch information
kyleclo committed Mar 14, 2024
1 parent 2385c23 commit d1c6fb5
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 43 deletions.
14 changes: 6 additions & 8 deletions papermage/magelib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,20 @@
"""


from .box import Box
from .document import Document, Prediction
from .entity import Entity
from .image import Image
from .indexer import EntityBoxIndexer, EntitySpanIndexer
from .layer import Layer
from .document import (
from .metadata import Metadata
from .names import (
AbstractsFieldName,
AlgorithmsFieldName,
AuthorsFieldName,
BibliographiesFieldName,
BlocksFieldName,
CaptionsFieldName,
Document,
Prediction,
EntitiesFieldName,
EquationsFieldName,
FiguresFieldName,
Expand All @@ -40,10 +42,6 @@
TokensFieldName,
WordsFieldName,
)
from .entity import Entity
from .image import Image
from .indexer import EntityBoxIndexer, EntitySpanIndexer
from .metadata import Metadata
from .span import Span

__all__ = [
Expand Down
40 changes: 8 additions & 32 deletions papermage/magelib/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,16 @@
from .image import Image
from .layer import Layer
from .metadata import Metadata
from .names import (
EntitiesFieldName,
ImagesFieldName,
MetadataFieldName,
RelationsFieldName,
SymbolsFieldName,
TokensFieldName,
)
from .span import Span

# document field names
SymbolsFieldName = "symbols"
ImagesFieldName = "images"
MetadataFieldName = "metadata"
EntitiesFieldName = "entities"
RelationsFieldName = "relations"

PagesFieldName = "pages"
TokensFieldName = "tokens"
RowsFieldName = "rows"
BlocksFieldName = "blocks"
WordsFieldName = "words"
SentencesFieldName = "sentences"
ParagraphsFieldName = "paragraphs"

# these come from vila
TitlesFieldName = "titles"
AuthorsFieldName = "authors"
AbstractsFieldName = "abstracts"
KeywordsFieldName = "keywords"
SectionsFieldName = "sections"
ListsFieldName = "lists"
BibliographiesFieldName = "bibliographies"
EquationsFieldName = "equations"
AlgorithmsFieldName = "algorithms"
FiguresFieldName = "figures"
TablesFieldName = "tables"
CaptionsFieldName = "captions"
HeadersFieldName = "headers"
FootersFieldName = "footers"
FootnotesFieldName = "footnotes"


class Prediction(NamedTuple):
name: str
Expand Down
5 changes: 2 additions & 3 deletions papermage/magelib/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from .box import Box
from .image import Image
from .metadata import Metadata
from .names import TokensFieldName
from .span import Span

if TYPE_CHECKING:
from .document import TokensFieldName
from .layer import Layer


Expand Down Expand Up @@ -160,8 +160,7 @@ def symbols_from_boxes(self) -> List[str]:
if self.layer.doc.symbols is None:
raise ValueError("This Entity's Document is missing symbols")

# TODO: maybe just an import error for TokensFieldName?
matched_tokens = self.intersect_by_box(name="tokens")
matched_tokens = self.intersect_by_box(name=TokensFieldName)
return [self.layer.doc.symbols[span.start : span.end] for t in matched_tokens for span in t.spans]

@property
Expand Down
37 changes: 37 additions & 0 deletions papermage/magelib/names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
@kylel
"""

# Shared string constants naming the fields of a document.
# This file only assigns string literals and imports nothing itself.
# NOTE(review): presumably that is what lets both document.py and entity.py
# import these names without an import cycle -- confirm.

# document field names
SymbolsFieldName = "symbols"
ImagesFieldName = "images"
MetadataFieldName = "metadata"
EntitiesFieldName = "entities"
RelationsFieldName = "relations"

# NOTE(review): this group looks like the text/layout unit fields
# (pages, rows, tokens, ...) -- grouping inferred from the names; confirm.
PagesFieldName = "pages"
TokensFieldName = "tokens"
RowsFieldName = "rows"
BlocksFieldName = "blocks"
WordsFieldName = "words"
SentencesFieldName = "sentences"
ParagraphsFieldName = "paragraphs"

# these come from vila
# NOTE(review): presumably the category labels produced by the VILA
# predictor -- confirm against the predictor's label set.
TitlesFieldName = "titles"
AuthorsFieldName = "authors"
AbstractsFieldName = "abstracts"
KeywordsFieldName = "keywords"
SectionsFieldName = "sections"
ListsFieldName = "lists"
BibliographiesFieldName = "bibliographies"
EquationsFieldName = "equations"
AlgorithmsFieldName = "algorithms"
FiguresFieldName = "figures"
TablesFieldName = "tables"
CaptionsFieldName = "captions"
HeadersFieldName = "headers"
FootersFieldName = "footers"
FootnotesFieldName = "footnotes"

0 comments on commit d1c6fb5

Please sign in to comment.