Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ocr showcase2 #875

Closed
wants to merge 37 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
2e156e2
payload factory and its test cases
hepengfe Jul 6, 2022
b94097b
add audio and image payload meta ontology
hepengfe Jul 6, 2022
4f156ab
add meta ontology
hepengfe Jul 6, 2022
793c35c
Merge branch 'master' into lazy_loading
hepengfe Jul 6, 2022
aa7427d
Merge branch 'master' into lazy_loading
hepengfe Jul 6, 2022
6f76e2f
reconstruct the payload ontology
hepengfe Jul 6, 2022
054b4af
add some docstring
hepengfe Jul 6, 2022
514c17c
correct importing path
hepengfe Jul 6, 2022
969c4b4
move payload to a separate ontology file
hepengfe Jul 6, 2022
313e570
Merge branch 'lazy_loading' into ocr_showcase
hepengfe Jul 7, 2022
1f7211a
move Modality Payload: base_ontology -> payload_ontology
hepengfe Jul 7, 2022
d06d21d
move Modality Payload: base_ontology -> payload_ontology
hepengfe Jul 7, 2022
ccbe2e6
move Modality Payload: base_ontology -> payload_ontology
hepengfe Jul 7, 2022
9a0f0ad
payload ontology
hepengfe Jul 7, 2022
ba2e9c3
add DataPack.grids back as it's used in some test cases
hepengfe Jul 7, 2022
91a2bda
correct modality error message
hepengfe Jul 7, 2022
55a4e89
change to a hashable meta info for registering in payload factory
hepengfe Jul 7, 2022
5589d63
payload ontology: ft/onto/base_ontology.py -> ft/onto/payload_ontolog…
hepengfe Jul 7, 2022
f00cafb
remove used import
hepengfe Jul 7, 2022
6c2d598
correct import path
hepengfe Jul 7, 2022
9794cfd
correct import path
hepengfe Jul 7, 2022
1386f8d
add audio encoding
hepengfe Jul 8, 2022
7725fec
rm pdb
hepengfe Jul 8, 2022
5cd139d
pylint
hepengfe Jul 8, 2022
a67baa2
pylint
hepengfe Jul 8, 2022
f85b456
Merge branch 'lazy_loading' into ocr_showcase
hepengfe Jul 8, 2022
eafb17c
recover init methods to debug ontology generation
hepengfe Jul 8, 2022
045392d
Merge branch 'lazy_loading' into ocr_showcase
hepengfe Jul 8, 2022
add6582
ocr example file
hepengfe Jul 8, 2022
e0ea6fa
minor changes
hepengfe Jul 8, 2022
3f4d32a
Merge branch 'master' into lazy_loading
hepengfe Jul 8, 2022
388da05
Merge branch 'lazy_loading' into ocr_showcase
hepengfe Jul 8, 2022
8eddea4
Merge branch 'asyml:master' into ocr_showcase
Jul 11, 2022
a543fba
add ocr processor
hepengfe Jul 12, 2022
a4dada9
move ocr.ipynb under docs/
hepengfe Jul 12, 2022
0b86e4b
include ocr tutorial in ch9
hepengfe Jul 12, 2022
1544a23
pylint: unused import
hepengfe Jul 12, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/ch9.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ Chapter 9. Tasks on Other modalities
toc/audio_processing.md

toc/image_processing.md
notebook_tutorial/ocr.ipynb
535 changes: 535 additions & 0 deletions docs/notebook_tutorial/ocr.ipynb

Large diffs are not rendered by default.

14 changes: 5 additions & 9 deletions forte/data/data_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@

import numpy as np
from sortedcontainers import SortedList

from forte.common.exception import (
ProcessExecutionException,
UnknownOntologyClassException,
Expand All @@ -47,13 +46,13 @@
from forte.data.ontology.core import EntryType
from forte.data.ontology.top import (
Annotation,
Grids,
Link,
Group,
SinglePackEntries,
Generics,
AudioAnnotation,
ImageAnnotation,
Grids,
Payload,
)

Expand Down Expand Up @@ -171,8 +170,8 @@ def __init__(self, pack_name: Optional[str] = None):
self._data_store: DataStore = DataStore()
self._entry_converter: EntryConverter = EntryConverter()
self.image_annotations: List[ImageAnnotation] = []
self.grids: List[Grids] = []

self.grids: List[Grids] = []
self.text_payloads: List[Payload] = []
self.audio_payloads: List[Payload] = []
self.image_payloads: List[Payload] = []
Expand Down Expand Up @@ -244,7 +243,7 @@ def text(self) -> str:
@property
def audio(self) -> Optional[np.ndarray]:
r"""Return the audio of the data pack"""
return self.get_payload_data_at(Modality.Audio, 0)
return cast(np.ndarray, self.get_payload_data_at(Modality.Audio, 0))

@property
def all_annotations(self) -> Iterator[Annotation]:
Expand Down Expand Up @@ -448,15 +447,12 @@ def get_payload_at(
supported_modality = [enum.name for enum in Modality]

try:
# if modality.name == "text":
if modality == Modality.Text:
payloads_length = len(self.text_payloads)
payload = self.text_payloads[payload_index]
# elif modality.name == "audio":
elif modality == Modality.Audio:
payloads_length = len(self.audio_payloads)
payload = self.audio_payloads[payload_index]
# elif modality.name == "image":
elif modality == Modality.Image:
payloads_length = len(self.image_payloads)
payload = self.image_payloads[payload_index]
Expand Down Expand Up @@ -569,7 +565,7 @@ def set_text(
# temporary solution for backward compatibility
# the past API used this method to add a single text to the datapack
if len(self.text_payloads) == 0 and text_payload_index == 0:
from ft.onto.base_ontology import ( # pylint: disable=import-outside-toplevel
from ft.onto.payload_ontology import ( # pylint: disable=import-outside-toplevel
TextPayload,
)

Expand Down Expand Up @@ -601,7 +597,7 @@ def set_audio(
# temporary solution for backward compatibility
# the past API used this method to add a single audio to the datapack
if len(self.audio_payloads) == 0 and audio_payload_index == 0:
from ft.onto.base_ontology import ( # pylint: disable=import-outside-toplevel
from ft.onto.payload_ontology import ( # pylint: disable=import-outside-toplevel
AudioPayload,
)

Expand Down
31 changes: 29 additions & 2 deletions forte/data/ontology/top.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
"Box",
"BoundingBox",
"Payload",
"Meta",
]

QueryType = Union[Dict[str, Any], np.ndarray]
Expand Down Expand Up @@ -1253,7 +1254,7 @@ def __init__(
payload_idx: int = 0,
uri: Optional[str] = None,
):
from ft.onto.base_ontology import ( # pylint: disable=import-outside-toplevel
from ft.onto.payload_ontology import ( # pylint: disable=import-outside-toplevel
TextPayload,
AudioPayload,
ImagePayload,
Expand Down Expand Up @@ -1282,6 +1283,12 @@ def __init__(
self.replace_back_operations: Sequence[Tuple] = []
self.processed_original_spans: Sequence[Tuple] = []
self.orig_text_len: int = 0
self.payloading = None
self.meta = None

def set_meta(self, meta):
# there might be a better way to set meta
self.meta = meta

def get_type(self) -> type:
"""
Expand Down Expand Up @@ -1324,11 +1331,22 @@ def payload_index(self) -> int:

@property
def uri(self) -> Optional[str]:
"""
Uniform resource identifier of the data source.

Returns:
Optional[str]: Uniform resource identifier of the data source.
"""
return self._uri

def load(self):
fn = self.payloading.route(self.meta)
self._cache = fn(self.uri)

def set_cache(self, data: Union[str, np.ndarray]):
"""
Load cache data into the payload.
Set cache data in the payload. This method can be useful when users
want to set a new cache.

Args:
data: data to be set in the payload. It can be str for text data or
Expand Down Expand Up @@ -1365,6 +1383,14 @@ def __setstate__(self, state):
self._modality = getattr(Modality, state["_modality"])


class Meta(Generics):
def __init__(
self, pack: PackType
): # pylint: disable=useless-super-delegation
super().__init__(pack)
self.source_type = None # data source type; it can be 'local' or 'web'


SinglePackEntries = (
Link,
Group,
Expand All @@ -1373,5 +1399,6 @@ def __setstate__(self, state):
AudioAnnotation,
ImageAnnotation,
Payload,
Meta,
)
MultiPackEntries = (MultiPackLink, MultiPackGroup, MultiPackGeneric)
23 changes: 0 additions & 23 deletions forte/ontology_specs/base_ontology.json
Original file line number Diff line number Diff line change
Expand Up @@ -444,29 +444,6 @@
"type": "str"
}
]
},
{
"entry_name": "ft.onto.base_ontology.AudioPayload",
"parent_entry": "forte.data.ontology.top.Payload",
"description": "A payload that caches audio data",
"attributes":[
{
"name": "sample_rate",
"type": "int"
}
]
},
{
"entry_name": "ft.onto.base_ontology.TextPayload",
"parent_entry": "forte.data.ontology.top.Payload",
"description": "A payload that caches text data",
"attributes": []
},
{
"entry_name": "ft.onto.base_ontology.ImagePayload",
"parent_entry": "forte.data.ontology.top.Payload",
"description": "A payload that caches image data",
"attributes":[]
}
]
}
76 changes: 76 additions & 0 deletions forte/ontology_specs/payload_ontology.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"name": "payload_ontology",
"definitions": [
{
"entry_name": "ft.onto.payload_ontology.AudioPayload",
"parent_entry": "forte.data.ontology.top.Payload",
"description": "A payload that caches audio data",
"attributes":[
{
"name": "sample_rate",
"type": "int"
}
]
},
{
"entry_name": "ft.onto.payload_ontology.TextPayload",
"parent_entry": "forte.data.ontology.top.Payload",
"description": "A payload that caches text data",
"attributes": []
},
{
"entry_name": "ft.onto.payload_ontology.ImagePayload",
"parent_entry": "forte.data.ontology.top.Payload",
"description": "A payload that caches image data",
"attributes":[]
},
{
"entry_name": "ft.onto.payload_ontology.JpegMeta",
"parent_entry": "forte.data.ontology.top.Meta",
"attributes":[
{
"name": "extension",
"type": "str"
},
{"name": "mime",
"type": "str"},
{"name": "type_code",
"type": "str"},
{"name": "version",
"type": "str"}
]
},
{
"entry_name": "ft.onto.payload_ontology.AudioMeta",
"parent_entry": "forte.data.ontology.top.Meta",
"attributes":[
{
"name": "sample_rate",
"type": "int"
},
{"name": "channels",
"type": "int"},
{"name": "bits_per_sample",
"type": "int"},
{"name": "duration",
"type": "float"},
{"name": "bitrate",
"type": "int"},
{"name": "encoding",
"type": "str"},
{"name": "dtype",
"type": "str"}
]
},
{
"entry_name": "ft.onto.payload_ontology.JpegPayload",
"parent_entry": "ft.onto.payload_ontology.ImagePayload",
"attributes":[
{
"name": "meta",
"type": "ft.onto.payload_ontology.JpegMeta"
}
]
}
]
}
Loading