Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change Grid from ontology to a data structure in core.py #876

Merged
merged 23 commits into from
Jul 25, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions forte/data/data_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
Generics,
AudioAnnotation,
ImageAnnotation,
Grids,
Payload,
)

Expand Down Expand Up @@ -171,7 +170,7 @@ def __init__(self, pack_name: Optional[str] = None):
self._data_store: DataStore = DataStore()
self._entry_converter: EntryConverter = EntryConverter()
self.image_annotations: List[ImageAnnotation] = []
self.grids: List[Grids] = []
self.grids: List[Grid] = []
mylibrar marked this conversation as resolved.
Show resolved Hide resolved

self.text_payloads: List[Payload] = []
self.audio_payloads: List[Payload] = []
Expand Down
2 changes: 0 additions & 2 deletions forte/data/data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from forte.data.ontology.top import (
Annotation,
AudioAnnotation,
Grids,
Group,
ImageAnnotation,
Link,
Expand Down Expand Up @@ -775,7 +774,6 @@ def _add_entry_raw(
Group,
Generics,
ImageAnnotation,
Grids,
Payload,
MultiPackLink,
MultiPackGroup,
Expand Down
10 changes: 0 additions & 10 deletions forte/data/entry_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
Generics,
AudioAnnotation,
ImageAnnotation,
Grids,
MultiPackGeneric,
MultiPackGroup,
MultiPackLink,
Expand Down Expand Up @@ -124,15 +123,6 @@ def save_entry_object(
tid=entry.tid,
allow_duplicate=allow_duplicate,
)
elif data_store_ref._is_subclass(entry.entry_type(), Grids):
# Will be deprecated in future
data_store_ref.add_entry_raw(
type_name=entry.entry_type(),
attribute_data=[entry.image_payload_idx, None],
base_class=Grids,
tid=entry.tid,
allow_duplicate=allow_duplicate,
)
elif data_store_ref._is_subclass(entry.entry_type(), MultiPackLink):
data_store_ref.add_entry_raw(
type_name=entry.entry_type(),
Expand Down
208 changes: 207 additions & 1 deletion forte/data/ontology/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from typing import (
Iterable,
Optional,
Tuple,
Type,
Hashable,
TypeVar,
Expand All @@ -33,7 +34,7 @@
overload,
List,
)

import math
import numpy as np

from forte.data.container import ContainerType
Expand Down Expand Up @@ -635,5 +636,210 @@ def index_key(self) -> int:
return self.tid


class Grid:
    """
    Regular grid whose cell size is derived from a fixed image size.

    ``Grid`` is a plain data structure used to retrieve grid-related objects
    (such as grid cells) from an image. It does not store any image data
    itself.

    Based on the image size and the grid shape, the height and width of a
    grid cell are computed. For example, if the image size
    ``(image_height, image_width)`` is ``(640, 480)`` and the grid shape
    ``(height, width)`` is ``(2, 3)``, the grid cell size
    ``(self.c_h, self.c_w)`` is ``(320, 160)``.

    When the image size is not divisible by the grid shape, the cell size is
    rounded up to the next integer, and cell pixel ranges are clipped to the
    image boundary. As a consequence the last grid cell per column and row
    is smaller than the others. For example, if the image size is
    ``(128, 128)`` and the grid shape is ``(13, 13)``, the cell size is
    ``(10, 10)`` since ``128 / 13 = 9.85`` rounds up to ``10``. The first 12
    grid cells per column and row then have a size of ``(10, 10)`` and the
    last one has a size of ``(8, 8)`` since ``128 % 10 = 8``.

    Note: because of the rounding and clipping, trailing grid cells can be
    empty for some shapes (e.g. ``image_height=10`` and ``height=6`` gives
    ``c_h=2``, so the sixth cell covers the empty pixel range ``10:10``).

    Each grid is bound to exactly one image size at construction time. The
    number of distinct image shapes per computer vision task is usually
    small (for example ``(640, 480)`` plus a few augmented sizes such as
    ``(320, 240)`` and ``(480, 640)``), so keeping one grid per image size
    is cheap and lets the image size be validated once, here, instead of
    on every method call.

    Args:
        height: the number of grid cells per column, the unit is one grid
            cell.
        width: the number of grid cells per row, the unit is one grid cell.
        image_height: the number of pixels per column in the image.
        image_width: the number of pixels per row in the image.

    Raises:
        ValueError: ``image_height`` or ``image_width`` is not positive.
        ValueError: ``height`` or ``width`` is not positive.
        ValueError: ``height >= image_height`` or ``width >= image_width``.
    """

    def __init__(
        self,
        height: int,
        width: int,
        image_height: int,
        image_width: int,
    ):
        if image_height <= 0 or image_width <= 0:
            raise ValueError(
                "both image height and width must be positive "
                f"but the image shape is {(image_height, image_width)}, "
                "please input a valid image shape"
            )
        if height <= 0 or width <= 0:
            raise ValueError(
                f"height({height}) and "
                f"width({width}) both must be larger than 0"
            )
        if height >= image_height or width >= image_width:
            raise ValueError(
                "Grid height and width must be smaller than image height and width"
            )

        self._height = height
        self._width = width

        self._image_height = image_height
        self._image_width = image_width

        # If the resulting cell size is not an integer, round it up. The
        # last grid cell per row and column may then extend past the image,
        # which is why pixel ranges are clipped in ``_cell_bounds``.
        # Both operands are validated as positive above, so the rounded-up
        # cell size is always at least 1 and needs no further check.
        self.c_h = math.ceil(image_height / self._height)
        self.c_w = math.ceil(image_width / self._width)

    def _cell_bounds(
        self, h_idx: int, w_idx: int
    ) -> Tuple[int, int, int, int]:
        """
        Compute the pixel range covered by a grid cell.

        The end of each range is clipped to the image size because the
        rounded-up cell size can push the last cells past the image border.

        Args:
            h_idx: the zero-based height(row) index of the grid cell.
            w_idx: the zero-based width(column) index of the grid cell.

        Returns:
            A tuple ``(h_start, h_end, w_start, w_end)`` of half-open pixel
            ranges ``[h_start, h_end)`` and ``[w_start, w_end)``.
        """
        h_start = h_idx * self.c_h
        h_end = min((h_idx + 1) * self.c_h, self._image_height)
        w_start = w_idx * self.c_w
        w_end = min((w_idx + 1) * self.c_w, self._image_width)
        return h_start, h_end, w_start, w_end

    def get_grid_cell(
        self, img_arr: np.ndarray, h_idx: int, w_idx: int
    ) -> np.ndarray:
        """
        Get the array data of a grid cell from the given image array.

        The returned array is a masked version of the image with shape
        ``(image_height, image_width)``: entries outside the grid cell are
        zeros and entries inside the grid cell are copied from ``img_arr``.

        Note: all indices are zero-based and counted from top left corner of
        the image.

        Args:
            img_arr: image data represented as a numpy array. It is sliced
                on its first two axes and the slice is assigned into a 2-D
                array, so a 2-D ``(image_height, image_width)`` array is
                expected.
            h_idx: the zero-based height(row) index of the grid cell in the
                grid, the unit is one grid cell.
            w_idx: the zero-based width(column) index of the grid cell in
                the grid, the unit is one grid cell.

        Raises:
            ValueError: ``h_idx`` is out of the range specified by
                ``height``.
            ValueError: ``w_idx`` is out of the range specified by
                ``width``.

        Returns:
            numpy array that represents the grid cell.
        """
        if not 0 <= h_idx < self._height:
            raise ValueError(
                f"input parameter h_idx ({h_idx}) is "
                "out of scope of h_idx range"
                f" {(0, self._height)}"
            )
        if not 0 <= w_idx < self._width:
            raise ValueError(
                f"input parameter w_idx ({w_idx}) is "
                "out of scope of w_idx range"
                f" {(0, self._width)}"
            )

        h_start, h_end, w_start, w_end = self._cell_bounds(h_idx, w_idx)
        # Start from an all-zero canvas and copy over only the pixels that
        # fall inside the requested grid cell.
        array = np.zeros((self._image_height, self._image_width))
        array[h_start:h_end, w_start:w_end] = img_arr[
            h_start:h_end, w_start:w_end
        ]
        return array

    def get_grid_cell_center(self, h_idx: int, w_idx: int) -> Tuple[int, int]:
        """
        Get the center pixel position of the grid cell at the specific
        height index and width index in the ``Grid``.

        The center is the midpoint (rounded down) of the cell's pixel range
        along each axis. For example, a grid cell with a height range
        (unit: pixel) of ``(0, 3)`` and a width range (unit: pixel) of
        ``(0, 3)`` has its center at ``(1, 1)``. Since grid cells are
        usually large, the offset introduced by rounding down is minor.

        Note: all indices are zero-based and counted from top left corner of
        the grid. The indices are not range-checked by this method.

        Args:
            h_idx: the height(row) index of the grid cell in the grid,
                the unit is one grid cell.
            w_idx: the width(column) index of the grid cell in the
                grid, the unit is one grid cell.

        Returns:
            A tuple of (y index, x index)
        """
        h_start, h_end, w_start, w_end = self._cell_bounds(h_idx, w_idx)
        return (h_start + h_end) // 2, (w_start + w_end) // 2

    @property
    def num_grid_cells(self) -> int:
        """Total number of grid cells (``height * width``)."""
        return self._height * self._width

    @property
    def height(self) -> int:
        """Number of grid cells per column."""
        return self._height

    @property
    def width(self) -> int:
        """Number of grid cells per row."""
        return self._width

    def _key(self) -> Tuple[int, int, int, int]:
        """Return the tuple that defines identity for repr/eq/hash."""
        return (
            self._height,
            self._width,
            self._image_height,
            self._image_width,
        )

    def __repr__(self):
        return str(self._key())

    def __eq__(self, other):
        # Returning NotImplemented lets Python fall back to its default
        # handling, so ``grid == None`` (or any non-Grid) is simply False
        # instead of raising AttributeError.
        if not isinstance(other, Grid):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())


GroupType = TypeVar("GroupType", bound=BaseGroup)
LinkType = TypeVar("LinkType", bound=BaseLink)
Loading