Skip to content

Commit

Permalink
Merge pull request #411 from loris-imageserver/jp2-siz-segment
Browse files Browse the repository at this point in the history
Move the SIZ marker segment parsing into a separate method
  • Loading branch information
alexwlchan authored Mar 7, 2018
2 parents 47a3f68 + c239a9f commit 72847e0
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 25 deletions.
1 change: 1 addition & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ show_missing = True
exclude_lines =
pragma: no cover
assert False, "Should not be reachable"
else: # unreachable
102 changes: 82 additions & 20 deletions loris/jp2_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,19 @@
import os
import struct

import attr

from loris.loris_exception import LorisException

logger = logging.getLogger(__name__)


@attr.attrs(slots=True)
class Dimensions(object):
    """A height/width pair, used for image and tile sizes."""

    # Declaration order is kept as (height, width); callers in this module
    # construct instances with keyword arguments.
    height = attr.attrib()
    width = attr.attrib()


class JP2ExtractionError(LorisException):
    """Signals a failure while extracting data from a JP2 image."""
Expand Down Expand Up @@ -168,7 +176,7 @@ def _get_dimensions_from_image_header_box(self, jp2):
# height and width. Consume the rest of the box before returning.
jp2.read(22 - 16)

return (height, width)
return Dimensions(width=width, height=height)

def _parse_colour_specification_box(self, jp2):
"""
Expand Down Expand Up @@ -276,9 +284,54 @@ def _parse_colour_specification_box(self, jp2):
return (['gray', 'color'], profile_bytes)

# This should be unreachable; we include it for completeness.
else:
else: # unreachable
assert False, meth

def _parse_siz_marker_segment(self, jp2):
    """
    Parse the SIZ marker segment and return the reference tile size.

    The SIZ marker segment provides information about the uncompressed
    image, including (for our purposes) the width/height of one tile.
    The layout of the segment is as follows:

        SIZ     Marker code, 2 bytes.  Should have value 0xFF51.
        Lsiz    Length of the marker segment, 2 bytes.
        Rsiz    2 bytes, irrelevant to us.
        Xsiz    4 bytes, irrelevant to us.
        Ysiz    4 bytes, irrelevant to us.
        XOsiz   4 bytes, irrelevant to us.
        YOsiz   4 bytes, irrelevant to us.
        XTsiz   Width of one reference tile wrt the ref grid, 4 bytes.
        YTsiz   Height of one reference tile wrt the ref grid, 4 bytes.

    We don't care about the rest of the fields, and can skip them.
    See § A.5.1 for details.

    :param jp2: file-like object positioned at the start of the SIZ
        marker segment (i.e. at the marker code itself).
    :returns: ``Dimensions`` with ``width`` set to XTsiz and ``height``
        set to YTsiz.
    :raises JP2ExtractionError: if the marker code is not 0xFF51, or if
        the stream ends before XTsiz and YTsiz have been read.
    """
    marker_code = jp2.read(2)
    if marker_code != b'\xFF\x51':
        raise JP2ExtractionError(
            "Bad marker code in the SIZ marker segment: %r" % marker_code
        )

    # Skip over the fields we don't use:
    #
    #   Lsiz    2
    #   Rsiz    2
    #   Xsiz    4
    #   Ysiz    4
    #   XOsiz   4
    #   YOsiz   4
    #         = 20
    #
    jp2.read(20)

    # Now we're on XTsiz and YTsiz: two big-endian unsigned 32-bit ints.
    # Check the length explicitly so a truncated stream surfaces as a
    # JP2ExtractionError rather than a bare struct.error.  A short read
    # while skipping above also ends up here, because this read then
    # comes back short as well.
    tile_size_bytes = jp2.read(8)
    if len(tile_size_bytes) != 8:
        raise JP2ExtractionError(
            "Truncated SIZ marker segment: expected 8 bytes for "
            "XTsiz/YTsiz, got %d" % len(tile_size_bytes)
        )
    xt_siz, yt_siz = struct.unpack('>II', tile_size_bytes)

    return Dimensions(width=xt_siz, height=yt_siz)

def extract_jp2(self, jp2):
"""
Given a file-like object that contains a JP2 image, attempt
Expand All @@ -305,14 +358,15 @@ def extract_jp2(self, jp2):
# box in the JP2 Header box (see § I.5.3). In particular, it gives
# us the height and the width.
dimensions = self._get_dimensions_from_image_header_box(jp2)
self.height, self.width = dimensions
self.height = dimensions.height
self.width = dimensions.width
logger.debug("width: %d", self.width)
logger.debug("height: %d", self.height)

# After the Image Header box, there are a number of other boxes inside
# the JP2 Header box, which can potentially appear in any order.
# We're only interested in a Colour Specification box, which has
# type 'colr', so skip forward until we find that. ()
# type 'colr', so skip forward until we find that.
#
# Note: a JP2 Header box may contain more than one colr box; for now
# we only use the first and ignore the rest.
Expand All @@ -327,23 +381,31 @@ def extract_jp2(self, jp2):
self.color_profile_bytes = profile_bytes
logger.debug('qualities: %s', self.profile.description['qualities'])

scaleFactors = []
# This is all the information we need from the JP2 Header box.

window = deque(jp2.read(2), 2)
# start of codestream
while ((window[0] != b'\xFF') or (window[1] != b'\x4F')): # (SOC - required, see pg 14)
window.append(jp2.read(1))
while ((window[0] != b'\xFF') or (window[1] != b'\x51')): # (SIZ - required, see pg 14)
window.append(jp2.read(1))
jp2.read(20) # through Lsiz (16), Rsiz (16), Xsiz (32), Ysiz (32), XOsiz (32), YOsiz (32)
tile_width = int(struct.unpack(">I", jp2.read(4))[0]) # XTsiz (32)
tile_height = int(struct.unpack(">I", jp2.read(4))[0]) # YTsiz (32)
logger.debug("tile width: %s", tile_width)
logger.debug("tile height: %s", tile_height)
self.tiles.append( { 'width' : tile_width } )
if tile_width != tile_height:
self.tiles[0]['height'] = tile_height
jp2.read(10) # XTOsiz (32), YTOsiz (32), Csiz (16)
# Now we want to get tile and size data from the Contiguous Codestream
# box, which contains the complete JPEG 2000 codestream (see § I.5.4).
#
# Specifically, we're interested in the Image and Tile Size (SIZ),
# which includes the width and height of the reference grid and tiles.
# This starts with a marker code 'SIZ = 0xFF51'.
#
# There is only one SIZ per codestream, so it suffices to find the
# first instance (see § A.5).
_read_jp2_until_match(jp2, b'\xFF\x51')

tile_dimensions = self._parse_siz_marker_segment(jp2)
if tile_dimensions.height == tile_dimensions.width:
self.tiles.append({
'width': tile_dimensions.width
})
else:
self.tiles.append({
'width': tile_dimensions.width,
'height': tile_dimensions.height
})

scaleFactors = []

window = deque(jp2.read(2), 2)
while ((window[0] != b'\xFF') or (window[1] != b'\x52')): # (COD - required, see pg 14)
Expand Down
54 changes: 49 additions & 5 deletions tests/jp2_extractor_t.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from hypothesis.strategies import binary
import pytest

from loris.jp2_extractor import JP2Extractor, JP2ExtractionError
from loris.jp2_extractor import Dimensions, JP2Extractor, JP2ExtractionError


@pytest.fixture
Expand Down Expand Up @@ -105,10 +105,22 @@ def test_file_type_box_is_ok_or_error(self, extractor, file_type_box):
assert 'File Type box' in str(err)

@pytest.mark.parametrize('header_box_bytes, expected_dimensions', [
(b'\x00\x00\x00\x01\x00\x00\x00\x01', (1, 1)),
(b'\x00\x00\x00\x11\x00\x00\x00\x00', (17, 0)),
(b'\x00\x00\x00\x00\x00\x00\x00\x11', (0, 17)),
(b'\x01\x01\x01\x01\x02\x02\x02\x02', (16843009, 33686018)),
(
b'\x00\x00\x00\x01\x00\x00\x00\x01',
Dimensions(height=1, width=1)
),
(
b'\x00\x00\x00\x11\x00\x00\x00\x00',
Dimensions(height=17, width=0)
),
(
b'\x00\x00\x00\x00\x00\x00\x00\x11',
Dimensions(height=0, width=17)
),
(
b'\x01\x01\x01\x01\x02\x02\x02\x02',
Dimensions(height=16843009, width=33686018)
),
])
def test_reading_dimensions_from_headr_box(
self, extractor, header_box_bytes, expected_dimensions
Expand Down Expand Up @@ -254,3 +266,35 @@ def test_parse_colour_specification_box_is_okay_or_error(
qualities, profile_bytes = result
assert isinstance(qualities, list)
assert isinstance(profile_bytes, bytes)

@pytest.mark.parametrize('marker_code', [b'\xFF\x52', b'\xFE\x52', b'00'])
def test_bad_siz_marker_code_is_error(self, extractor, marker_code):
    """A segment that does not start with 0xFF51 is rejected."""
    stream = BytesIO(marker_code)

    with pytest.raises(JP2ExtractionError) as exc_info:
        extractor._parse_siz_marker_segment(stream)

    message = str(exc_info.value)
    assert 'Bad marker code in the SIZ marker segment' in message

@pytest.mark.parametrize('xtsiz_ytsiz, expected_dimensions', [
    (
        b'\x00\x00\x00\x01\x00\x00\x00\x01',
        Dimensions(width=1, height=1)
    ),
    (
        b'\x00\x00\x00\x11\x00\x00\x00\x00',
        Dimensions(width=17, height=0)
    ),
    (
        b'\x00\x00\x00\x00\x00\x00\x00\x11',
        Dimensions(width=0, height=17)
    ),
    (
        b'\x01\x01\x01\x01\x02\x02\x02\x02',
        Dimensions(width=16843009, height=33686018)
    ),
])
def test_get_dimensions_from_siz_marker_segment(
    self, extractor, xtsiz_ytsiz, expected_dimensions
):
    """XTsiz/YTsiz are read as the tile width/height after the skipped fields."""
    # Marker code, then 20 bytes standing in for Lsiz through YOsiz, then
    # the eight bytes under test.
    siz_marker = b'\xFF\x51'
    skipped_fields = b'\x00' * 20
    stream = BytesIO(siz_marker + skipped_fields + xtsiz_ytsiz)

    assert extractor._parse_siz_marker_segment(stream) == expected_dimensions

0 comments on commit 72847e0

Please sign in to comment.