Merge pull request #45 from forrestfwilliams/develop
Release v0.5.0
forrestfwilliams authored Aug 10, 2023
2 parents ffedad5 + 1e4cf07 commit 4b5210c
Showing 13 changed files with 126 additions and 28 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build-and-deploy-test.yml
@@ -18,7 +18,7 @@ jobs:
fetch-depth: 0

- name: Build wheels
uses: pypa/cibuildwheel@v2.12.1
uses: pypa/cibuildwheel@v2.14.1

- uses: actions/upload-artifact@v3
with:
@@ -55,7 +55,7 @@ jobs:
name: artifact
path: dist

- uses: pypa/gh-action-pypi-publish@v1.8.1
- uses: pypa/gh-action-pypi-publish@v1.8.8
with:
user: __token__
password: ${{ secrets.PYPI_TEST_PAK }}
4 changes: 2 additions & 2 deletions .github/workflows/build-and-deploy.yml
@@ -18,7 +18,7 @@ jobs:
fetch-depth: 0

- name: Build wheels
uses: pypa/cibuildwheel@v2.12.1
uses: pypa/cibuildwheel@v2.14.1

- uses: actions/upload-artifact@v3
with:
@@ -55,7 +55,7 @@ jobs:
name: artifact
path: dist

- uses: pypa/gh-action-pypi-publish@v1.8.1
- uses: pypa/gh-action-pypi-publish@v1.8.8
with:
user: __token__
password: ${{ secrets.PYPI_PAK }}
2 changes: 1 addition & 1 deletion .github/workflows/bump-version.yml
@@ -7,7 +7,7 @@ on:

jobs:
call-bump-version-workflow:
uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.7.1
uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.8.1
with:
user: zran-bot
email: ffwilliams2@alaska.edu
2 changes: 1 addition & 1 deletion .github/workflows/changelog-check.yml
@@ -13,6 +13,6 @@ on:

jobs:
call-changelog-check-workflow:
uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.7.1
uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.8.1
secrets:
USER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2 changes: 1 addition & 1 deletion .github/workflows/labeled-pr-check.yml
@@ -12,4 +12,4 @@ on:

jobs:
call-labeled-pr-check-workflow:
uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.7.1
uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.8.1
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
@@ -9,6 +9,6 @@ on:
jobs:
call-pytest-workflow:
# Docs: https://github.com/ASFHyP3/actions
uses: ASFHyP3/actions/.github/workflows/reusable-pytest.yml@v0.7.1
uses: ASFHyP3/actions/.github/workflows/reusable-pytest.yml@v0.8.1
with:
local_package_name: zran
2 changes: 1 addition & 1 deletion .github/workflows/release-checklist-comment.yml
@@ -9,7 +9,7 @@ on:

jobs:
call-release-workflow:
uses: ASFHyP3/actions/.github/workflows/reusable-release-checklist-comment.yml@v0.7.1
uses: ASFHyP3/actions/.github/workflows/reusable-release-checklist-comment.yml@v0.8.1
permissions:
pull-requests: write
secrets:
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -7,7 +7,7 @@ on:

jobs:
call-release-workflow:
uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.7.1
uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.8.1
with:
release_prefix: ZRAN
release_branch: main # Optional; default shown
2 changes: 1 addition & 1 deletion .github/workflows/static-analysis.yml
@@ -5,7 +5,7 @@ on: [pull_request]
jobs:
call-secrets-analysis-workflow:
# Docs: https://github.com/ASFHyP3/actions
uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.7.1
uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.8.1

check-with-black:
runs-on: ubuntu-latest
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.0.5]
### Added
* Set the window info to all zeros for the first point in the case where the first point's `bits != 0`, which decreases the compressed index size
* New default for `create_modified_index` is to remove the last stop point, since the final point represents the end of the data
* Update testing to increase coverage of `create_modified_index` corner cases

## [0.0.4]
### Added
* New information to the README.md concerning contributions and similar projects
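A minimal sketch of the `create_modified_index` behavior described in the 0.0.5 entry, pieced together from the API exercised in the tests later in this diff (`zran.Index.create_index`, `Index.create_modified_index`). The data setup mirrors the updated conftest fixture; the raw-DEFLATE `wbits=-15` choice and all variable names are illustrative assumptions, not part of this commit.

```python
import os
import random
import zlib

import zran

# Compressible test data, built the same way as the updated conftest fixture:
# a small vocabulary of random 8-byte words repeated many times (~4 MiB).
words = [os.urandom(8) for _ in range(1000)]
data = b''.join(random.choice(words) for _ in range(2**19))

# Raw DEFLATE stream (wbits=-15), chosen here for illustration.
cobj = zlib.compressobj(wbits=-15)
compressed = cobj.compress(data) + cobj.flush()

index = zran.Index.create_index(compressed, span=2**18)

# Default (remove_last_stop=True): the final stop point is dropped, since it
# only marks the end of the requested data; this keeps the modified index small.
start = index.points[4].outloc + 100
stop = index.points[9].outloc + 100
compressed_range, uncompressed_range, small_index = index.create_modified_index([start], [stop])

# Pass remove_last_stop=False (third argument) when the stop lies at or past the
# last access point, as the new tail-decompress test does. If removing the last
# stop would leave fewer than two points, a UserWarning is issued and the point
# is kept instead.
_, _, tail_index = index.create_modified_index([index.points[-1].outloc + 100], [len(data)], False)
```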
40 changes: 34 additions & 6 deletions src/zran/zranlib.pyx
@@ -1,6 +1,7 @@
# vim: filetype=python
import struct as py_struct
import zlib
import warnings
from collections import namedtuple
from operator import attrgetter
from typing import Iterable, List
@@ -147,6 +148,21 @@ def build_deflate_index(input_bytes: bytes, span: off_t = 2**20) -> WrapperDeflateIndex:


def decompress(input_bytes: bytes, index: Index, offset: off_t, length: int) -> bytes: # noqa
first_bit_zero = index.points[0].bits == 0
if index.have > 1:
offset_before_second_point = offset < index.points[1].outloc
else:
offset_before_second_point = False

if not first_bit_zero and offset_before_second_point:
raise ValueError(
'When first index bit != 0, offset must be at or after second index point'
f' ({index.points[1].outloc} for this index)'
)

if offset + length > index.uncompressed_size:
raise ValueError('Offset and length specified would result in reading past the file bounds')

compressed_data = cython.declare(cython.p_char, PyBytes_AsString(input_bytes))
compressed_data_length = cython.declare(off_t, PyBytes_Size(input_bytes))
infile = fmemopen(compressed_data, compressed_data_length, b"r")
@@ -235,7 +251,7 @@ class Index:
def to_c_index(self):
return WrapperDeflateIndex.from_python_index(self.mode, self.uncompressed_size, self.have, self.points)

def create_modified_index(self, starts=[], stops=[]):
def create_modified_index(self, starts=[], stops=[], remove_last_stop=True):
"""Modifies a set of access Points so that they only contain the needed data
Args:
starts: uncompressed locations to provide indexes before.
@@ -270,16 +286,28 @@

inloc_offset = desired_points[0].inloc - compressed_offsets[0]
outloc_offset = desired_points[0].outloc
desired_points = [
Point(x.outloc - outloc_offset, x.inloc - inloc_offset, x.bits, x.window) for x in desired_points
]

output_points = []
for i, point in enumerate(desired_points):
if i == 0:
window = bytearray(WINDOW_LENGTH)
else:
window = point.window
new_point = Point(point.outloc - outloc_offset, point.inloc - inloc_offset, point.bits, window)
output_points.append(new_point)

if stops and remove_last_stop:
if len(output_points) <= 2:
warnings.warn(UserWarning('Indexes must have at least two points, not removing last stop'))
else:
output_points = output_points[:-1]

modified_index = Index(
self.have,
compressed_range[1] - compressed_range[0],
uncompressed_range[1] - uncompressed_range[0],
len(desired_points),
desired_points,
len(output_points),
output_points,
)
return compressed_range, uncompressed_range, modified_index

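The new up-front checks in `decompress` can be seen with a short sketch, reusing the `data`, `compressed`, and `index` names from the sketch after the CHANGELOG above (illustrative only, not part of this commit):

```python
import zran

# 1. Requests that would read past the end of the uncompressed stream are
#    rejected before any inflation work is done.
try:
    zran.decompress(compressed, index, 0, index.uncompressed_size + 1)
except ValueError as err:
    print(err)  # Offset and length specified would result in reading past the file bounds

# 2. A modified index zeroes the first point's window, so when that point has
#    bits != 0 the offset must land at or after the second access point
#    (mirrors test_modified_index_before_start_decompress below).
comp_range, _, new_index = index.create_modified_index([index.points[5].outloc], [index.points[10].outloc])
subset = compressed[comp_range[0]:comp_range[1]]
if new_index.points[0].bits != 0:
    try:
        zran.decompress(subset, new_index, 0, 10)
    except ValueError as err:
        print(err)  # When first index bit != 0, offset must be at or after second index point
```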
8 changes: 6 additions & 2 deletions tests/conftest.py
@@ -1,4 +1,5 @@
import os
import random
import zlib

import pytest
@@ -31,7 +32,7 @@ def input_data():


def create_compressed_data(uncompressed_data, wbits, start=None, stop=None):
compress_obj = zlib.compressobj(wbits=wbits)
compress_obj = zlib.compressobj(wbits=wbits, level=9)
compressed = compress_obj.compress(uncompressed_data)
compressed += compress_obj.flush()

@@ -57,7 +58,10 @@ def gz_points():

@pytest.fixture(scope='module')
def data():
out = os.urandom(2**22)
# Can't use os.urandom directly because there needs to be some
# repetition in order for compression to be effective
words = [os.urandom(8) for _ in range(1000)]
out = b''.join([random.choice(words) for _ in range(524288)])
return out


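For context on that fixture change: DEFLATE can only shrink data that contains repetition, and purely random bytes are essentially incompressible, so the fixture now repeats a small random vocabulary instead of using raw noise. A self-contained comparison (a sketch, not part of the test suite):

```python
import os
import random
import zlib

incompressible = os.urandom(2**20)                                   # 1 MiB of pure noise
words = [os.urandom(8) for _ in range(1000)]                         # small random vocabulary
compressible = b''.join(random.choice(words) for _ in range(2**17))  # 1 MiB built from that vocabulary

print(len(zlib.compress(incompressible, 9)) / 2**20)  # ~1.0: no savings on noise
print(len(zlib.compress(compressible, 9)) / 2**20)    # well below 1.0: repetition compresses
```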
78 changes: 69 additions & 9 deletions tests/test_zran.py
@@ -32,13 +32,13 @@ def test_create_index(compressed_gz_data):
assert len(points[0].window) == 32768


@pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
# @pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
def test_create_index_fail_head(data, compressed_gz_data_no_head):
with pytest.raises(zran.ZranError, match='zran: compressed data error in input file'):
zran.Index.create_index(compressed_gz_data_no_head)


@pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
# @pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
def test_create_index_fail_tail(data, compressed_gz_data_no_tail):
with pytest.raises(zran.ZranError, match='zran: input file ended prematurely'):
zran.Index.create_index(compressed_gz_data_no_tail)
@@ -68,7 +68,7 @@ def test_decompress(data, compressed_file):
assert data[start : start + length] == test_data


@pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
# @pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
def test_decompress_fail(data, compressed_gz_data, compressed_gz_data_no_head):
start = 100
length = 1000
@@ -86,25 +86,85 @@ def test_get_closest_point():
assert r2.outloc == 4


def test_modify_index_and_head_decompress(data, compressed_dfl_data):
index = zran.Index.create_index(compressed_dfl_data, span=2**18)
start = 0
stop = 100

compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop], False)
length = start - uncompressed_range[0]
offset = stop - start
test_data = zran.decompress(
compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, length, offset
)
assert data[start:stop] == test_data


@pytest.mark.parametrize('start_index,stop_index', ((0, 5), (4, 10), (9, -1)))
def test_modify_index_and_decompress(start_index, stop_index, data, compressed_dfl_data):
def test_modify_index_and_interior_decompress(start_index, stop_index, data, compressed_dfl_data):
index = zran.Index.create_index(compressed_dfl_data, span=2**18)
start = index.points[start_index].outloc + 100
stop = index.points[stop_index].outloc + 100

compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
length = start - uncompressed_range[0]
offset = stop - start
test_data = zran.decompress(
compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, length, offset
)
assert data[start:stop] == test_data


def test_modify_index_and_tail_decompress(data, compressed_dfl_data):
index = zran.Index.create_index(compressed_dfl_data, span=2**18)
start = index.points[-1].outloc + 100
stop = len(data)

compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop], False)
length = start - uncompressed_range[0]
offset = stop - start
test_data = zran.decompress(
compressed_dfl_data[compressed_range[0] : compressed_range[1]],
new_index,
start - uncompressed_range[0],
stop - start,
compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, length, offset
)
assert data[start:stop] == test_data


def test_index_after_end_decompress(data, compressed_dfl_data):
index = zran.Index.create_index(compressed_dfl_data, span=2**18)
with pytest.raises(ValueError, match='Offset and length specified would result in reading past the file bounds'):
zran.decompress(compressed_dfl_data, index, 0, len(data) + 1)


def test_modified_index_before_start_decompress(data, compressed_dfl_data):
index = zran.Index.create_index(compressed_dfl_data, span=2**18)
start = index.points[5].outloc
stop = index.points[10].outloc

compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
if new_index.points[0].bits != 0:
msg = 'When first index bit != 0, offset must be at or after second index point *'
with pytest.raises(ValueError, match=msg):
zran.decompress(compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, 0, 10)


def test_modified_after_end_decompress(data, compressed_dfl_data):
index = zran.Index.create_index(compressed_dfl_data, span=2**18)
start = index.points[5].outloc
stop = index.points[10].outloc

compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
with pytest.raises(ValueError, match='Offset and length specified would result in reading past the file bounds'):
zran.decompress(
compressed_dfl_data[compressed_range[0] : compressed_range[1]],
new_index,
new_index.points[1].outloc + 10,
new_index.uncompressed_size,
)


@pytest.mark.skip(reason='Integration test. Only run if testing Sentinel-1 SLC burst compatibility')
@pytest.mark.parametrize('burst', offset_list)
def test_safe(burst, input_data):
def test_burst_extraction(burst, input_data):
swath, golden, index = input_data
compressed_range, uncompressed_range, new_index = index.create_modified_index([burst.start], [burst.stop])
data_subset = swath[compressed_range[0] : compressed_range[1]]
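The interior-decompress test above is the library's core random-access workflow; a condensed sketch of the same steps with the `decompress` parameters spelled out, reusing `data`, `compressed`, and `index` from the earlier sketches (assumed names, not part of this commit):

```python
import zran

# Uncompressed byte range to extract, sitting between two access points.
start = index.points[4].outloc + 100
stop = index.points[10].outloc + 100

# Shrink the index to just that range. The returned ranges describe which slice
# of the compressed stream and of the uncompressed stream the new index covers.
compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])

# offset: where the requested start sits inside the covered uncompressed range.
# length: how many uncompressed bytes to produce.
offset = start - uncompressed_range[0]
length = stop - start

subset = zran.decompress(compressed[compressed_range[0]:compressed_range[1]], new_index, offset, length)
assert subset == data[start:stop]
```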
