From 637aa7df1e0990de5a03857a2c685c2df6ea6683 Mon Sep 17 00:00:00 2001
From: Forrest Williams <ffwilliams2@alaska.edu>
Date: Wed, 9 Aug 2023 10:17:46 -0500
Subject: [PATCH 1/7] use less random input data for test and add more
 modify_index tests

---
 tests/conftest.py  |  8 ++++++--
 tests/test_zran.py | 38 ++++++++++++++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 8d4eb0b..b9d2f61 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,5 @@
 import os
+import random
 import zlib
 
 import pytest
@@ -31,7 +32,7 @@ def input_data():
 
 
 def create_compressed_data(uncompressed_data, wbits, start=None, stop=None):
-    compress_obj = zlib.compressobj(wbits=wbits)
+    compress_obj = zlib.compressobj(wbits=wbits, level=9)
     compressed = compress_obj.compress(uncompressed_data)
     compressed += compress_obj.flush()
 
@@ -57,7 +58,10 @@ def gz_points():
 
 @pytest.fixture(scope='module')
 def data():
-    out = os.urandom(2**22)
+    # Can't use os.urandom directly because there needs to be some
+    # repetition in order for compression to be effective
+    words = [os.urandom(8) for _ in range(1000)]
+    out = b''.join([random.choice(words) for _ in range(524288)])
     return out
 
 
diff --git a/tests/test_zran.py b/tests/test_zran.py
index 4e3ca23..5b59f20 100644
--- a/tests/test_zran.py
+++ b/tests/test_zran.py
@@ -32,13 +32,13 @@ def test_create_index(compressed_gz_data):
     assert len(points[0].window) == 32768
 
 
-@pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
+# @pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
 def test_create_index_fail_head(data, compressed_gz_data_no_head):
     with pytest.raises(zran.ZranError, match='zran: compressed data error in input file'):
         zran.Index.create_index(compressed_gz_data_no_head)
 
 
-@pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
+# @pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
 def test_create_index_fail_tail(data, compressed_gz_data_no_tail):
     with pytest.raises(zran.ZranError, match='zran: input file ended prematurely'):
         zran.Index.create_index(compressed_gz_data_no_tail)
@@ -68,7 +68,7 @@ def test_decompress(data, compressed_file):
     assert data[start : start + length] == test_data
 
 
-@pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
+# @pytest.mark.skip(reason='Currently unstable. Will sometimes not fail if data has certain (unknown) properties')
 def test_decompress_fail(data, compressed_gz_data, compressed_gz_data_no_head):
     start = 100
     length = 1000
@@ -86,8 +86,23 @@ def test_get_closest_point():
     assert r2.outloc == 4
 
 
+def test_modify_index_and_beginning_decompress(data, compressed_dfl_data):
+    index = zran.Index.create_index(compressed_dfl_data, span=2**18)
+    start = 0
+    stop = 100
+
+    compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
+    test_data = zran.decompress(
+        compressed_dfl_data[compressed_range[0] : compressed_range[1]],
+        new_index,
+        start - uncompressed_range[0],
+        stop - start,
+    )
+    assert data[start:stop] == test_data
+
+
 @pytest.mark.parametrize('start_index,stop_index', ((0, 5), (4, 10), (9, -1)))
-def test_modify_index_and_decompress(start_index, stop_index, data, compressed_dfl_data):
+def test_modify_index_and_interior_decompress(start_index, stop_index, data, compressed_dfl_data):
     index = zran.Index.create_index(compressed_dfl_data, span=2**18)
     start = index.points[start_index].outloc + 100
     stop = index.points[stop_index].outloc + 100
@@ -102,6 +117,21 @@ def test_modify_index_and_decompress(start_index, stop_index, data, compressed_d
     assert data[start:stop] == test_data
 
 
+def test_modify_index_and_end_decompress(data, compressed_dfl_data):
+    index = zran.Index.create_index(compressed_dfl_data, span=2**18)
+    start = index.points[-1].outloc + 100
+    stop = len(data)
+
+    compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
+    test_data = zran.decompress(
+        compressed_dfl_data[compressed_range[0] : compressed_range[1]],
+        new_index,
+        start - uncompressed_range[0],
+        stop - start,
+    )
+    assert data[start:stop] == test_data
+
+
 @pytest.mark.skip(reason='Integration test. Only run if testing Sentinel-1 SLC burst compatibility')
 @pytest.mark.parametrize('burst', offset_list)
 def test_safe(burst, input_data):

From 67a8af24f7e6147aea916184eadf88a41c33a11f Mon Sep 17 00:00:00 2001
From: Forrest Williams <ffwilliams2@alaska.edu>
Date: Wed, 9 Aug 2023 10:37:44 -0500
Subject: [PATCH 2/7] modified_index: last window is zero to save space

---
 src/zran/zranlib.pyx | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/zran/zranlib.pyx b/src/zran/zranlib.pyx
index e38aca6..311d309 100644
--- a/src/zran/zranlib.pyx
+++ b/src/zran/zranlib.pyx
@@ -270,16 +270,24 @@ class Index:
 
         inloc_offset = desired_points[0].inloc - compressed_offsets[0]
         outloc_offset = desired_points[0].outloc
-        desired_points = [
-            Point(x.outloc - outloc_offset, x.inloc - inloc_offset, x.bits, x.window) for x in desired_points
-        ]
+
+        output_points = []
+        start_point_is_last_in_origional = start_index == len(compressed_offsets) - 1
+        for i, point in enumerate(desired_points):
+            last_point_in_new_index = i == len(desired_points) - 1
+            if last_point_in_new_index and not start_point_is_last_in_origional:
+                window = bytearray(WINDOW_LENGTH)
+            else:
+                window = point.window
+            new_point = Point(point.outloc - outloc_offset, point.inloc - inloc_offset, point.bits, window)
+            output_points.append(new_point)
 
         modified_index = Index(
             self.have,
             compressed_range[1] - compressed_range[0],
             uncompressed_range[1] - uncompressed_range[0],
-            len(desired_points),
-            desired_points,
+            len(output_points),
+            output_points,
         )
         return compressed_range, uncompressed_range, modified_index
 

From e95f4637c861d9bed421d8c2d6b8f54e556edc98 Mon Sep 17 00:00:00 2001
From: Forrest Williams <ffwilliams2@alaska.edu>
Date: Wed, 9 Aug 2023 10:46:37 -0500
Subject: [PATCH 3/7] modified_index: first window is zero to save space

---
 src/zran/zranlib.pyx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/zran/zranlib.pyx b/src/zran/zranlib.pyx
index 311d309..e8af646 100644
--- a/src/zran/zranlib.pyx
+++ b/src/zran/zranlib.pyx
@@ -275,7 +275,9 @@ class Index:
         start_point_is_last_in_origional = start_index == len(compressed_offsets) - 1
         for i, point in enumerate(desired_points):
             last_point_in_new_index = i == len(desired_points) - 1
-            if last_point_in_new_index and not start_point_is_last_in_origional:
+            if i == 0:
+                window = bytearray(WINDOW_LENGTH)
+            elif last_point_in_new_index and not start_point_is_last_in_origional:
                 window = bytearray(WINDOW_LENGTH)
             else:
                 window = point.window

From 2089fd22f12501e4e2de5cdb21e7663ab8023747 Mon Sep 17 00:00:00 2001
From: Forrest Williams <ffwilliams2@alaska.edu>
Date: Wed, 9 Aug 2023 16:33:19 -0500
Subject: [PATCH 4/7] work on preventing out-of-bounds decompression

---
 src/zran/zranlib.pyx |  9 ++++++++-
 tests/test_zran.py   | 21 +++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/zran/zranlib.pyx b/src/zran/zranlib.pyx
index e8af646..de90dde 100644
--- a/src/zran/zranlib.pyx
+++ b/src/zran/zranlib.pyx
@@ -146,7 +146,10 @@ def build_deflate_index(input_bytes: bytes, span: off_t = 2**20) -> WrapperDefla
     return index
 
 
-def decompress(input_bytes: bytes, index: Index, offset: off_t, length: int) -> bytes:  # noqa
+def decompress(input_bytes: bytes, index: Index, offset: off_t, length: int) -> bytes: # noqa
+    if offset + length > index.uncompressed_size:
+        raise ValueError('Offset and length specified would result in reading past the file bounds')
+
     compressed_data = cython.declare(cython.p_char, PyBytes_AsString(input_bytes))
     compressed_data_length = cython.declare(off_t, PyBytes_Size(input_bytes))
     infile = fmemopen(compressed_data, compressed_data_length, b"r")
@@ -179,6 +182,10 @@ class Index:
         self.mode = mode
         self.have = have
         self.points = points
+        if self.points[0].outloc + 1 == self.uncompressed_size:
+            self.modified = True
+        else:
+            self.modified = False
 
     @staticmethod
     def create_index(input_bytes: bytes, span: int = 2**20):
diff --git a/tests/test_zran.py b/tests/test_zran.py
index 5b59f20..b2be799 100644
--- a/tests/test_zran.py
+++ b/tests/test_zran.py
@@ -108,6 +108,7 @@ def test_modify_index_and_interior_decompress(start_index, stop_index, data, com
     stop = index.points[stop_index].outloc + 100
 
     compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
+    breakpoint()
     test_data = zran.decompress(
         compressed_dfl_data[compressed_range[0] : compressed_range[1]],
         new_index,
@@ -132,6 +133,26 @@ def test_modify_index_and_end_decompress(data, compressed_dfl_data):
     assert data[start:stop] == test_data
 
 
+def test_index_and_read_late(data, compressed_dfl_data):
+    index = zran.Index.create_index(compressed_dfl_data, span=2**18)
+    with pytest.raises(ValueError, match='Offset and length specified would result in reading past the file bounds'):
+        zran.decompress(compressed_dfl_data, index, 0, len(data))
+
+
+def test_modify_index_and_read_late(data, compressed_dfl_data):
+    index = zran.Index.create_index(compressed_dfl_data, span=2**18)
+    start = index.points[5].outloc
+    stop = index.points[10].outloc
+
+    compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
+    offset = start - uncompressed_range[0]
+    length = stop - start
+    test_data = zran.decompress(
+        compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, offset, length
+    )
+    assert data[start:stop] == test_data
+
+
 @pytest.mark.skip(reason='Integration test. Only run if testing Sentinel-1 SLC burst compatibility')
 @pytest.mark.parametrize('burst', offset_list)
 def test_safe(burst, input_data):

From 6f426e0d2c249fb0d73e896be993681af00ea0d7 Mon Sep 17 00:00:00 2001
From: Forrest Williams <ffwilliams2@alaska.edu>
Date: Thu, 10 Aug 2023 08:20:21 -0500
Subject: [PATCH 5/7] handle out of bound decompress logic in decompress func

---
 src/zran/zranlib.pyx | 14 +++++++++-----
 tests/test_zran.py   | 35 +++++++++++++++++++++++------------
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/src/zran/zranlib.pyx b/src/zran/zranlib.pyx
index de90dde..5c5ed23 100644
--- a/src/zran/zranlib.pyx
+++ b/src/zran/zranlib.pyx
@@ -146,7 +146,15 @@ def build_deflate_index(input_bytes: bytes, span: off_t = 2**20) -> WrapperDefla
     return index
 
 
-def decompress(input_bytes: bytes, index: Index, offset: off_t, length: int) -> bytes: # noqa
+def decompress(input_bytes: bytes, index: Index, offset: off_t, length: int) -> bytes:  # noqa
+    first_bit_zero = index.points[0].bits == 0
+    offset_before_second_point = offset < index.points[1].outloc
+    if not first_bit_zero and offset_before_second_point:
+        raise ValueError(
+            'When first index bit != 0, offset must be at or after second index point'
+            f' ({index.points[1].outloc} for this index)'
+        )
+
     if offset + length > index.uncompressed_size:
         raise ValueError('Offset and length specified would result in reading past the file bounds')
 
@@ -182,10 +190,6 @@ class Index:
         self.mode = mode
         self.have = have
         self.points = points
-        if self.points[0].outloc + 1 == self.uncompressed_size:
-            self.modified = True
-        else:
-            self.modified = False
 
     @staticmethod
     def create_index(input_bytes: bytes, span: int = 2**20):
diff --git a/tests/test_zran.py b/tests/test_zran.py
index b2be799..bb0283d 100644
--- a/tests/test_zran.py
+++ b/tests/test_zran.py
@@ -86,7 +86,7 @@ def test_get_closest_point():
     assert r2.outloc == 4
 
 
-def test_modify_index_and_beginning_decompress(data, compressed_dfl_data):
+def test_modify_index_and_head_decompress(data, compressed_dfl_data):
     index = zran.Index.create_index(compressed_dfl_data, span=2**18)
     start = 0
     stop = 100
@@ -108,7 +108,6 @@ def test_modify_index_and_interior_decompress(start_index, stop_index, data, com
     stop = index.points[stop_index].outloc + 100
 
     compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
-    breakpoint()
     test_data = zran.decompress(
         compressed_dfl_data[compressed_range[0] : compressed_range[1]],
         new_index,
@@ -118,7 +117,7 @@ def test_modify_index_and_interior_decompress(start_index, stop_index, data, com
     assert data[start:stop] == test_data
 
 
-def test_modify_index_and_end_decompress(data, compressed_dfl_data):
+def test_modify_index_and_tail_decompress(data, compressed_dfl_data):
     index = zran.Index.create_index(compressed_dfl_data, span=2**18)
     start = index.points[-1].outloc + 100
     stop = len(data)
@@ -133,25 +132,37 @@ def test_modify_index_and_end_decompress(data, compressed_dfl_data):
     assert data[start:stop] == test_data
 
 
-def test_index_and_read_late(data, compressed_dfl_data):
+def test_index_after_end_decompress(data, compressed_dfl_data):
     index = zran.Index.create_index(compressed_dfl_data, span=2**18)
     with pytest.raises(ValueError, match='Offset and length specified would result in reading past the file bounds'):
-        zran.decompress(compressed_dfl_data, index, 0, len(data))
+        zran.decompress(compressed_dfl_data, index, 0, len(data) + 1)
 
 
-def test_modify_index_and_read_late(data, compressed_dfl_data):
+def test_modified_index_before_start_decompress(data, compressed_dfl_data):
     index = zran.Index.create_index(compressed_dfl_data, span=2**18)
     start = index.points[5].outloc
     stop = index.points[10].outloc
 
     compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
-    offset = start - uncompressed_range[0]
-    length = stop - start
-    test_data = zran.decompress(
-        compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, offset, length
-    )
-    assert data[start:stop] == test_data
+    if new_index.points[0].bits != 0:
+        msg = 'When first index bit != 0, offset must be at or after second index point *'
+        with pytest.raises(ValueError, match=msg):
+            zran.decompress(compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, 0, 10)
+
+
+def test_modified_after_end_decompress(data, compressed_dfl_data):
+    index = zran.Index.create_index(compressed_dfl_data, span=2**18)
+    start = index.points[5].outloc
+    stop = index.points[10].outloc
 
+    compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
+    with pytest.raises(ValueError, match='Offset and length specified would result in reading past the file bounds'):
+        zran.decompress(
+            compressed_dfl_data[compressed_range[0] : compressed_range[1]],
+            new_index,
+            new_index.points[1].outloc + 10,
+            new_index.uncompressed_size,
+        )
 
 @pytest.mark.skip(reason='Integration test. Only run if testing Sentinel-1 SLC burst compatibility')
 @pytest.mark.parametrize('burst', offset_list)

From fac449d26ff657e03ef3d89f8f8ff4ed029e4077 Mon Sep 17 00:00:00 2001
From: Forrest Williams <ffwilliams2@alaska.edu>
Date: Thu, 10 Aug 2023 09:00:07 -0500
Subject: [PATCH 6/7] add default functionality to remove last stop point of
 modified index

---
 src/zran/zranlib.pyx | 19 +++++++++++++------
 tests/test_zran.py   | 28 +++++++++++++---------------
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/zran/zranlib.pyx b/src/zran/zranlib.pyx
index 5c5ed23..1cdbf46 100644
--- a/src/zran/zranlib.pyx
+++ b/src/zran/zranlib.pyx
@@ -1,6 +1,7 @@
 # vim: filetype=python
 import struct as py_struct
 import zlib
+import warnings
 from collections import namedtuple
 from operator import attrgetter
 from typing import Iterable, List
@@ -148,7 +149,11 @@ def build_deflate_index(input_bytes: bytes, span: off_t = 2**20) -> WrapperDefla
 
 def decompress(input_bytes: bytes, index: Index, offset: off_t, length: int) -> bytes:  # noqa
     first_bit_zero = index.points[0].bits == 0
-    offset_before_second_point = offset < index.points[1].outloc
+    if index.have > 1:
+        offset_before_second_point = offset < index.points[1].outloc
+    else:
+        offset_before_second_point = False
+
     if not first_bit_zero and offset_before_second_point:
         raise ValueError(
             'When first index bit != 0, offset must be at or after second index point'
@@ -246,7 +251,7 @@ class Index:
     def to_c_index(self):
         return WrapperDeflateIndex.from_python_index(self.mode, self.uncompressed_size, self.have, self.points)
 
-    def create_modified_index(self, starts=[], stops=[]):
+    def create_modified_index(self, starts=[], stops=[], remove_last_stop=True):
         """Modifies a set of access Points so that they only contain the needed data
         Args:
             starts: uncompressed locations to provide indexes before.
@@ -283,18 +288,20 @@ class Index:
         outloc_offset = desired_points[0].outloc
 
         output_points = []
-        start_point_is_last_in_origional = start_index == len(compressed_offsets) - 1
         for i, point in enumerate(desired_points):
-            last_point_in_new_index = i == len(desired_points) - 1
             if i == 0:
                 window = bytearray(WINDOW_LENGTH)
-            elif last_point_in_new_index and not start_point_is_last_in_origional:
-                window = bytearray(WINDOW_LENGTH)
             else:
                 window = point.window
             new_point = Point(point.outloc - outloc_offset, point.inloc - inloc_offset, point.bits, window)
             output_points.append(new_point)
 
+        if stops and remove_last_stop:
+            if len(output_points) <= 2:
+                warnings.warn(UserWarning('Indexes must have at least two points, not removing last stop'))
+            else:
+                output_points = output_points[:-1]
+
         modified_index = Index(
             self.have,
             compressed_range[1] - compressed_range[0],
diff --git a/tests/test_zran.py b/tests/test_zran.py
index bb0283d..3cbe499 100644
--- a/tests/test_zran.py
+++ b/tests/test_zran.py
@@ -91,12 +91,11 @@ def test_modify_index_and_head_decompress(data, compressed_dfl_data):
     start = 0
     stop = 100
 
-    compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
+    compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop], False)
+    length = start - uncompressed_range[0]
+    offset = stop - start
     test_data = zran.decompress(
-        compressed_dfl_data[compressed_range[0] : compressed_range[1]],
-        new_index,
-        start - uncompressed_range[0],
-        stop - start,
+        compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, length, offset
     )
     assert data[start:stop] == test_data
 
@@ -108,11 +107,10 @@ def test_modify_index_and_interior_decompress(start_index, stop_index, data, com
     stop = index.points[stop_index].outloc + 100
 
     compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
+    length = start - uncompressed_range[0]
+    offset = stop - start
     test_data = zran.decompress(
-        compressed_dfl_data[compressed_range[0] : compressed_range[1]],
-        new_index,
-        start - uncompressed_range[0],
-        stop - start,
+        compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, length, offset
     )
     assert data[start:stop] == test_data
 
@@ -122,12 +120,11 @@ def test_modify_index_and_tail_decompress(data, compressed_dfl_data):
     start = index.points[-1].outloc + 100
     stop = len(data)
 
-    compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop])
+    compressed_range, uncompressed_range, new_index = index.create_modified_index([start], [stop], False)
+    length = start - uncompressed_range[0]
+    offset = stop - start
     test_data = zran.decompress(
-        compressed_dfl_data[compressed_range[0] : compressed_range[1]],
-        new_index,
-        start - uncompressed_range[0],
-        stop - start,
+        compressed_dfl_data[compressed_range[0] : compressed_range[1]], new_index, length, offset
     )
     assert data[start:stop] == test_data
 
@@ -164,9 +161,10 @@ def test_modified_after_end_decompress(data, compressed_dfl_data):
             new_index.uncompressed_size,
         )
 
+
 @pytest.mark.skip(reason='Integration test. Only run if testing Sentinel-1 SLC burst compatibility')
 @pytest.mark.parametrize('burst', offset_list)
-def test_safe(burst, input_data):
+def test_burst_extraction(burst, input_data):
     swath, golden, index = input_data
     compressed_range, uncompressed_range, new_index = index.create_modified_index([burst.start], [burst.stop])
     data_subset = swath[compressed_range[0] : compressed_range[1]]

From 542ef78c55df44c59a5a2a19567f0b2a465d17d2 Mon Sep 17 00:00:00 2001
From: Forrest Williams <ffwilliams2@alaska.edu>
Date: Thu, 10 Aug 2023 09:31:35 -0500
Subject: [PATCH 7/7] update changelog

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 87dac76..93eaae5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
 and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.0.5]
+### Added
+* Set the window info to all zeros for the first point in the `first point.bits != 0` case. This decreases the compressed index size
+* New default for `create_modified_index` is to remove the last stop point, since the final point represents the end of the data
+* Update testing to increase coverage of `create_modified_index` corner cases
+
 ## [0.0.4]
 ### Added
 * New information to the README.md concerning contributions and similar projects