Commit df64302
BF,PY3: f-contiguous issues (memoryview vs buffer, again)
anwarnunez committed Sep 21, 2018
1 parent 42615fa commit df64302
Showing 2 changed files with 33 additions and 34 deletions.
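
The bug this commit works around: in Python 3, `ndarray.data` is a memoryview, and handing a Fortran-ordered (hence non-C-contiguous) buffer to a file-like `write()` raises `BufferError`, whereas the Python 2 `buffer` object exposed the raw memory regardless of logical order. A minimal sketch of the failure mode, illustrative and not part of the commit; `io.BytesIO` stands in for the gzip filestream:

    import io
    import numpy as np

    c_arr = np.zeros((3, 4), order='C')
    io.BytesIO().write(c_arr.data)   # fine: the memoryview is C-contiguous

    f_arr = np.zeros((3, 4), order='F')
    try:
        io.BytesIO().write(f_arr.data)   # PY3: raises BufferError (not C-contiguous)
    except BufferError as exc:
        print(exc)

    # the workaround applied in the gzip branch of this commit:
    io.BytesIO().write(np.ascontiguousarray(f_arr).data)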
cottoncandy/interfaces.py: 64 changes (31 additions & 33 deletions)
@@ -27,7 +27,6 @@
 from base64 import b64decode, b64encode
 
 
-
 import cottoncandy.browser
 import os
 import re
@@ -562,11 +561,10 @@ def upload_raw_array(self, object_name, array, compression=DO_COMPRESSION, acl=D
         ----------
         object_name : str
         array : np.ndarray
-        compression : str
-            Type of compression to use. 'gzip' uses gzip module, None is no compression,
-            other strings specify a codec from numcodecs. Available options are:
-            'LZ4', 'Zlib', 'Zstd', 'BZ2' (note: attend to caps). Zstd appears to be
-            the only one that will work with large (> 2GB) arrays.
+        compression : str, bool
+            `True` uses the configuration defaults. `False` is no compression.
+            Available options are: 'gzip', 'LZ4', 'Zlib', 'Zstd', 'BZ2' (attend to caps).
+            NB: Zstd appears to be the only one that supports >2GB arrays.
         acl : str
             ACL for the object
         **metadata : optional
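
From the caller's side, the updated `compression` semantics look like this. A minimal usage sketch; the bucket and object names are hypothetical, and `get_interface` assumes credentials are already configured:

    import numpy as np
    import cottoncandy as cc

    cci = cc.get_interface('my-bucket')
    arr = np.random.randn(100, 100)

    cci.upload_raw_array('demo/arr', arr)                           # default: DO_COMPRESSION from config
    cci.upload_raw_array('demo/arr_zstd', arr, compression='Zstd')  # explicit codec; works for >2GB arrays
    cci.upload_raw_array('demo/arr_raw', arr, compression=False)    # no compression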
@@ -576,37 +574,37 @@ def upload_raw_array(self, object_name, array, compression=DO_COMPRESSION, acl=D
         This method also uploads the array ``dtype``, ``shape``, and ``gzip``
         flag as metadata
         """
+        if compression is None:
+            compression = False
+
         # Backward compatibility
         if 'gzip' in metadata:
-            warn('Deprecated keyword argument `gzip`. Use `compression="gzip"` instead', DeprecationWarning)
+            warn("Deprecated keyword argument `gzip`. Use `compression='gzip'` instead", DeprecationWarning)
             gz = metadata.pop('gzip')
-            if gz:
-                compression = 'gzip'
-            else:
-                compression = None
-        # Test whether array is >= 2 GB
-        large_array = array.nbytes > 2 ** 31
+            compression = 'gzip' if gz else False
+
+        # check whether array is >= 2 GB
+        large_array = array.nbytes > 2**31
 
         if compression is True:
-            # Select default from config file
-            if large_array:
-                compression = COMPRESSION_LARGE
-            else:
-                compression = COMPRESSION_SMALL
-        elif compression is None:
-            compression = False
+            compression = COMPRESSION_LARGE if large_array else COMPRESSION_SMALL
 
-        order = 'C' if array.flags.carray else 'F'
-        if ((not array.flags['%s_CONTIGUOUS' % order] and six.PY2) or
-                (not array.flags['C_CONTIGUOUS'] and six.PY3)):
-            warn('Non-contiguous array. Creating copy (will use extra memory)...')
-            array = np.array(array, order = order)
+        if large_array and compression == 'gzip':
+            # Raise exception for specification of gzip w/ large array
+            raise ValueError(("gzip does not support compression of >2GB arrays. "
+                              "Try `compression='Zstd'` instead."))
 
-        if large_array and compression == "gzip":
-            # Raise exception for specification of gzip w/ large array
-            raise ValueError(("`compression='gzip'` does not support"
-                              " arrays > 2GB!\nPlease use `compression=True`"
-                              " (for default compression for large arrays)\n"
-                              " or specify a compatible algorithm."))
-        if six.PY3 and order == 'F':
-            # memoryview (PY3) vs buffer (PY2) issues
-            warn("PY3: Changing array from 'F' to 'C' order")
-            order = 'C'
+        order = 'F' if array.flags.f_contiguous else 'C'
+        if not array.flags['%s_CONTIGUOUS' % order]:
+            print ('array is a slice along a non-contiguous axis. copying the array '
+                   'before saving (will use extra memory)')
+            # create contiguous copy
+            array = np.array(array, order=order)
 
         meta = dict(dtype=array.dtype.str,
                     shape=','.join(map(str, array.shape)),
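
The replacement logic keeps the array's native order when it is already contiguous and only copies when the array is contiguous in neither order. A standalone sketch of that check, using the same two-line pattern as above outside cottoncandy:

    import numpy as np

    a = np.asfortranarray(np.random.randn(20, 10))
    order = 'F' if a.flags.f_contiguous else 'C'    # native 'F' order is kept, no copy
    print(order, a.flags['%s_CONTIGUOUS' % order])  # F True

    b = np.random.randn(20, 10, 5)[..., 2:]         # contiguous in neither order
    order = 'F' if b.flags.f_contiguous else 'C'    # falls back to 'C'
    if not b.flags['%s_CONTIGUOUS' % order]:
        b = np.array(b, order=order)                # contiguous copy (uses extra memory)
    print(b.flags['C_CONTIGUOUS'])                  # True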
@@ -624,7 +622,7 @@ def upload_raw_array(self, object_name, array, compression=DO_COMPRESSION, acl=D
 
         if compression is False:
             filestream = StringIO(array.data)
-        elif compression == "gzip":
+        elif compression == 'gzip':
             if six.PY3 and array.flags['F_CONTIGUOUS']:
                 # eventually, array.data below should be changed to np.getbuffer(array)
                 # (not yet working in python3 numpy)
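
For context on the comment above: `np.getbuffer` exists only on Python 2 builds of NumPy (it wraps the PY2 `buffer` type and was never ported to Python 3), which is why the PY3 path falls back to `np.ascontiguousarray` before handing `array.data` to gzip.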
@@ -644,7 +642,7 @@ def upload_raw_array(self, object_name, array, compression=DO_COMPRESSION, acl=D
             data_nbytes = get_fileobject_size(filestream)
             print('Compressed to %0.2f%% the size'%(data_nbytes / float(orig_nbytes) * 100))
         else:
-            raise ValueError("Unknown compression scheme: %s"%compression)
+            raise ValueError('Unknown compression scheme: %s'%compression)
         response = self.upload_object(object_name, filestream, acl=acl, **meta)
         return response
 
cottoncandy/tests/test_roundtrip.py: 3 changes (2 additions & 1 deletion)
@@ -58,13 +58,14 @@ def content_generator():
     for kind in kinds:
         for order in orders:
             for dtype in types:
+                print(kind, order, dtype)
                 data = np.random.randn(20,10,5)
                 data = np.asarray(data, order=order).astype(dtype)
 
                 if kind == 'raw':
                     yield data
                 elif kind == 'slice':
-                    yield data[int(data.shape[0]/2):]
+                    yield data[...,int(data.shape[0]/2):]
                 elif kind == 'nonco':
                     yield data[np.random.randint(0,data.shape[0],10)]
 
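The updated 'slice' case matters because slicing a C-ordered array along its first axis leaves it C-contiguous, so the old test never reached the copy path in `upload_raw_array`; slicing along the last axis generally yields a non-contiguous view. A quick check, illustrative and not part of the test:

    import numpy as np

    data = np.random.randn(20, 10)            # C-ordered by default
    print(data[10:].flags['C_CONTIGUOUS'])    # True: first-axis slice stays contiguous
    print(data[:, 5:].flags['C_CONTIGUOUS'])  # False: last-axis slice does not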