Skip to content

Commit

Permalink
Add a fastpath parser to UTC formatted datestrings (#610)
Browse files Browse the repository at this point in the history
  • Loading branch information
rowillia authored and garrettheel committed Apr 19, 2019
1 parent 5de103c commit a92962b
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 6 deletions.
33 changes: 28 additions & 5 deletions pynamodb/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,13 +544,16 @@ def deserialize(self, value):
"""
Takes a UTC datetime string and returns a datetime object
"""
# First attempt to parse the datetime with the datetime format used
# by default when storing UTCDateTimeAttributes. This is significantly
# faster than always going through dateutil.
try:
return datetime.strptime(value, DATETIME_FORMAT)
return _fast_parse_utc_datestring(value)
except ValueError:
return parse(value)
try:
# Attempt to parse the datetime with the datetime format used
# by default when storing UTCDateTimeAttributes. This is significantly
# faster than always going through dateutil.
return datetime.strptime(value, DATETIME_FORMAT)
except ValueError:
return parse(value)


class NullAttribute(Attribute):
Expand Down Expand Up @@ -881,6 +884,26 @@ def _get_key_for_serialize(value):
return SERIALIZE_KEY_MAP[value_type]


def _fast_parse_utc_datestring(datestring):
# Method to quickly parse strings formatted with '%Y-%m-%dT%H:%M:%S.%f+0000'.
# This is ~5.8x faster than using strptime and 38x faster than dateutil.parser.parse.
_int = int # Hack to prevent global lookups of int, speeds up the function ~10%
try:
if (datestring[4] != '-' or datestring[7] != '-' or datestring[10] != 'T' or
datestring[13] != ':' or datestring[16] != ':' or datestring[19] != '.' or
datestring[-5:] != '+0000'):
raise ValueError("Datetime string '{}' does not match format "
"'%Y-%m-%dT%H:%M:%S.%f+0000'".format(datestring))
return datetime(
_int(datestring[0:4]), _int(datestring[5:7]), _int(datestring[8:10]),
_int(datestring[11:13]), _int(datestring[14:16]), _int(datestring[17:19]),
_int(round(float(datestring[19:-5]) * 1e6)), tzutc()
)
except (TypeError, ValueError):
raise ValueError("Datetime string '{}' does not match format "
"'%Y-%m-%dT%H:%M:%S.%f+0000'".format(datestring))


class ListAttribute(Attribute):
attr_type = LIST
element_type = None
Expand Down
35 changes: 34 additions & 1 deletion pynamodb/tests/test_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
BinarySetAttribute, BinaryAttribute, NumberSetAttribute, NumberAttribute,
UnicodeAttribute, UnicodeSetAttribute, UTCDateTimeAttribute, BooleanAttribute, LegacyBooleanAttribute,
MapAttribute, MapAttributeMeta, ListAttribute, JSONAttribute, _get_value_for_deserialize,
_fast_parse_utc_datestring,
)
from pynamodb.constants import (
DATETIME_FORMAT, DEFAULT_ENCODING, NUMBER, STRING, STRING_SET, NUMBER_SET, BINARY_SET,
Expand Down Expand Up @@ -172,7 +173,7 @@ def test_utc_date_time_deserialize_parse_args(self, parse_mock, datetime_mock):
attr.deserialize(tstamp_str)

parse_mock.assert_not_called()
datetime_mock.strptime.assert_called_once_with(tstamp_str, DATETIME_FORMAT)
datetime_mock.strptime.assert_not_called()

def test_utc_date_time_serialize(self):
"""
Expand All @@ -182,6 +183,38 @@ def test_utc_date_time_serialize(self):
attr = UTCDateTimeAttribute()
assert attr.serialize(tstamp) == tstamp.replace(tzinfo=UTC).strftime(DATETIME_FORMAT)

def test__fast_parse_utc_datestring_roundtrips(self):
    """
    A timestamp formatted with DATETIME_FORMAT parses back to an equal datetime
    """
    now = datetime.now(UTC)
    serialized = now.strftime(DATETIME_FORMAT)
    parsed = _fast_parse_utc_datestring(serialized)
    assert parsed == now

def test__fast_parse_utc_datestring_no_microseconds(self):
    """
    A fractional part of '.0' parses to a datetime with zero microseconds
    """
    parsed = _fast_parse_utc_datestring('2047-01-06T08:21:00.0+0000')
    assert parsed == datetime(2047, 1, 6, 8, 21, tzinfo=tzutc())

@pytest.mark.parametrize(
    "invalid_string",
    [
        '2.47-01-06T08:21:00.0+0000',  # non-integer year
        '2047-01-06T08:21:00.+0000',  # empty fractional part
        '2047-01-06T08:21:00.0',  # missing '+0000' suffix
        '2047-01-06 08:21:00.0+0000',  # space instead of 'T'
        'abcd-01-06T08:21:00.0+0000',  # non-numeric year
        '2047-ab-06T08:21:00.0+0000',  # non-numeric month
        '2047-01-abT08:21:00.0+0000',  # non-numeric day
        '2047-01-06Tab:21:00.0+0000',  # non-numeric hour
        '2047-01-06T08:ab:00.0+0000',  # non-numeric minute
        '2047-01-06T08:21:ab.0+0000',  # non-numeric second
        '2047-01-06T08:21:00.a+0000',  # non-numeric fraction
        '2047-01-06T08:21:00.0.1+0000',  # two decimal points in the fraction
        '2047-01-06T08:21:00.0+00000',  # suffix one digit too long
    ]
)
def test__fast_parse_utc_datestring_invalid_input(self, invalid_string):
    """
    Each malformed string is rejected with the 'does not match format' ValueError
    """
    with pytest.raises(ValueError, match="does not match format"):
        _fast_parse_utc_datestring(invalid_string)



class TestBinaryAttribute:
"""
Expand Down

0 comments on commit a92962b

Please sign in to comment.