def _fast_parse_utc_datestring(datestring):
    """
    Quickly parse a string formatted with '%Y-%m-%dT%H:%M:%S.%f+0000'.

    This is ~5.8x faster than using strptime and 38x faster than
    dateutil.parser.parse.

    :param datestring: The UTC datetime string to parse. The fractional
        seconds (everything between the '.' and the trailing '+0000') are
        read with float() and rounded to whole microseconds.
    :returns: A timezone-aware datetime using tzutc().
    :raises ValueError: If the input does not match the expected format.
        Inputs that are too short, of a non-indexable type, or that hold
        non-numeric or out-of-range fields are all reported this way, so
        callers can rely on ValueError alone to trigger a slower fallback.
    """
    _int = int  # Hack to prevent global lookups of int, speeds up the function ~10%
    try:
        if (datestring[4] == '-' and datestring[7] == '-' and datestring[10] == 'T' and
                datestring[13] == ':' and datestring[16] == ':' and datestring[19] == '.' and
                datestring[-5:] == '+0000'):
            return datetime(
                _int(datestring[0:4]), _int(datestring[5:7]), _int(datestring[8:10]),
                _int(datestring[11:13]), _int(datestring[14:16]), _int(datestring[17:19]),
                _int(round(float(datestring[19:-5]) * 1e6)), tzutc()
            )
    except (TypeError, ValueError, IndexError, OverflowError):
        # TypeError: the input is not indexable like a string (e.g. None).
        # ValueError: a field is not numeric or is out of range for datetime.
        # IndexError: the input is shorter than the fixed separator positions.
        # OverflowError: the fraction is too large to convert (e.g. '.1e999').
        # All of them mean "wrong format"; fall through to the single raise below.
        pass
    raise ValueError("Datetime string '{}' does not match format "
                     "'%Y-%m-%dT%H:%M:%S.%f+0000'".format(datestring))
@pytest.mark.parametrize(
    "invalid_string",
    [
        '2.47-01-06T08:21:00.0+0000',     # non-numeric character inside the year
        '2047-01-06T08:21:00.+0000',      # empty fractional seconds
        '2047-01-06T08:21:00.0',          # missing '+0000' suffix
        '2047-01-06 08:21:00.0+0000',     # space instead of the 'T' separator
        'abcd-01-06T08:21:00.0+0000',     # non-numeric year
        '2047-ab-06T08:21:00.0+0000',     # non-numeric month
        '2047-01-abT08:21:00.0+0000',     # non-numeric day
        '2047-01-06Tab:21:00.0+0000',     # non-numeric hour
        '2047-01-06T08:ab:00.0+0000',     # non-numeric minute
        '2047-01-06T08:21:ab.0+0000',     # non-numeric second
        '2047-01-06T08:21:00.a+0000',     # non-numeric fraction
        '2047-01-06T08:21:00.0.1+0000',   # two decimal points in the fraction
        '2047-01-06T08:21:00.0+00000',    # one zero too many in the suffix
    ]
)
def test__fast_parse_utc_datestring_invalid_input(self, invalid_string):
    """
    Each malformed string must be rejected with the format-mismatch ValueError.
    """
    with pytest.raises(ValueError, match="does not match format"):
        _fast_parse_utc_datestring(invalid_string)