Skip to content

Commit

Permalink
Do not call DescribeTable for models (#1095)
Browse files Browse the repository at this point in the history
Do not call DescribeTable for models (#1091)

When using DynamoDB through a `Model` or `Index` (rather than `(Table)Connection` directly), we will derive the "meta-table" from the model itself rather than make an initial `DescribeTable` call. This has numerous advantages:
- Faster bootstrap (important for lambdas, as pointed out in #422)
- More consistent handling of attribute types: Before this change, if the PynamoDB model definition and the DynamoDB table definition disagreed on a key attribute's type, PynamoDB would use its own idea of the type in some code paths and the underlying type in others. Now it consistently uses its own idea of the type, allowing an erroneous model definition to be spotted sooner.
- Easier testing, since there's no longer a one-off request that affects global state.

This approach attempts to change the library as little as possible, by synthesizing a MetaTable from the model.

This is a backport of #1091.
  • Loading branch information
ikonst authored Nov 3, 2022
1 parent efe50f9 commit c022adc
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 283 deletions.
13 changes: 13 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
Release Notes
=============

v5.3.0
----------
* No longer call ``DescribeTable`` API before first operation

Before this change, we would call ``DescribeTable`` before the first operation
on a given table in order to discover its schema. This slowed down bootstrap
(particularly important for lambdas), complicated testing and could potentially
cause inconsistent behavior since queries were serialized using the table's
(key) schema but deserialized using the model's schema.

With this change, both queries and models now use the model's schema.


v5.2.3
----------
* Update for botocore 1.28 private API change (#1087) which caused the following exception::
Expand Down
2 changes: 1 addition & 1 deletion pynamodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
"""
__author__ = 'Jharrod LaFon'
__license__ = 'MIT'
__version__ = '5.2.3'
__version__ = '5.3.0'
71 changes: 44 additions & 27 deletions pynamodb/connection/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ def __repr__(self) -> str:
return "MetaTable<{}>".format(self.data.get(TABLE_NAME))
return ""

@property
def table_name(self) -> str:
    """
    The table's name, read from ``self.data`` under the ``TABLE_NAME`` key.
    """
    name = self.data[TABLE_NAME]
    return name

@property
def range_keyname(self) -> Optional[str]:
"""
Expand Down Expand Up @@ -559,25 +566,22 @@ def client(self) -> BotocoreBaseClientPrivate:
self._convert_to_request_dict__endpoint_url = 'endpoint_url' in inspect.signature(self._client._convert_to_request_dict).parameters
return self._client

def get_meta_table(self, table_name: str, refresh: bool = False):
def add_meta_table(self, meta_table: MetaTable) -> None:
"""
Returns a MetaTable
Adds information about the table's schema.
"""
if table_name not in self._tables or refresh:
operation_kwargs = {
TABLE_NAME: table_name
}
try:
data = self.dispatch(DESCRIBE_TABLE, operation_kwargs)
self._tables[table_name] = MetaTable(data.get(TABLE_KEY))
except BotoCoreError as e:
raise TableError("Unable to describe table: {}".format(e), e)
except ClientError as e:
if 'ResourceNotFound' in e.response['Error']['Code']:
raise TableDoesNotExist(e.response['Error']['Message'])
else:
raise
return self._tables[table_name]
if meta_table.table_name in self._tables:
raise ValueError(f"Meta-table for '{meta_table.table_name}' already added")
self._tables[meta_table.table_name] = meta_table

def get_meta_table(self, table_name: str) -> MetaTable:
    """
    Look up the meta-table stored for ``table_name``.

    :param table_name: key under which the meta-table is stored in ``self._tables``
    :returns: the stored ``MetaTable``
    :raises TableError: if nothing is stored in ``self._tables`` for ``table_name``
    """
    try:
        meta_table = self._tables[table_name]
    except KeyError:
        # `from None` hides the internal KeyError so callers only see the TableError.
        raise TableError(f"Meta-table for '{table_name}' not initialized") from None
    return meta_table

def create_table(
self,
Expand Down Expand Up @@ -608,8 +612,8 @@ def create_table(
raise ValueError("attribute_definitions argument is required")
for attr in attribute_definitions:
attrs_list.append({
ATTR_NAME: attr.get('attribute_name'),
ATTR_TYPE: attr.get('attribute_type')
ATTR_NAME: attr.get(ATTR_NAME) or attr['attribute_name'],
ATTR_TYPE: attr.get(ATTR_TYPE) or attr['attribute_type']
})
operation_kwargs[ATTR_DEFINITIONS] = attrs_list

Expand Down Expand Up @@ -639,8 +643,8 @@ def create_table(
key_schema_list = []
for item in key_schema:
key_schema_list.append({
ATTR_NAME: item.get('attribute_name'),
KEY_TYPE: str(item.get('key_type')).upper()
ATTR_NAME: item.get(ATTR_NAME) or item['attribute_name'],
KEY_TYPE: str(item.get(KEY_TYPE) or item['key_type']).upper()
})
operation_kwargs[KEY_SCHEMA] = sorted(key_schema_list, key=lambda x: x.get(KEY_TYPE))

Expand Down Expand Up @@ -767,13 +771,26 @@ def describe_table(self, table_name: str) -> Dict:
"""
Performs the DescribeTable operation
"""
operation_kwargs = {
TABLE_NAME: table_name
}
try:
tbl = self.get_meta_table(table_name, refresh=True)
if tbl:
return tbl.data
except ValueError:
pass
raise TableDoesNotExist(table_name)
data = self.dispatch(DESCRIBE_TABLE, operation_kwargs)
table_data = data.get(TABLE_KEY)
# For compatibility with existing code which uses Connection directly,
# we can let DescribeTable set the meta table.
if table_data:
meta_table = MetaTable(table_data)
if meta_table.table_name not in self._tables:
self.add_meta_table(meta_table)
return table_data
except BotoCoreError as e:
raise TableError("Unable to describe table: {}".format(e), e)
except ClientError as e:
if 'ResourceNotFound' in e.response['Error']['Code']:
raise TableDoesNotExist(e.response['Error']['Message'])
else:
raise

def get_item_attribute_map(
self,
Expand Down
8 changes: 6 additions & 2 deletions pynamodb/connection/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def __init__(
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_session_token: Optional[str] = None,
*,
meta_table: Optional[MetaTable] = None,
) -> None:
self.table_name = table_name
self.connection = Connection(region=region,
Expand All @@ -40,17 +42,19 @@ def __init__(
base_backoff_ms=base_backoff_ms,
max_pool_connections=max_pool_connections,
extra_headers=extra_headers)
if meta_table is not None:
self.connection.add_meta_table(meta_table)

if aws_access_key_id and aws_secret_access_key:
self.connection.session.set_credentials(aws_access_key_id,
aws_secret_access_key,
aws_session_token)

def get_meta_table(self, refresh: bool = False) -> MetaTable:
def get_meta_table(self) -> MetaTable:
"""
Returns a MetaTable
"""
return self.connection.get_meta_table(self.table_name, refresh=refresh)
return self.connection.get_meta_table(self.table_name)

def get_operation_kwargs(
self,
Expand Down
46 changes: 35 additions & 11 deletions pynamodb/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from typing import Union
from typing import cast

from pynamodb.connection.base import MetaTable

if sys.version_info >= (3, 8):
from typing import Protocol
else:
Expand All @@ -38,9 +40,10 @@
from pynamodb.connection.table import TableConnection
from pynamodb.expressions.condition import Condition
from pynamodb.types import HASH, RANGE
from pynamodb.indexes import Index, GlobalSecondaryIndex
from pynamodb.indexes import Index, GlobalSecondaryIndex, LocalSecondaryIndex
from pynamodb.pagination import ResultIterator
from pynamodb.settings import get_settings_value, OperationSettings
from pynamodb import constants
from pynamodb.constants import (
ATTR_DEFINITIONS, ATTR_NAME, ATTR_TYPE, KEY_SCHEMA,
KEY_TYPE, ITEM, READ_CAPACITY_UNITS, WRITE_CAPACITY_UNITS,
Expand All @@ -53,7 +56,7 @@
BATCH_WRITE_PAGE_LIMIT,
META_CLASS_NAME, REGION, HOST, NULL,
COUNT, ITEM_COUNT, KEY, UNPROCESSED_ITEMS, STREAM_VIEW_TYPE,
STREAM_SPECIFICATION, STREAM_ENABLED, BILLING_MODE, PAY_PER_REQUEST_BILLING_MODE, TAGS
STREAM_SPECIFICATION, STREAM_ENABLED, BILLING_MODE, PAY_PER_REQUEST_BILLING_MODE, TAGS, TABLE_NAME
)
from pynamodb.util import attribute_value_to_json
from pynamodb.util import json_to_attribute_value
Expand Down Expand Up @@ -863,18 +866,18 @@ def _get_schema(cls) -> Dict[str, Any]:
for attr_name, attr_cls in cls.get_attributes().items():
if attr_cls.is_hash_key or attr_cls.is_range_key:
schema['attribute_definitions'].append({
'attribute_name': attr_cls.attr_name,
'attribute_type': attr_cls.attr_type
ATTR_NAME: attr_cls.attr_name,
ATTR_TYPE: attr_cls.attr_type
})
if attr_cls.is_hash_key:
schema['key_schema'].append({
'key_type': HASH,
'attribute_name': attr_cls.attr_name
KEY_TYPE: HASH,
ATTR_NAME: attr_cls.attr_name
})
elif attr_cls.is_range_key:
schema['key_schema'].append({
'key_type': RANGE,
'attribute_name': attr_cls.attr_name
KEY_TYPE: RANGE,
ATTR_NAME: attr_cls.attr_name
})
for index in cls._indexes.values():
index_schema = index._get_schema()
Expand All @@ -887,13 +890,13 @@ def _get_schema(cls) -> Dict[str, Any]:
attr_names = {key_schema[ATTR_NAME]
for index_schema in (*schema['global_secondary_indexes'], *schema['local_secondary_indexes'])
for key_schema in index_schema['key_schema']}
attr_keys = {attr.get('attribute_name') for attr in schema['attribute_definitions']}
attr_keys = {attr[ATTR_NAME] for attr in schema['attribute_definitions']}
for attr_name in attr_names:
if attr_name not in attr_keys:
attr_cls = cls.get_attributes()[cls._dynamo_to_python_attr(attr_name)]
schema['attribute_definitions'].append({
'attribute_name': attr_cls.attr_name,
'attribute_type': attr_cls.attr_type
ATTR_NAME: attr_cls.attr_name,
ATTR_TYPE: attr_cls.attr_type
})
return schema

Expand Down Expand Up @@ -1057,7 +1060,28 @@ def _get_connection(cls) -> TableConnection:
# For now we just check that the connection exists and (in the case of model inheritance)
# points to the same table. In the future we should update the connection if any of the attributes differ.
if cls._connection is None or cls._connection.table_name != cls.Meta.table_name:
schema = cls._get_schema()
meta_table = MetaTable({
constants.TABLE_NAME: cls.Meta.table_name,
constants.KEY_SCHEMA: schema['key_schema'],
constants.ATTR_DEFINITIONS: schema['attribute_definitions'],
constants.GLOBAL_SECONDARY_INDEXES: [
{
constants.INDEX_NAME: index_schema['index_name'],
constants.KEY_SCHEMA: index_schema['key_schema'],
}
for index_schema in schema['global_secondary_indexes']
],
constants.LOCAL_SECONDARY_INDEXES: [
{
constants.INDEX_NAME: index_schema['index_name'],
constants.KEY_SCHEMA: index_schema['key_schema'],
}
for index_schema in schema['local_secondary_indexes']
],
})
cls._connection = TableConnection(cls.Meta.table_name,
meta_table=meta_table,
region=cls.Meta.region,
host=cls.Meta.host,
connect_timeout_seconds=cls.Meta.connect_timeout_seconds,
Expand Down
84 changes: 0 additions & 84 deletions tests/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
}
}


MODEL_TABLE_DATA = {
"Table": {
"AttributeDefinitions": [
Expand Down Expand Up @@ -345,89 +344,6 @@
}
}

DESCRIBE_TABLE_DATA_PAY_PER_REQUEST = {
"Table": {
"AttributeDefinitions": [
{
"AttributeName": "ForumName",
"AttributeType": "S"
},
{
"AttributeName": "LastPostDateTime",
"AttributeType": "S"
},
{
"AttributeName": "Subject",
"AttributeType": "S"
}
],
"CreationDateTime": 1.363729002358E9,
"ItemCount": 0,
"KeySchema": [
{
"AttributeName": "ForumName",
"KeyType": "HASH"
},
{
"AttributeName": "Subject",
"KeyType": "RANGE"
}
],
"GlobalSecondaryIndexes": [
{
"IndexName": "LastPostIndex",
"IndexSizeBytes": 0,
"ItemCount": 0,
"KeySchema": [
{
"AttributeName": "ForumName",
"KeyType": "HASH"
},
{
"AttributeName": "LastPostDateTime",
"KeyType": "RANGE"
}
],
"Projection": {
"ProjectionType": "KEYS_ONLY"
}
}
],
"LocalSecondaryIndexes": [
{
"IndexName": "LastPostIndex",
"IndexSizeBytes": 0,
"ItemCount": 0,
"KeySchema": [
{
"AttributeName": "ForumName",
"KeyType": "HASH"
},
{
"AttributeName": "LastPostDateTime",
"KeyType": "RANGE"
}
],
"Projection": {
"ProjectionType": "KEYS_ONLY"
}
}
],
"ProvisionedThroughput": {
"NumberOfDecreasesToday": 0,
"ReadCapacityUnits": 0,
"WriteCapacityUnits": 0
},
"TableName": "Thread",
"TableSizeBytes": 0,
"TableStatus": "ACTIVE",
"BillingModeSummary": {
"BillingMode": "PAY_PER_REQUEST",
"LastUpdateToPayPerRequestDateTime": 1548353644.074
}
}
}

GET_MODEL_ITEM_DATA = {
'Item': {
'user_name': {
Expand Down
Loading

0 comments on commit c022adc

Please sign in to comment.