From bbd68aeeadb65b7c030c05cb994e0f6d8a9d0f08 Mon Sep 17 00:00:00 2001 From: Fokko Date: Wed, 20 Nov 2024 00:32:29 +0100 Subject: [PATCH] Bump `pre-commit` versions --- .pre-commit-config.yaml | 6 +- pyiceberg/catalog/dynamodb.py | 4 +- pyiceberg/catalog/glue.py | 2 +- pyiceberg/io/pyarrow.py | 2 +- pyiceberg/table/update/schema.py | 2 +- pyiceberg/typedef.py | 2 +- pyiceberg/utils/schema_conversion.py | 2 +- tests/avro/test_file.py | 2 +- tests/avro/test_writer.py | 2 +- tests/catalog/test_base.py | 2 +- tests/catalog/test_rest.py | 48 +- tests/expressions/test_visitors.py | 480 +++++++++---------- tests/integration/test_writes/test_writes.py | 4 +- tests/table/test_init.py | 18 +- tests/test_transforms.py | 2 +- 15 files changed, 289 insertions(+), 289 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 10540a6b52..c0b9a31792 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ exclude: ^vendor/ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -29,7 +29,7 @@ repos: - id: check-ast - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version (Used for linting) - rev: v0.1.8 + rev: v0.7.4 hooks: - id: ruff args: [ --fix, --exit-non-zero-on-fix, --preview ] @@ -47,7 +47,7 @@ repos: - id: pycln args: [--config=pyproject.toml] - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.41.0 + rev: v0.42.0 hooks: - id: markdownlint args: ["--fix"] diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py index 6dfb243a42..b3f664bfa0 100644 --- a/pyiceberg/catalog/dynamodb.py +++ b/pyiceberg/catalog/dynamodb.py @@ -330,7 +330,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U log_message += f"Rolled back table creation for {to_database_name}.{to_table_name}." except (NoSuchTableError, GenericDynamoDbError): log_message += ( - f"Failed to roll back table creation for {to_database_name}.{to_table_name}. " f"Please clean up manually" + f"Failed to roll back table creation for {to_database_name}.{to_table_name}. Please clean up manually" ) raise ValueError(log_message) from e @@ -635,7 +635,7 @@ def _convert_dynamo_table_item_to_iceberg_table(self, dynamo_table_item: Dict[st if table_type.lower() != ICEBERG: raise NoSuchIcebergTableError( - f"Property table_type is {table_type}, expected {ICEBERG}: " f"{database_name}.{table_name}" + f"Property table_type is {table_type}, expected {ICEBERG}: {database_name}.{table_name}" ) io = load_file_io(properties=self.properties, location=metadata_location) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 2396114fbb..5e79c99ab8 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -611,7 +611,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U log_message += f"Rolled back table creation for {to_database_name}.{to_table_name}." except NoSuchTableError: log_message += ( - f"Failed to roll back table creation for {to_database_name}.{to_table_name}. " f"Please clean up manually" + f"Failed to roll back table creation for {to_database_name}.{to_table_name}. Please clean up manually" ) raise ValueError(log_message) from e diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 9ab1981069..d2c4a6016e 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -2442,7 +2442,7 @@ def write_parquet(task: WriteTask) -> DataFile: for batch in task.record_batches ] arrow_table = pa.Table.from_batches(batches) - file_path = f'{table_metadata.location}/data/{task.generate_data_file_path("parquet")}' + file_path = f"{table_metadata.location}/data/{task.generate_data_file_path('parquet')}" fo = io.new_output(file_path) with fo.create(overwrite=True) as fos: with pq.ParquetWriter(fos, schema=arrow_table.schema, **parquet_writer_kwargs) as writer: diff --git a/pyiceberg/table/update/schema.py b/pyiceberg/table/update/schema.py index 0c83628f37..8ee3b43c24 100644 --- a/pyiceberg/table/update/schema.py +++ b/pyiceberg/table/update/schema.py @@ -179,7 +179,7 @@ def add_column( if required and not self._allow_incompatible_changes: # Table format version 1 and 2 cannot add required column because there is no initial value - raise ValueError(f'Incompatible change: cannot add required column: {".".join(path)}') + raise ValueError(f"Incompatible change: cannot add required column: {'.'.join(path)}") name = path[-1] parent = path[:-1] diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index 2ff123148b..01b8bea58c 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -157,7 +157,7 @@ class IcebergRootModel(RootModel[T], Generic[T]): @lru_cache def _get_struct_fields(struct_type: StructType) -> Tuple[str, ...]: - return tuple([field.name for field in struct_type.fields]) + return tuple(field.name for field in struct_type.fields) class Record(StructProtocol): diff --git a/pyiceberg/utils/schema_conversion.py b/pyiceberg/utils/schema_conversion.py index 3cba428dd9..8a303b7fb5 100644 --- a/pyiceberg/utils/schema_conversion.py +++ b/pyiceberg/utils/schema_conversion.py @@ -447,7 +447,7 @@ def _convert_logical_map_type(self, avro_type: Dict[str, Any]) -> MapType: """ fields = avro_type["items"]["fields"] if len(fields) != 2: - raise ValueError(f'Invalid key-value pair schema: {avro_type["items"]}') + raise ValueError(f"Invalid key-value pair schema: {avro_type['items']}") key = self._convert_field(list(filter(lambda f: f["name"] == "key", fields))[0]) value = self._convert_field(list(filter(lambda f: f["name"] == "value", fields))[0]) return MapType( diff --git a/tests/avro/test_file.py b/tests/avro/test_file.py index 981aab2547..0756b2670c 100644 --- a/tests/avro/test_file.py +++ b/tests/avro/test_file.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import inspect +from _decimal import Decimal from copy import copy from datetime import date, datetime, time from enum import Enum @@ -23,7 +24,6 @@ from uuid import UUID import pytest -from _decimal import Decimal from fastavro import reader, writer import pyiceberg.avro.file as avro diff --git a/tests/avro/test_writer.py b/tests/avro/test_writer.py index 0bae9ece8c..5a531c7748 100644 --- a/tests/avro/test_writer.py +++ b/tests/avro/test_writer.py @@ -18,10 +18,10 @@ import io import struct +from _decimal import Decimal from typing import Dict, List import pytest -from _decimal import Decimal from pyiceberg.avro.encoder import BinaryEncoder from pyiceberg.avro.resolver import construct_writer diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py index e212854ee2..d9d238fafd 100644 --- a/tests/catalog/test_base.py +++ b/tests/catalog/test_base.py @@ -102,7 +102,7 @@ def create_table( self.__namespaces[namespace] = {} if not location: - location = f'{self._warehouse_location}/{"/".join(identifier)}' + location = f"{self._warehouse_location}/{'/'.join(identifier)}" location = location.rstrip("/") metadata_location = self._get_metadata_location(location=location) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index f8662c1bf4..e3aae3f891 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -299,19 +299,19 @@ def test_properties_sets_headers(requests_mock: Mocker) -> None: **{"header.Content-Type": "application/vnd.api+json", "header.Customized-Header": "some/value"}, ) - assert ( - catalog._session.headers.get("Content-type") == "application/json" - ), "Expected 'Content-Type' default header not to be overwritten" - assert ( - requests_mock.last_request.headers["Content-type"] == "application/json" - ), "Config request did not include expected 'Content-Type' header" + assert catalog._session.headers.get("Content-type") == "application/json", ( + "Expected 'Content-Type' default header not to be overwritten" + ) + assert requests_mock.last_request.headers["Content-type"] == "application/json", ( + "Config request did not include expected 'Content-Type' header" + ) - assert ( - catalog._session.headers.get("Customized-Header") == "some/value" - ), "Expected 'Customized-Header' header to be 'some/value'" - assert ( - requests_mock.last_request.headers["Customized-Header"] == "some/value" - ), "Config request did not include expected 'Customized-Header' header" + assert catalog._session.headers.get("Customized-Header") == "some/value", ( + "Expected 'Customized-Header' header to be 'some/value'" + ) + assert requests_mock.last_request.headers["Customized-Header"] == "some/value", ( + "Config request did not include expected 'Customized-Header' header" + ) def test_config_sets_headers(requests_mock: Mocker) -> None: @@ -328,19 +328,19 @@ def test_config_sets_headers(requests_mock: Mocker) -> None: catalog = RestCatalog("rest", uri=TEST_URI, warehouse="s3://some-bucket") catalog.create_namespace(namespace) - assert ( - catalog._session.headers.get("Content-type") == "application/json" - ), "Expected 'Content-Type' default header not to be overwritten" - assert ( - requests_mock.last_request.headers["Content-type"] == "application/json" - ), "Create namespace request did not include expected 'Content-Type' header" + assert catalog._session.headers.get("Content-type") == "application/json", ( + "Expected 'Content-Type' default header not to be overwritten" + ) + assert requests_mock.last_request.headers["Content-type"] == "application/json", ( + "Create namespace request did not include expected 'Content-Type' header" + ) - assert ( - catalog._session.headers.get("Customized-Header") == "some/value" - ), "Expected 'Customized-Header' header to be 'some/value'" - assert ( - requests_mock.last_request.headers["Customized-Header"] == "some/value" - ), "Create namespace request did not include expected 'Customized-Header' header" + assert catalog._session.headers.get("Customized-Header") == "some/value", ( + "Expected 'Customized-Header' header to be 'some/value'" + ) + assert requests_mock.last_request.headers["Customized-Header"] == "some/value", ( + "Create namespace request did not include expected 'Customized-Header' header" + ) def test_token_400(rest_mock: Mocker) -> None: diff --git a/tests/expressions/test_visitors.py b/tests/expressions/test_visitors.py index 94bfcf076c..d61c193719 100644 --- a/tests/expressions/test_visitors.py +++ b/tests/expressions/test_visitors.py @@ -947,95 +947,95 @@ def manifest() -> ManifestFile: def test_all_nulls(schema: Schema, manifest: ManifestFile) -> None: - assert not _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval( - manifest - ), "Should skip: all nulls column with non-floating type contains all null" + assert not _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval(manifest), ( + "Should skip: all nulls column with non-floating type contains all null" + ) - assert _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval( - manifest - ), "Should read: no NaN information may indicate presence of NaN value" + assert _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval(manifest), ( + "Should read: no NaN information may indicate presence of NaN value" + ) - assert _ManifestEvalVisitor(schema, NotNull(Reference("some_nulls")), case_sensitive=True).eval( - manifest - ), "Should read: column with some nulls contains a non-null value" + assert _ManifestEvalVisitor(schema, NotNull(Reference("some_nulls")), case_sensitive=True).eval(manifest), ( + "Should read: column with some nulls contains a non-null value" + ) - assert _ManifestEvalVisitor(schema, NotNull(Reference("no_nulls")), case_sensitive=True).eval( - manifest - ), "Should read: non-null column contains a non-null value" + assert _ManifestEvalVisitor(schema, NotNull(Reference("no_nulls")), case_sensitive=True).eval(manifest), ( + "Should read: non-null column contains a non-null value" + ) def test_no_nulls(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, IsNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval( - manifest - ), "Should read: at least one null value in all null column" + assert _ManifestEvalVisitor(schema, IsNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval(manifest), ( + "Should read: at least one null value in all null column" + ) - assert _ManifestEvalVisitor(schema, IsNull(Reference("some_nulls")), case_sensitive=True).eval( - manifest - ), "Should read: column with some nulls contains a null value" + assert _ManifestEvalVisitor(schema, IsNull(Reference("some_nulls")), case_sensitive=True).eval(manifest), ( + "Should read: column with some nulls contains a null value" + ) - assert not _ManifestEvalVisitor(schema, IsNull(Reference("no_nulls")), case_sensitive=True).eval( - manifest - ), "Should skip: non-null column contains no null values" + assert not _ManifestEvalVisitor(schema, IsNull(Reference("no_nulls")), case_sensitive=True).eval(manifest), ( + "Should skip: non-null column contains no null values" + ) - assert _ManifestEvalVisitor(schema, IsNull(Reference("both_nan_and_null")), case_sensitive=True).eval( - manifest - ), "Should read: both_nan_and_null column contains no null values" + assert _ManifestEvalVisitor(schema, IsNull(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), ( + "Should read: both_nan_and_null column contains no null values" + ) def test_is_nan(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, IsNaN(Reference("float")), case_sensitive=True).eval( - manifest - ), "Should read: no information on if there are nan value in float column" + assert _ManifestEvalVisitor(schema, IsNaN(Reference("float")), case_sensitive=True).eval(manifest), ( + "Should read: no information on if there are nan value in float column" + ) - assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_double")), case_sensitive=True).eval( - manifest - ), "Should read: no NaN information may indicate presence of NaN value" + assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_double")), case_sensitive=True).eval(manifest), ( + "Should read: no NaN information may indicate presence of NaN value" + ) - assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval( - manifest - ), "Should read: no NaN information may indicate presence of NaN value" + assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval(manifest), ( + "Should read: no NaN information may indicate presence of NaN value" + ) - assert not _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval( - manifest - ), "Should skip: no nan column doesn't contain nan value" + assert not _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval(manifest), ( + "Should skip: no nan column doesn't contain nan value" + ) - assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nans")), case_sensitive=True).eval( - manifest - ), "Should read: all_nans column contains nan value" + assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nans")), case_sensitive=True).eval(manifest), ( + "Should read: all_nans column contains nan value" + ) - assert _ManifestEvalVisitor(schema, IsNaN(Reference("both_nan_and_null")), case_sensitive=True).eval( - manifest - ), "Should read: both_nan_and_null column contains nan value" + assert _ManifestEvalVisitor(schema, IsNaN(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), ( + "Should read: both_nan_and_null column contains nan value" + ) - assert not _ManifestEvalVisitor(schema, IsNaN(Reference("no_nan_or_null")), case_sensitive=True).eval( - manifest - ), "Should skip: no_nan_or_null column doesn't contain nan value" + assert not _ManifestEvalVisitor(schema, IsNaN(Reference("no_nan_or_null")), case_sensitive=True).eval(manifest), ( + "Should skip: no_nan_or_null column doesn't contain nan value" + ) def test_not_nan(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, NotNaN(Reference("float")), case_sensitive=True).eval( - manifest - ), "Should read: no information on if there are nan value in float column" + assert _ManifestEvalVisitor(schema, NotNaN(Reference("float")), case_sensitive=True).eval(manifest), ( + "Should read: no information on if there are nan value in float column" + ) - assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_double")), case_sensitive=True).eval( - manifest - ), "Should read: all null column contains non nan value" + assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_double")), case_sensitive=True).eval(manifest), ( + "Should read: all null column contains non nan value" + ) - assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval( - manifest - ), "Should read: no_nans column contains non nan value" + assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval(manifest), ( + "Should read: no_nans column contains non nan value" + ) - assert not _ManifestEvalVisitor(schema, NotNaN(Reference("all_nans")), case_sensitive=True).eval( - manifest - ), "Should skip: all nans column doesn't contain non nan value" + assert not _ManifestEvalVisitor(schema, NotNaN(Reference("all_nans")), case_sensitive=True).eval(manifest), ( + "Should skip: all nans column doesn't contain non nan value" + ) - assert _ManifestEvalVisitor(schema, NotNaN(Reference("both_nan_and_null")), case_sensitive=True).eval( - manifest - ), "Should read: both_nan_and_null nans column contains non nan value" + assert _ManifestEvalVisitor(schema, NotNaN(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), ( + "Should read: both_nan_and_null nans column contains non nan value" + ) - assert _ManifestEvalVisitor(schema, NotNaN(Reference("no_nan_or_null")), case_sensitive=True).eval( - manifest - ), "Should read: no_nan_or_null column contains non nan value" + assert _ManifestEvalVisitor(schema, NotNaN(Reference("no_nan_or_null")), case_sensitive=True).eval(manifest), ( + "Should read: no_nan_or_null column contains non nan value" + ) def test_missing_stats(schema: Schema, manifest_no_stats: ManifestFile) -> None: @@ -1053,15 +1053,15 @@ def test_missing_stats(schema: Schema, manifest_no_stats: ManifestFile) -> None: ] for expr in expressions: - assert _ManifestEvalVisitor(schema, expr, case_sensitive=True).eval( - manifest_no_stats - ), f"Should read when missing stats for expr: {expr}" + assert _ManifestEvalVisitor(schema, expr, case_sensitive=True).eval(manifest_no_stats), ( + f"Should read when missing stats for expr: {expr}" + ) def test_not(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, Not(LessThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval( - manifest - ), "Should read: not(false)" + assert _ManifestEvalVisitor(schema, Not(LessThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(manifest), ( + "Should read: not(false)" + ) assert not _ManifestEvalVisitor(schema, Not(GreaterThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval( manifest @@ -1118,21 +1118,21 @@ def test_or(schema: Schema, manifest: ManifestFile) -> None: def test_integer_lt(schema: Schema, manifest: ManifestFile) -> None: - assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval( - manifest - ), "Should not read: id range below lower bound (5 < 30)" + assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), ( + "Should not read: id range below lower bound (5 < 30)" + ) - assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval( - manifest - ), "Should not read: id range below lower bound (30 is not < 30)" + assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), ( + "Should not read: id range below lower bound (30 is not < 30)" + ) - assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE + 1), case_sensitive=True).eval( - manifest - ), "Should read: one possible id" + assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE + 1), case_sensitive=True).eval(manifest), ( + "Should read: one possible id" + ) - assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: may possible ids" + assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: may possible ids" + ) def test_integer_lt_eq(schema: Schema, manifest: ManifestFile) -> None: @@ -1144,13 +1144,13 @@ def test_integer_lt_eq(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should not read: id range below lower bound (29 < 30)" - assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: one possible id" + assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: one possible id" + ) - assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: many possible ids" + assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: many possible ids" + ) def test_integer_gt(schema: Schema, manifest: ManifestFile) -> None: @@ -1158,17 +1158,17 @@ def test_integer_gt(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should not read: id range above upper bound (85 < 79)" - assert not _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( - manifest - ), "Should not read: id range above upper bound (79 is not > 79)" + assert not _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( + "Should not read: id range above upper bound (79 is not > 79)" + ) - assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 1), case_sensitive=True).eval( - manifest - ), "Should read: one possible id" + assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 1), case_sensitive=True).eval(manifest), ( + "Should read: one possible id" + ) - assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval( - manifest - ), "Should read: may possible ids" + assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), ( + "Should read: may possible ids" + ) def test_integer_gt_eq(schema: Schema, manifest: ManifestFile) -> None: @@ -1180,133 +1180,133 @@ def test_integer_gt_eq(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should not read: id range above upper bound (80 > 79)" - assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: one possible id" + assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: one possible id" + ) - assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: may possible ids" + assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: may possible ids" + ) def test_integer_eq(schema: Schema, manifest: ManifestFile) -> None: - assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval( - manifest - ), "Should not read: id below lower bound" + assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), ( + "Should not read: id below lower bound" + ) - assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval( - manifest - ), "Should not read: id below lower bound" + assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval(manifest), ( + "Should not read: id below lower bound" + ) - assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: id equal to lower bound" + assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: id equal to lower bound" + ) - assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval( - manifest - ), "Should read: id between lower and upper bounds" + assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), ( + "Should read: id between lower and upper bounds" + ) - assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: id equal to upper bound" + assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: id equal to upper bound" + ) - assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval( - manifest - ), "Should not read: id above upper bound" + assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval(manifest), ( + "Should not read: id above upper bound" + ) - assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval( - manifest - ), "Should not read: id above upper bound" + assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval(manifest), ( + "Should not read: id above upper bound" + ) def test_integer_not_eq(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval( - manifest - ), "Should read: id below lower bound" + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), ( + "Should read: id below lower bound" + ) - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval( - manifest - ), "Should read: id below lower bound" + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval(manifest), ( + "Should read: id below lower bound" + ) - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: id equal to lower bound" + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: id equal to lower bound" + ) - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval( - manifest - ), "Should read: id between lower and upper bounds" + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), ( + "Should read: id between lower and upper bounds" + ) - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( - manifest - ), "Should read: id equal to upper bound" + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( + "Should read: id equal to upper bound" + ) - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval( - manifest - ), "Should read: id above upper bound" + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval(manifest), ( + "Should read: id above upper bound" + ) - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval( - manifest - ), "Should read: id above upper bound" + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval(manifest), ( + "Should read: id above upper bound" + ) def test_integer_not_eq_rewritten(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval( - manifest - ), "Should read: id below lower bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(manifest), ( + "Should read: id below lower bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 1)), case_sensitive=True).eval( - manifest - ), "Should read: id below lower bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 1)), case_sensitive=True).eval(manifest), ( + "Should read: id below lower bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE)), case_sensitive=True).eval( - manifest - ), "Should read: id equal to lower bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE)), case_sensitive=True).eval(manifest), ( + "Should read: id equal to lower bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE - 4)), case_sensitive=True).eval( - manifest - ), "Should read: id between lower and upper bounds" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE - 4)), case_sensitive=True).eval(manifest), ( + "Should read: id between lower and upper bounds" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE)), case_sensitive=True).eval( - manifest - ), "Should read: id equal to upper bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE)), case_sensitive=True).eval(manifest), ( + "Should read: id equal to upper bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 1)), case_sensitive=True).eval( - manifest - ), "Should read: id above upper bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 1)), case_sensitive=True).eval(manifest), ( + "Should read: id above upper bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 6)), case_sensitive=True).eval( - manifest - ), "Should read: id above upper bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 6)), case_sensitive=True).eval(manifest), ( + "Should read: id above upper bound" + ) def test_integer_not_eq_rewritten_case_insensitive(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 25)), case_sensitive=False).eval( - manifest - ), "Should read: id below lower bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 25)), case_sensitive=False).eval(manifest), ( + "Should read: id below lower bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 1)), case_sensitive=False).eval( - manifest - ), "Should read: id below lower bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 1)), case_sensitive=False).eval(manifest), ( + "Should read: id below lower bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE)), case_sensitive=False).eval( - manifest - ), "Should read: id equal to lower bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE)), case_sensitive=False).eval(manifest), ( + "Should read: id equal to lower bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE - 4)), case_sensitive=False).eval( - manifest - ), "Should read: id between lower and upper bounds" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE - 4)), case_sensitive=False).eval(manifest), ( + "Should read: id between lower and upper bounds" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE)), case_sensitive=False).eval( - manifest - ), "Should read: id equal to upper bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE)), case_sensitive=False).eval(manifest), ( + "Should read: id equal to upper bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 1)), case_sensitive=False).eval( - manifest - ), "Should read: id above upper bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 1)), case_sensitive=False).eval(manifest), ( + "Should read: id above upper bound" + ) - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 6)), case_sensitive=False).eval( - manifest - ), "Should read: id above upper bound" + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 6)), case_sensitive=False).eval(manifest), ( + "Should read: id above upper bound" + ) def test_integer_in(schema: Schema, manifest: ManifestFile) -> None: @@ -1342,13 +1342,13 @@ def test_integer_in(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should skip: in on all nulls column" - assert _ManifestEvalVisitor(schema, In(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval( - manifest - ), "Should read: in on some nulls column" + assert _ManifestEvalVisitor(schema, In(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), ( + "Should read: in on some nulls column" + ) - assert _ManifestEvalVisitor(schema, In(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval( - manifest - ), "Should read: in on no nulls column" + assert _ManifestEvalVisitor(schema, In(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), ( + "Should read: in on no nulls column" + ) def test_integer_not_in(schema: Schema, manifest: ManifestFile) -> None: @@ -1384,73 +1384,73 @@ def test_integer_not_in(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should read: notIn on no nulls column" - assert _ManifestEvalVisitor(schema, NotIn(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval( - manifest - ), "Should read: in on some nulls column" + assert _ManifestEvalVisitor(schema, NotIn(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), ( + "Should read: in on some nulls column" + ) - assert _ManifestEvalVisitor(schema, NotIn(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval( - manifest - ), "Should read: in on no nulls column" + assert _ManifestEvalVisitor(schema, NotIn(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), ( + "Should read: in on no nulls column" + ) def test_string_starts_with(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, StartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, StartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval( - manifest - ), "Should skip: range doesn't match" + assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval(manifest), ( + "Should skip: range doesn't match" + ) - assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval( - manifest - ), "Should skip: range doesn't match" + assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval(manifest), ( + "Should skip: range doesn't match" + ) def test_string_not_starts_with(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval( - manifest - ), "Should read: range matches" + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval(manifest), ( + "Should read: range matches" + ) assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("all_same_value_or_null"), "a"), case_sensitive=False).eval( manifest diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index e0b788e88c..78ffc79c50 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -1301,7 +1301,7 @@ def test_merge_manifests_file_content(session_catalog: Catalog, arrow_table_with (9, b"\x00\x9bj\xca8\xf1\x05\x00"), (10, b"\x9eK\x00\x00"), (11, b"\x01"), - (12, b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" b"\x00\x00\x00\x00"), + (12, b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"), ] assert tbl_a_data_file["nan_value_counts"] == [] assert tbl_a_data_file["null_value_counts"] == [ @@ -1334,7 +1334,7 @@ def test_merge_manifests_file_content(session_catalog: Catalog, arrow_table_with (9, b"\x00\xbb\r\xab\xdb\xf5\x05\x00"), (10, b"\xd9K\x00\x00"), (11, b"\x12"), - (12, b"\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11" b"\x11\x11\x11\x11"), + (12, b"\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11"), ] assert tbl_a_data_file["value_counts"] == [ (1, 3), diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 1c4029a292..040c67034b 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -538,15 +538,15 @@ def test_update_column(table_v1: Table, table_v2: Table) -> None: assert new_schema3.find_field("z").required is False, "failed to update existing field required" # assert the above two updates also works with union_by_name - assert ( - table.update_schema().union_by_name(new_schema)._apply() == new_schema - ), "failed to update existing field doc with union_by_name" - assert ( - table.update_schema().union_by_name(new_schema2)._apply() == new_schema2 - ), "failed to remove existing field doc with union_by_name" - assert ( - table.update_schema().union_by_name(new_schema3)._apply() == new_schema3 - ), "failed to update existing field required with union_by_name" + assert table.update_schema().union_by_name(new_schema)._apply() == new_schema, ( + "failed to update existing field doc with union_by_name" + ) + assert table.update_schema().union_by_name(new_schema2)._apply() == new_schema2, ( + "failed to remove existing field doc with union_by_name" + ) + assert table.update_schema().union_by_name(new_schema3)._apply() == new_schema3, ( + "failed to update existing field required with union_by_name" + ) def test_add_primitive_type_column(table_v2: Table) -> None: diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 3a9ffd6009..bb535f1d40 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -973,7 +973,7 @@ def test_projection_truncate_string_not_starts_with(bound_reference_str: BoundRe def _test_projection(lhs: Optional[UnboundPredicate[L]], rhs: Optional[UnboundPredicate[L]]) -> None: - assert type(lhs) == type(lhs), f"Different classes: {type(lhs)} != {type(rhs)}" + assert type(lhs) is type(lhs), f"Different classes: {type(lhs)} != {type(rhs)}" if lhs is None and rhs is None: # Both null pass