diff --git a/tests/contrib/validation/test_item_validation_pipeline.py b/tests/contrib/validation/test_item_validation_pipeline.py
index e7df8136..a0bbf19e 100644
--- a/tests/contrib/validation/test_item_validation_pipeline.py
+++ b/tests/contrib/validation/test_item_validation_pipeline.py
@@ -1,5 +1,9 @@
+from dataclasses import dataclass
+
 import pytest
+import scrapy
 from scrapy.utils.test import get_crawler
+
 from spidermon.contrib.scrapy.pipelines import (
     ItemValidationPipeline,
     PassThroughPipeline,
@@ -14,6 +18,19 @@ def spidermon_enabled_settings():
     }
 
 
+@pytest.fixture
+def dummy_schema():
+    return {
+        "$schema": "http://json-schema.org/draft-07/schema",
+        "type": "object",
+        "properties": {
+            "foo": {"const": "bar"},
+        },
+        "required": ["foo"],
+        "additionalProperties": False,
+    }
+
+
 def test_spidermon_enabled_return_item_validation_pipeline():
     settings = {
         "SPIDERMON_ENABLED": True,
@@ -50,3 +67,91 @@ def test_pass_through_pipeline():
     pipeline = PassThroughPipeline()
     item = pipeline.process_item({"original": "item"})
     assert item == {"original": "item"}
+
+
+def test_jsonschema_validation(dummy_schema):
+    settings = {
+        "SPIDERMON_ENABLED": True,
+        "SPIDERMON_VALIDATION_SCHEMAS": [dummy_schema],
+        "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS": True,
+    }
+
+    item = {"foo": "bar"}
+
+    crawler = get_crawler(settings_dict=settings)
+    pipeline = ItemValidationPipeline.from_crawler(crawler)
+    result_item = pipeline.process_item(dict(item), None)
+    assert item == result_item
+
+    item = {}
+    result_item = pipeline.process_item(dict(item), None)
+    assert item != result_item
+    assert "_validation" in result_item
+    assert result_item["_validation"]["foo"] == ["Missing required field"]
+
+
+def test_validation_errors_field(dummy_schema):
+    settings = {
+        "SPIDERMON_ENABLED": True,
+        "SPIDERMON_VALIDATION_SCHEMAS": [dummy_schema],
+        "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS": True,
+        "SPIDERMON_VALIDATION_ERRORS_FIELD": "custom_validation_field",
+    }
+
+    item = {"no": "schema"}
+
+    crawler = get_crawler(settings_dict=settings)
+    pipeline = ItemValidationPipeline.from_crawler(crawler)
+    item = pipeline.process_item(item, None)
+    assert "custom_validation_field" in item
+
+
+def test_add_error_to_items_undefined_validation_field(dummy_schema):
+    settings = {
+        "SPIDERMON_ENABLED": True,
+        "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS": True,
+        "SPIDERMON_VALIDATION_SCHEMAS": [dummy_schema],
+        "SPIDERMON_VALIDATION_ERRORS_FIELD": "custom_validation_field",
+    }
+
+    crawler = get_crawler(settings_dict=settings)
+    pipeline = ItemValidationPipeline.from_crawler(crawler)
+
+    # Extensible classes like dict support adding additional field
+    item = {"foo": "invalid"}
+    item = pipeline.process_item(item, None)
+    assert "custom_validation_field" in item
+
+    # Non-extensible classes like scrapy.Item and dataclasses raise errors
+    class ScrapyItem(scrapy.Item):
+        foo = scrapy.Field()
+
+    item = ScrapyItem(foo="invalid")
+    # Supports item assignment but does not support the extra field
+    with pytest.raises(
+        KeyError, match="ScrapyItem does not support field: custom_validation_field"
+    ):
+        item = pipeline.process_item(item, None)
+
+    @dataclass
+    class DataclassItem:
+        foo: str
+
+    item = DataclassItem(foo="invalid")
+    # Does not support item assignment
+    with pytest.raises(
+        TypeError, match="'DataclassItem' object does not support item assignment"
+    ):
+        item = pipeline.process_item(item, None)
+
+    @dataclass
+    class DataclassItemWithItemAssignment:
+        foo: str
+
+        def __setitem__(self, key, value):
+            setattr(self, key, value)
+
+    item = DataclassItemWithItemAssignment(foo="invalid")
+    # Supports item assignment but does not support field
+    with pytest.raises(KeyError, match="custom_validation_field"):
+        item = pipeline.process_item(item, None)