Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for item validation pipeline #414

Merged
merged 2 commits into from
Aug 30, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions tests/contrib/validation/test_item_validation_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from dataclasses import dataclass

import pytest
import scrapy
from scrapy.utils.test import get_crawler

from spidermon.contrib.scrapy.pipelines import (
ItemValidationPipeline,
PassThroughPipeline,
Expand All @@ -14,6 +18,19 @@ def spidermon_enabled_settings():
}


@pytest.fixture
def dummy_schema():
    """Return a minimal draft-07 JSON Schema used by the validation tests.

    The schema describes an object that must contain a ``foo`` property
    whose value is the constant ``"bar"`` and that allows no other
    properties.
    """
    foo_must_be_bar = {"const": "bar"}
    schema = {
        "$schema": "http://json-schema.org/draft-07/schema",
        "type": "object",
    }
    schema["properties"] = {"foo": foo_must_be_bar}
    schema["required"] = ["foo"]
    schema["additionalProperties"] = False
    return schema


def test_spidermon_enabled_return_item_validation_pipeline():
settings = {
"SPIDERMON_ENABLED": True,
Expand Down Expand Up @@ -50,3 +67,91 @@ def test_pass_through_pipeline():
pipeline = PassThroughPipeline()
item = pipeline.process_item({"original": "item"})
assert item == {"original": "item"}


def test_jsonschema_validation(dummy_schema):
    """Validate items against a JSON Schema through the pipeline.

    An item matching the schema must come back equal to the input, while an
    item missing the required ``foo`` key must come back with a
    ``_validation`` entry describing the error.
    """
    crawler = get_crawler(
        settings_dict={
            "SPIDERMON_ENABLED": True,
            "SPIDERMON_VALIDATION_SCHEMAS": [dummy_schema],
            "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS": True,
        }
    )
    pipeline = ItemValidationPipeline.from_crawler(crawler)

    # The pipeline receives a copy so the expected value stays pristine
    # for the comparison that follows.
    valid_item = {"foo": "bar"}
    processed = pipeline.process_item(dict(valid_item), None)
    assert valid_item == processed

    empty_item = {}
    processed = pipeline.process_item(dict(empty_item), None)
    assert empty_item != processed
    assert "_validation" in processed
    assert processed["_validation"]["foo"] == ["Missing required field"]


def test_validation_errors_field(dummy_schema):
    """Errors are stored under the name set in SPIDERMON_VALIDATION_ERRORS_FIELD."""
    errors_field = "custom_validation_field"
    crawler = get_crawler(
        settings_dict={
            "SPIDERMON_ENABLED": True,
            "SPIDERMON_VALIDATION_SCHEMAS": [dummy_schema],
            "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS": True,
            "SPIDERMON_VALIDATION_ERRORS_FIELD": errors_field,
        }
    )
    pipeline = ItemValidationPipeline.from_crawler(crawler)

    # This item does not satisfy the schema (no "foo", extra key "no").
    processed = pipeline.process_item({"no": "schema"}, None)
    assert "custom_validation_field" in processed


def test_add_error_to_items_undefined_validation_field(dummy_schema):
    """Check each item type when the errors field is not declared on it.

    With a custom errors field configured, an invalid ``dict`` item gains
    that field, whereas a ``scrapy.Item``, a plain dataclass and a dataclass
    with ``__setitem__`` are each expected to raise.
    """
    crawler = get_crawler(
        settings_dict={
            "SPIDERMON_ENABLED": True,
            "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS": True,
            "SPIDERMON_VALIDATION_SCHEMAS": [dummy_schema],
            "SPIDERMON_VALIDATION_ERRORS_FIELD": "custom_validation_field",
        }
    )
    pipeline = ItemValidationPipeline.from_crawler(crawler)

    # Extensible containers such as dict accept the additional field.
    processed = pipeline.process_item({"foo": "invalid"}, None)
    assert "custom_validation_field" in processed

    # Non-extensible item types (scrapy.Item, dataclasses) raise instead.
    class ScrapyItem(scrapy.Item):
        foo = scrapy.Field()

    # Supports item assignment, but not for a field it does not declare.
    scrapy_item = ScrapyItem(foo="invalid")
    with pytest.raises(
        KeyError, match="ScrapyItem does not support field: custom_validation_field"
    ):
        pipeline.process_item(scrapy_item, None)

    @dataclass
    class DataclassItem:
        foo: str

    # Does not support item assignment at all.
    dataclass_item = DataclassItem(foo="invalid")
    with pytest.raises(
        TypeError, match="'DataclassItem' object does not support item assignment"
    ):
        pipeline.process_item(dataclass_item, None)

    @dataclass
    class DataclassItemWithItemAssignment:
        foo: str

        def __setitem__(self, key, value):
            setattr(self, key, value)

    # Supports item assignment, but the errors field is still rejected.
    assignable_item = DataclassItemWithItemAssignment(foo="invalid")
    with pytest.raises(KeyError, match="custom_validation_field"):
        pipeline.process_item(assignable_item, None)
Loading