Skip to content

Commit

Permalink
common.py: Improved handling of jsonschema
Browse files Browse the repository at this point in the history
Handle urn references and allOf conditional in
schema_dict_fields_generator (plus added tests)
  • Loading branch information
Ed (ODSC) committed Dec 11, 2024
1 parent d543d67 commit feed77c
Show file tree
Hide file tree
Showing 13 changed files with 6,909 additions and 10 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Run the test suite on every push and pull request, across all supported
# Python versions.
name: Test
on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
    steps:
      # Current major versions: the v1/v2 releases of these actions run on
      # deprecated Node runtimes, and actions/cache v1-v2 has been retired.
      - uses: actions/checkout@v4
      - name: Setup python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64
      # Cache pip downloads per OS and Python version, falling back to any
      # pip cache for the same OS.
      - uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install dependencies
        run: pip install -e .[dev]
      - name: Run tests
        run: py.test tests
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]

### Changed

- Improved handling of JSON Schema: `schema_dict_fields_generator` now handles `urn:` references and conditional (`then`) clauses inside `allOf`

## [0.1.0] - 2023-05-31

First Release
Expand Down
119 changes: 109 additions & 10 deletions libcove2/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,131 @@
)


def schema_dict_fields_generator(schema_dict):
def _resolve_ref(value, defs, registry=None):
if value["$ref"].startswith("urn:"):
subschema = registry.contents(value["$ref"].split("#")[0])
if "#/$defs/" in value["$ref"]:
name = value["$ref"].split("$defs/")[-1]
defs = subschema["$defs"]
subschema = defs[name]
return subschema
elif value["$ref"].startswith("#/$defs/"):
name = value["$ref"].split("$defs/")[-1]
return defs[name]


def _process_items(schema_dict, registry=None, defs=None):
items_schema_dicts = []
if "oneOf" in schema_dict["items"] and isinstance(
schema_dict["items"]["oneOf"], list
):
for oneOf in schema_dict["items"]["oneOf"]:
items_schema_dicts.append(oneOf)
elif "$ref" in schema_dict["items"]:
if schema_dict["items"]["$ref"].startswith("urn:"):
subschema = registry.contents(schema_dict["items"]["$ref"].split("#")[0])
if "#/$defs/" in schema_dict["items"]["$ref"]:
name = schema_dict["items"]["$ref"].split("$defs/")[-1]
defs = subschema["$defs"]
subschema = defs[name]
items_schema_dicts.append(subschema)
elif schema_dict["items"]["$ref"].startswith("#/$defs/"):
name = schema_dict["items"]["$ref"].split("$defs/")[-1]
items_schema_dicts.append(defs[name])
elif "properties" in schema_dict["items"] and isinstance(
schema_dict["items"]["properties"], dict
):
items_schema_dicts.append(schema_dict["items"])
return items_schema_dicts


def schema_dict_fields_generator(schema_dict, registry=None, defs=None):
"""
Iterate over the fields in the input schema, recursing into subschemas.

Parameters:
schema_dict (dict): Current input schema or subset thereof.
registry (object, optional): Registry object from the referencing package.
Contains all schema files that might be referenced. Its
``contents(uri)`` method is called to look up "urn:" references,
which are the only references resolved through the registry.
defs (dict, optional): Contents of the "$defs" schema property.
Replaced by ``schema_dict["$defs"]`` when that key is present.
This will usually only be used internally when the function is
calling itself.
Yields:
str: Path of a field, built from "/"-prefixed property names.
"""
# A "$defs" section on this (sub)schema supersedes the one passed in.
if "$defs" in schema_dict:
defs = schema_dict["$defs"]
if "properties" in schema_dict and isinstance(schema_dict["properties"], dict):
for property_name, value in schema_dict["properties"].items():
if "oneOf" in value:
if "$ref" in value:
if value["$ref"].startswith("urn:"):
property_schema_dicts = registry.contents(
value["$ref"].split("#")[0]
)
if "$defs/" in value["$ref"]:
name = value["$ref"].split("$defs/")[-1]
property_schema_dicts = property_schema_dicts["$defs"][name]
else:
name = value["$ref"].split("$defs/")[-1]
property_schema_dicts = defs[name]
property_schema_dicts = [property_schema_dicts]
elif (
"items" in value
and isinstance(value["items"], dict)
and ("type" not in value["items"] or value["items"]["type"] == "object")
):
property_schema_dicts = _process_items(
value, registry=registry, defs=defs
)
elif "oneOf" in value:
property_schema_dicts = value["oneOf"]
else:
property_schema_dicts = [value]
for property_schema_dict in property_schema_dicts:
if not isinstance(property_schema_dict, dict):
continue
if "properties" in property_schema_dict:
for field in schema_dict_fields_generator(property_schema_dict):
for field in schema_dict_fields_generator(
property_schema_dict, registry=registry, defs=defs
):
yield f"/{property_name}{field}"
elif "items" in property_schema_dict:
for field in schema_dict_fields_generator(
property_schema_dict["items"]
property_schema_dict["items"], registry=registry, defs=defs
):
yield f"/{property_name}{field}"
elif "$ref" in property_schema_dict:
item_schema_dict = _resolve_ref(
property_schema_dict, defs, registry=registry
)
for field in schema_dict_fields_generator(
item_schema_dict, registry=registry, defs=defs
):
yield f"/{property_name}{field}"
yield f"/{property_name}"
if "items" in schema_dict and isinstance(schema_dict["items"], dict):
if "oneOf" in schema_dict["items"] and isinstance(
schema_dict["items"]["oneOf"], list
):
for oneOf in schema_dict["items"]["oneOf"]:
for field in schema_dict_fields_generator(oneOf):
# Descend into the "then" branch of each allOf clause that has one.
if "allOf" in schema_dict and isinstance(schema_dict["allOf"], list):
for clause in schema_dict["allOf"]:
if "then" in clause and isinstance(clause["then"], dict):
for field in schema_dict_fields_generator(
clause["then"], registry=registry, defs=defs
):
yield field
# Only descend into "items" that are untyped or declared as objects.
if (
"items" in schema_dict
and isinstance(schema_dict["items"], dict)
and (
"type" not in schema_dict["items"]
or schema_dict["items"]["type"] == "object"
)
):
items_schema_dicts = _process_items(schema_dict, registry=registry, defs=defs)
for items_schema_dict in items_schema_dicts:
for field in schema_dict_fields_generator(
items_schema_dict, registry=registry, defs=defs
):
yield field


def get_additional_fields_info(json_data, schema_fields, fields_regex=False):
Expand Down
103 changes: 103 additions & 0 deletions tests/fixtures/bods-data-0-3-0-additional.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
[
{
"statementID": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"statementType": "entityStatement",
"isComponent": false,
"statementDate": "2017-11-18",
"entityType": "registeredEntity",
"name": "CHRINON LTD",
"foundingDate": "2010-11-18",
"identifiers": [
{
"scheme": "GB-COH",
"id": "07444723"
}
],
"publicListing": {
"hasPublicListing": true,
"companyFilingsURLs": ["http://example.com/"],
"securitiesListings": []
},
"publicationDetails": {
"publicationDate": "2018-02-13",
"bodsVersion": "0.3",
"publisher": {
"name": "CHRINON LTD"
}
},
"additional": true
},
{
"statementID": "019a93f1-e470-42e9-957b-03559861b2e2",
"statementType": "personStatement",
"isComponent": false,
"statementDate": "2017-11-18",
"personType": "knownPerson",
"nationalities": [
{
"code": "GB",
"name": "United Kingdom of Great Britain and Northern Ireland (the)"
}
],
"names": [
{
"type": "individual",
"fullName": "Christopher Taggart",
"givenName": "Christopher",
"familyName": "Taggart"
},
{
"type": "alternative",
"fullName": "Chris Taggart"
}
],
"birthDate": "1964-04",
"addresses": [
{
"type": "service",
"address": "Aston House, Cornwall Avenue, London",
"country": "GB",
"postCode": "N3 1LF"
}
],
"publicationDetails": {
"publicationDate": "2018-02-13",
"bodsVersion": "0.3",
"publisher": {
"name": "CHRINON LTD"
}
}
},
{
"statementID": "fbfd0547-d0c6-4a00-b559-5c5e91c34f5c",
"statementType": "ownershipOrControlStatement",
"isComponent": false,
"statementDate": "2017-11-18",
"subject": {
"describedByEntityStatement": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7"
},
"interestedParty": {
"describedByPersonStatement": "019a93f1-e470-42e9-957b-03559861b2e2"
},
"interests": [
{
"type": "shareholding",
"directOrIndirect": "direct",
"beneficialOwnershipOrControl": true,
"startDate": "2016-04-06",
"share": {
"exact": 100,
"minimum": 100,
"maximum": 100
}
}
],
"publicationDetails": {
"publicationDate": "2018-02-13",
"bodsVersion": "0.3",
"publisher": {
"name": "CHRINON LTD"
}
}
}
]
102 changes: 102 additions & 0 deletions tests/fixtures/bods-data-0-3-0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
[
{
"statementID": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"statementType": "entityStatement",
"isComponent": false,
"statementDate": "2017-11-18",
"entityType": "registeredEntity",
"name": "CHRINON LTD",
"foundingDate": "2010-11-18",
"identifiers": [
{
"scheme": "GB-COH",
"id": "07444723"
}
],
"publicListing": {
"hasPublicListing": true,
"companyFilingsURLs": ["http://example.com/"],
"securitiesListings": []
},
"publicationDetails": {
"publicationDate": "2018-02-13",
"bodsVersion": "0.3",
"publisher": {
"name": "CHRINON LTD"
}
}
},
{
"statementID": "019a93f1-e470-42e9-957b-03559861b2e2",
"statementType": "personStatement",
"isComponent": false,
"statementDate": "2017-11-18",
"personType": "knownPerson",
"nationalities": [
{
"code": "GB",
"name": "United Kingdom of Great Britain and Northern Ireland (the)"
}
],
"names": [
{
"type": "individual",
"fullName": "Christopher Taggart",
"givenName": "Christopher",
"familyName": "Taggart"
},
{
"type": "alternative",
"fullName": "Chris Taggart"
}
],
"birthDate": "1964-04",
"addresses": [
{
"type": "service",
"address": "Aston House, Cornwall Avenue, London",
"country": "GB",
"postCode": "N3 1LF"
}
],
"publicationDetails": {
"publicationDate": "2018-02-13",
"bodsVersion": "0.3",
"publisher": {
"name": "CHRINON LTD"
}
}
},
{
"statementID": "fbfd0547-d0c6-4a00-b559-5c5e91c34f5c",
"statementType": "ownershipOrControlStatement",
"isComponent": false,
"statementDate": "2017-11-18",
"subject": {
"describedByEntityStatement": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7"
},
"interestedParty": {
"describedByPersonStatement": "019a93f1-e470-42e9-957b-03559861b2e2"
},
"interests": [
{
"type": "shareholding",
"directOrIndirect": "direct",
"beneficialOwnershipOrControl": true,
"startDate": "2016-04-06",
"share": {
"exact": 100,
"minimum": 100,
"maximum": 100
}
}
],
"publicationDetails": {
"publicationDate": "2018-02-13",
"bodsVersion": "0.3",
"publisher": {
"name": "CHRINON LTD"
}
}
}
]
Loading

0 comments on commit feed77c

Please sign in to comment.