diff --git a/tests/scenarios/basic/amr.json b/tests/scenarios/basic/amr.json index 124f811..d696edc 100644 --- a/tests/scenarios/basic/amr.json +++ b/tests/scenarios/basic/amr.json @@ -1,90 +1,638 @@ { - "name":"mathml model", - "schema":"https://github.com/DARPA-ASKEM/Model-Representations/blob/main/petrinet/petrinet_schema.json", - "schema_name":"PetriNet", - "description":"This is a model from mathml equations", - "model_version":"0.1", - "model":{ - "states":[ + "header": { + "name": "Test SIR Model", + "schema": "https://raw.githubusercontent.com/DARPA-ASKEM/Model-Representations/petrinet_v0.6/petrinet/petrinet_schema.json", + "description": "Test SIR model", + "schema_name": "petrinet", + "model_version": "0.1" + }, + "model": { + "states": [ + { + "id": "S", + "name": "Susceptible", + "description": "Number of individuals that are 'susceptible' to a disease infection", + "grounding": { + "identifiers": { + "ido": "0000514" + } + }, + "units": { + "expression": "person", + "expression_mathml": "person" + } + }, + { + "id": "I", + "name": "Infected", + "description": "Number of individuals that are 'infected' by a disease", + "grounding": { + "identifiers": { + "ido": "0000511" + } + }, + "units": { + "expression": "person", + "expression_mathml": "person" + } + }, + { + "id": "R", + "name": "Recovered", + "description": "Number of individuals that have 'recovered' from a disease infection", + "grounding": { + "identifiers": { + "ido": "0000592" + } + }, + "units": { + "expression": "person", + "expression_mathml": "person" + } + } + ], + "transitions": [ + { + "id": "inf", + "input": ["S", "I"], + "output": ["I", "I"], + "properties": { + "name": "Infection", + "description": "Infective process between individuals" + } + }, + { + "id": "rec", + "input": ["I"], + "output": ["R"], + "properties": { + "name": "Recovery", + "description": "Recovery process of a infected individual" + } + } + ] + }, + "semantics": { + "ode": { + "rates": [ + { + "target": "inf", + "expression": "S*I*beta", + "expression_mathml": "SIbeta" + }, + { + "target": "rec", + "expression": "I*gamma", + "expression_mathml": "Igamma" + } + ], + "initials": [ + { + "target": "S", + "expression": "S0", + "expression_mathml": "S0" + }, { - "id":"i", - "name":"i" + "target": "I", + "expression": "I0", + "expression_mathml": "I0" }, { - "id":"r", - "name":"r" + "target": "R", + "expression": "R0", + "expression_mathml": "R0" + } + ], + "parameters": [ + { + "id": "beta", + "name": "β", + "description": "infection rate", + "units": { + "expression": "1/(person*day)", + "expression_mathml": "1personday" + }, + "value": 2.7e-7, + "distribution": { + "type": "Uniform1", + "parameters": { + "minimum": 2.6e-7, + "maximum": 2.8e-7 + } + } + }, + { + "id": "gamma", + "name": "γ", + "description": "recovery rate", + "grounding": { + "identifiers": { + "askemo": "0000013" + } + }, + "units": { + "expression": "1/day", + "expression_mathml": "1day" + }, + "value": 0.14, + "distribution": { + "type": "Uniform1", + "parameters": { + "minimum": 0.1, + "maximum": 0.18 + } + } + }, + { + "id": "S0", + "name": "S₀", + "description": "Total susceptible population at timestep 0", + "value": 1000 + }, + { + "id": "I0", + "name": "I₀", + "description": "Total infected population at timestep 0", + "value": 1 }, { - "id":"s", - "name":"s" + "id": "R0", + "name": "R₀", + "description": "Total recovered population at timestep 0", + "value": 0 } - ], - "transitions":[ + ], + "observables": [ { - "id":"t0", - "input":[ - "i" - ], - "output":[ - "r" - ], - "grounding":null - } - ] + "id": "noninf", + "name": "Non-infectious", + "states": ["S", "R"], + "expression": "S+R", + "expression_mathml": "SR" + } + ], + "time": { + "id": "t", + "units": { + "expression": "day", + "expression_mathml": "day" + } + } + } }, - "semantics":{ - "ode":{ - "rates":[ - { - "target":"t0", - "expression":"gamma*i*", - "expression_mathml":"igamma" - } - ], - "initials":[ - { - "target":"s", - "expression":"s0", - "expression_mathml":"" - }, - { - "target":"i", - "expression":"i0", - "expression_mathml":"" - }, - { - "target":"r", - "expression":"r0", - "expression_mathml":"" - } - ], - "parameters":[ - { - "id":"beta", - "name":"beta", - "description":"beta rate" - }, - { - "id":"gamma", - "name":"gamma", - "description":"gamma rate" - }, - { - "id":"i0", - "name":"i0", - "description":"The total i population at timestep 0" - }, - { - "id":"r0", - "name":"r0", - "description":"The total r population at timestep 0" - }, - { - "id":"s0", - "name":"s0", - "description":"The total s population at timestep 0" - } - ] - } + "metadata": { + "attributes": [ + { + "type": "anchored_extraction", + "payload": { + "id": { + "id": "R:190348269" + }, + "names": [ + { + "id": { + "id": "T:-1709799622" + }, + "name": "Bucky", + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 738, + "char_end": 743, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.974474" + } + } + ], + "descriptions": [ + { + "id": { + "id": "T:-486841659" + }, + "source": "time", + "grounding": [ + { + "grounding_text": "time since time scale zero", + "grounding_id": "apollosv:00000272", + "source": [], + "score": 0.8945620059967041, + "provenance": { + "method": "SKEMA-TR-Embedding", + "timestamp": "2023-06-15T22:59:11.974644" + } + } + ], + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 732, + "char_end": 736, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.974474" + } + } + ], + "value_specs": [], + "groundings": [] + } + }, + { + "type": "anchored_extraction", + "payload": { + "id": { + "id": "R:159895595" + }, + "names": [ + { + "id": { + "id": "T:2131207786" + }, + "name": "SEIR", + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 56, + "char_end": 60, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.974780" + } + } + ], + "descriptions": [ + { + "id": { + "id": "T:-1520869470" + }, + "source": "spatially distributed", + "grounding": [], + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 34, + "char_end": 55, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.974780" + } + } + ], + "value_specs": [], + "groundings": [] + } + }, + { + "type": "anchored_extraction", + "payload": { + "id": { + "id": "E:-337831219" + }, + "names": [ + { + "id": { + "id": "T:1326919589" + }, + "name": "S", + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 562, + "char_end": 563, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.974931" + } + } + ], + "descriptions": [ + { + "id": { + "id": "T:1687413640" + }, + "source": "fraction of the population", + "grounding": [ + { + "grounding_text": "count of simulated population", + "grounding_id": "apollosv:00000022", + "source": [], + "score": 0.8330355286598206, + "provenance": { + "method": "SKEMA-TR-Embedding", + "timestamp": "2023-06-15T22:59:11.975009" + } + } + ], + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 570, + "char_end": 596, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.974931" + } + } + ], + "value_specs": [], + "groundings": [ + { + "grounding_text": "Meruvax I", + "grounding_id": "vo:0003109", + "source": [], + "score": 0.7847759127616882, + "provenance": { + "method": "SKEMA-TR-Embedding", + "timestamp": "2023-06-15T22:59:11.974960" + } + } + ] + } + }, + { + "type": "anchored_extraction", + "payload": { + "id": { + "id": "E:-1921441554" + }, + "names": [ + { + "id": { + "id": "T:-24678027" + }, + "name": "asym frac", + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 142, + "char_end": 151, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.975127" + } + }, + { + "id": { + "id": "v10" + }, + "name": "\u03b1", + "extraction_source": null, + "provenance": { + "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)", + "timestamp": "2023-06-15T22:59:13.177022" + } + } + ], + "descriptions": [ + { + "id": { + "id": "T:1244663286" + }, + "source": "percentage of infections", + "grounding": [ + { + "grounding_text": "percentage of cases", + "grounding_id": "cemo:percentage_of_cases", + "source": [], + "score": 0.8812347650527954, + "provenance": { + "method": "SKEMA-TR-Embedding", + "timestamp": "2023-06-15T22:59:11.975201" + } + } + ], + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 94, + "char_end": 118, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.975127" + } + }, + { + "id": { + "id": "v10" + }, + "source": " Rate of infections that are asymptomatic", + "grounding": null, + "extraction_source": null, + "provenance": { + "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)", + "timestamp": "2023-06-15T22:59:13.177022" + } + } + ], + "value_specs": [], + "groundings": [ + { + "grounding_text": "Van", + "grounding_id": "geonames:298117", + "source": [], + "score": 1.0, + "provenance": { + "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)", + "timestamp": "2023-06-15T22:59:13.177022" + } + }, + { + "grounding_text": "Sanaa", + "grounding_id": "geonames:71137", + "source": [], + "score": 1.0, + "provenance": { + "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)", + "timestamp": "2023-06-15T22:59:13.177022" + } + } + ] + } + }, + { + "type": "anchored_extraction", + "payload": { + "id": { + "id": "E:392549189" + }, + "names": [ + { + "id": { + "id": "T:-24678027" + }, + "name": "asym frac", + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 142, + "char_end": 151, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.975270" + } + }, + { + "id": { + "id": "v18" + }, + "name": "asym_frac", + "extraction_source": null, + "provenance": { + "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)", + "timestamp": "2023-06-15T22:59:13.177022" + } + } + ], + "descriptions": [ + { + "id": { + "id": "T:1244663286" + }, + "source": "percentage of infections", + "grounding": [ + { + "grounding_text": "percentage of cases", + "grounding_id": "cemo:percentage_of_cases", + "source": [], + "score": 0.8812347650527954, + "provenance": { + "method": "SKEMA-TR-Embedding", + "timestamp": "2023-06-15T22:59:11.975340" + } + } + ], + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 94, + "char_end": 118, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.975270" + } + }, + { + "id": { + "id": "v18" + }, + "source": " Fraction of infections that are asymptomatic", + "grounding": null, + "extraction_source": null, + "provenance": { + "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)", + "timestamp": "2023-06-15T22:59:13.177022" + } + } + ], + "value_specs": [], + "groundings": [] + } + }, + { + "type": "anchored_extraction", + "payload": { + "id": { + "id": "E:-1790112729" + }, + "names": [ + { + "id": { + "id": "T:-24678027" + }, + "name": "asym frac", + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 142, + "char_end": 151, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.975409" + } + } + ], + "descriptions": [ + { + "id": { + "id": "T:1244663286" + }, + "source": "percentage of infections", + "grounding": [ + { + "grounding_text": "percentage of cases", + "grounding_id": "cemo:percentage_of_cases", + "source": [], + "score": 0.8812347650527954, + "provenance": { + "method": "SKEMA-TR-Embedding", + "timestamp": "2023-06-15T22:59:11.975479" + } + } + ], + "extraction_source": { + "page": 0, + "block": 0, + "char_start": 94, + "char_end": 118, + "document_reference": { + "id": "buckymodel_webdocs.pdf" + } + }, + "provenance": { + "method": "Skema TR Pipeline rules", + "timestamp": "2023-06-15T22:59:11.975409" + } + } + ], + "value_specs": [], + "groundings": [] + } + } + ] } } \ No newline at end of file diff --git a/tests/scenarios/basic/config.yaml b/tests/scenarios/basic/config.yaml index 9761661..a2119cf 100644 --- a/tests/scenarios/basic/config.yaml +++ b/tests/scenarios/basic/config.yaml @@ -1,4 +1,6 @@ --- +name: "Fake Scenario" +description: "A fake scenario to test if basic integration is working." enabled: - pdf_extraction - pdf_to_text diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 2a37aac..e78f1fb 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -4,9 +4,10 @@ import pytest import logging +from rq.job import Job from lib.settings import settings -from tests.test_utils import get_parameterizations +from tests.utils import get_parameterizations, AMR logger = logging.getLogger(__name__) @@ -121,11 +122,17 @@ def test_code_to_amr(context_dir, http_mock, client, worker, gen_tds_artifact, f worker.work(burst=True) status_response = client.get(f"/status/{job_id}") + job = Job.fetch(job_id, connection=worker.connection) + amr_instance = AMR(job.result["amr"]) + #### ASSERT #### assert results.get("status") == "queued" assert status_response.status_code == 200 assert status_response.json().get("status") == "finished" + assert ( + amr_instance.is_valid() + ), f"AMR failed to validate to its provided schema: {amr_instance.validation_error}" @pytest.mark.parametrize("resource", params["equations_to_amr"]) def test_equations_to_amr(context_dir, http_mock, client, worker, file_storage): @@ -157,12 +164,19 @@ def test_equations_to_amr(context_dir, http_mock, client, worker, file_storage): job_id = results.get("id") worker.work(burst=True) status_response = client.get(f"/status/{job_id}") + + job = Job.fetch(job_id, connection=worker.connection) + amr_instance = AMR(job.result["amr"]) #### ASSERT #### assert results.get("status") == "queued" assert status_response.status_code == 200 assert status_response.json().get("status") == "finished" + assert ( + amr_instance.is_valid() + ), f"AMR failed to validate to its provided schema: {amr_instance.validation_error}" + @pytest.mark.parametrize("resource", params["profile_dataset"]) def test_profile_dataset(context_dir, http_mock, client, worker, gen_tds_artifact, file_storage): diff --git a/tests/test_utils.py b/tests/utils.py similarity index 54% rename from tests/test_utils.py rename to tests/utils.py index 7c087a4..1ac7cb4 100644 --- a/tests/test_utils.py +++ b/tests/utils.py @@ -24,47 +24,27 @@ def get_parameterizations(): class AMR: def __init__(self, json_data): self.json_data = json_data - self.schema_url = self._transform_url(self.json_data.get("schema", None)) - self.schema = self._fetch_schema() # Fetch the schema during initialization - self.validation_error = None # Store the validation error if any - - def _transform_url(self, url): - """Transforms a GitHub URL into its raw format.""" - if not url: - return None - - if "raw.githubusercontent.com" in url: - return url - - return url.replace("github.com", "raw.githubusercontent.com").replace( - "/blob", "" - ) - - def _fetch_schema(self): - """Private method to fetch the JSON schema from the specified URL.""" - if not self.schema_url: + self.header = json_data["header"] + try: + self.schema_url = self.header["schema"] + except KeyError: raise ValueError("No schema URL specified in the input JSON.") - + if "raw.githubusercontent.com" not in self.schema_url: + self.schema_url = self.schema_url.replace("github.com", "raw.githubusercontent.com").replace( + "/blob", "" + ) response = requests.get(self.schema_url) response.raise_for_status() - return response.json() + self.schema = response.json() + self.validation_error = None + def is_valid(self): """Validates the original JSON against the fetched JSON schema.""" - if not self.schema: - raise ValueError( - "Schema is not available. Fetching might have failed during initialization." - ) - try: validate(instance=self.json_data, schema=self.schema) - return True except ValidationError as e: - self.validation_error = e + self.validation_error = str(e) return False - - def get_validation_error(self): - """Retrieve the validation error message.""" - if not self.validation_error: - return None - return str(self.validation_error) + else: + return True