diff --git a/tests/scenarios/basic/amr.json b/tests/scenarios/basic/amr.json
index 124f811..d696edc 100644
--- a/tests/scenarios/basic/amr.json
+++ b/tests/scenarios/basic/amr.json
@@ -1,90 +1,638 @@
{
- "name":"mathml model",
- "schema":"https://github.com/DARPA-ASKEM/Model-Representations/blob/main/petrinet/petrinet_schema.json",
- "schema_name":"PetriNet",
- "description":"This is a model from mathml equations",
- "model_version":"0.1",
- "model":{
- "states":[
+ "header": {
+ "name": "Test SIR Model",
+ "schema": "https://raw.githubusercontent.com/DARPA-ASKEM/Model-Representations/petrinet_v0.6/petrinet/petrinet_schema.json",
+ "description": "Test SIR model",
+ "schema_name": "petrinet",
+ "model_version": "0.1"
+ },
+ "model": {
+ "states": [
+ {
+ "id": "S",
+ "name": "Susceptible",
+ "description": "Number of individuals that are 'susceptible' to a disease infection",
+ "grounding": {
+ "identifiers": {
+ "ido": "0000514"
+ }
+ },
+ "units": {
+ "expression": "person",
+ "expression_mathml": "person"
+ }
+ },
+ {
+ "id": "I",
+ "name": "Infected",
+ "description": "Number of individuals that are 'infected' by a disease",
+ "grounding": {
+ "identifiers": {
+ "ido": "0000511"
+ }
+ },
+ "units": {
+ "expression": "person",
+ "expression_mathml": "person"
+ }
+ },
+ {
+ "id": "R",
+ "name": "Recovered",
+ "description": "Number of individuals that have 'recovered' from a disease infection",
+ "grounding": {
+ "identifiers": {
+ "ido": "0000592"
+ }
+ },
+ "units": {
+ "expression": "person",
+ "expression_mathml": "person"
+ }
+ }
+ ],
+ "transitions": [
+ {
+ "id": "inf",
+ "input": ["S", "I"],
+ "output": ["I", "I"],
+ "properties": {
+ "name": "Infection",
+ "description": "Infective process between individuals"
+ }
+ },
+ {
+ "id": "rec",
+ "input": ["I"],
+ "output": ["R"],
+ "properties": {
+ "name": "Recovery",
+ "description": "Recovery process of an infected individual"
+ }
+ }
+ ]
+ },
+ "semantics": {
+ "ode": {
+ "rates": [
+ {
+ "target": "inf",
+ "expression": "S*I*beta",
+ "expression_mathml": "SIbeta"
+ },
+ {
+ "target": "rec",
+ "expression": "I*gamma",
+ "expression_mathml": "Igamma"
+ }
+ ],
+ "initials": [
+ {
+ "target": "S",
+ "expression": "S0",
+ "expression_mathml": "S0"
+ },
{
- "id":"i",
- "name":"i"
+ "target": "I",
+ "expression": "I0",
+ "expression_mathml": "I0"
},
{
- "id":"r",
- "name":"r"
+ "target": "R",
+ "expression": "R0",
+ "expression_mathml": "R0"
+ }
+ ],
+ "parameters": [
+ {
+ "id": "beta",
+ "name": "β",
+ "description": "infection rate",
+ "units": {
+ "expression": "1/(person*day)",
+ "expression_mathml": "1personday"
+ },
+ "value": 2.7e-7,
+ "distribution": {
+ "type": "Uniform1",
+ "parameters": {
+ "minimum": 2.6e-7,
+ "maximum": 2.8e-7
+ }
+ }
+ },
+ {
+ "id": "gamma",
+ "name": "γ",
+ "description": "recovery rate",
+ "grounding": {
+ "identifiers": {
+ "askemo": "0000013"
+ }
+ },
+ "units": {
+ "expression": "1/day",
+ "expression_mathml": "1day"
+ },
+ "value": 0.14,
+ "distribution": {
+ "type": "Uniform1",
+ "parameters": {
+ "minimum": 0.1,
+ "maximum": 0.18
+ }
+ }
+ },
+ {
+ "id": "S0",
+ "name": "S₀",
+ "description": "Total susceptible population at timestep 0",
+ "value": 1000
+ },
+ {
+ "id": "I0",
+ "name": "I₀",
+ "description": "Total infected population at timestep 0",
+ "value": 1
},
{
- "id":"s",
- "name":"s"
+ "id": "R0",
+ "name": "R₀",
+ "description": "Total recovered population at timestep 0",
+ "value": 0
}
- ],
- "transitions":[
+ ],
+ "observables": [
{
- "id":"t0",
- "input":[
- "i"
- ],
- "output":[
- "r"
- ],
- "grounding":null
- }
- ]
+ "id": "noninf",
+ "name": "Non-infectious",
+ "states": ["S", "R"],
+ "expression": "S+R",
+ "expression_mathml": "SR"
+ }
+ ],
+ "time": {
+ "id": "t",
+ "units": {
+ "expression": "day",
+ "expression_mathml": "day"
+ }
+ }
+ }
},
- "semantics":{
- "ode":{
- "rates":[
- {
- "target":"t0",
- "expression":"gamma*i*",
- "expression_mathml":"igamma"
- }
- ],
- "initials":[
- {
- "target":"s",
- "expression":"s0",
- "expression_mathml":""
- },
- {
- "target":"i",
- "expression":"i0",
- "expression_mathml":""
- },
- {
- "target":"r",
- "expression":"r0",
- "expression_mathml":""
- }
- ],
- "parameters":[
- {
- "id":"beta",
- "name":"beta",
- "description":"beta rate"
- },
- {
- "id":"gamma",
- "name":"gamma",
- "description":"gamma rate"
- },
- {
- "id":"i0",
- "name":"i0",
- "description":"The total i population at timestep 0"
- },
- {
- "id":"r0",
- "name":"r0",
- "description":"The total r population at timestep 0"
- },
- {
- "id":"s0",
- "name":"s0",
- "description":"The total s population at timestep 0"
- }
- ]
- }
+ "metadata": {
+ "attributes": [
+ {
+ "type": "anchored_extraction",
+ "payload": {
+ "id": {
+ "id": "R:190348269"
+ },
+ "names": [
+ {
+ "id": {
+ "id": "T:-1709799622"
+ },
+ "name": "Bucky",
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 738,
+ "char_end": 743,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.974474"
+ }
+ }
+ ],
+ "descriptions": [
+ {
+ "id": {
+ "id": "T:-486841659"
+ },
+ "source": "time",
+ "grounding": [
+ {
+ "grounding_text": "time since time scale zero",
+ "grounding_id": "apollosv:00000272",
+ "source": [],
+ "score": 0.8945620059967041,
+ "provenance": {
+ "method": "SKEMA-TR-Embedding",
+ "timestamp": "2023-06-15T22:59:11.974644"
+ }
+ }
+ ],
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 732,
+ "char_end": 736,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.974474"
+ }
+ }
+ ],
+ "value_specs": [],
+ "groundings": []
+ }
+ },
+ {
+ "type": "anchored_extraction",
+ "payload": {
+ "id": {
+ "id": "R:159895595"
+ },
+ "names": [
+ {
+ "id": {
+ "id": "T:2131207786"
+ },
+ "name": "SEIR",
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 56,
+ "char_end": 60,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.974780"
+ }
+ }
+ ],
+ "descriptions": [
+ {
+ "id": {
+ "id": "T:-1520869470"
+ },
+ "source": "spatially distributed",
+ "grounding": [],
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 34,
+ "char_end": 55,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.974780"
+ }
+ }
+ ],
+ "value_specs": [],
+ "groundings": []
+ }
+ },
+ {
+ "type": "anchored_extraction",
+ "payload": {
+ "id": {
+ "id": "E:-337831219"
+ },
+ "names": [
+ {
+ "id": {
+ "id": "T:1326919589"
+ },
+ "name": "S",
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 562,
+ "char_end": 563,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.974931"
+ }
+ }
+ ],
+ "descriptions": [
+ {
+ "id": {
+ "id": "T:1687413640"
+ },
+ "source": "fraction of the population",
+ "grounding": [
+ {
+ "grounding_text": "count of simulated population",
+ "grounding_id": "apollosv:00000022",
+ "source": [],
+ "score": 0.8330355286598206,
+ "provenance": {
+ "method": "SKEMA-TR-Embedding",
+ "timestamp": "2023-06-15T22:59:11.975009"
+ }
+ }
+ ],
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 570,
+ "char_end": 596,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.974931"
+ }
+ }
+ ],
+ "value_specs": [],
+ "groundings": [
+ {
+ "grounding_text": "Meruvax I",
+ "grounding_id": "vo:0003109",
+ "source": [],
+ "score": 0.7847759127616882,
+ "provenance": {
+ "method": "SKEMA-TR-Embedding",
+ "timestamp": "2023-06-15T22:59:11.974960"
+ }
+ }
+ ]
+ }
+ },
+ {
+ "type": "anchored_extraction",
+ "payload": {
+ "id": {
+ "id": "E:-1921441554"
+ },
+ "names": [
+ {
+ "id": {
+ "id": "T:-24678027"
+ },
+ "name": "asym frac",
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 142,
+ "char_end": 151,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.975127"
+ }
+ },
+ {
+ "id": {
+ "id": "v10"
+ },
+ "name": "\u03b1",
+ "extraction_source": null,
+ "provenance": {
+ "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)",
+ "timestamp": "2023-06-15T22:59:13.177022"
+ }
+ }
+ ],
+ "descriptions": [
+ {
+ "id": {
+ "id": "T:1244663286"
+ },
+ "source": "percentage of infections",
+ "grounding": [
+ {
+ "grounding_text": "percentage of cases",
+ "grounding_id": "cemo:percentage_of_cases",
+ "source": [],
+ "score": 0.8812347650527954,
+ "provenance": {
+ "method": "SKEMA-TR-Embedding",
+ "timestamp": "2023-06-15T22:59:11.975201"
+ }
+ }
+ ],
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 94,
+ "char_end": 118,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.975127"
+ }
+ },
+ {
+ "id": {
+ "id": "v10"
+ },
+ "source": " Rate of infections that are asymptomatic",
+ "grounding": null,
+ "extraction_source": null,
+ "provenance": {
+ "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)",
+ "timestamp": "2023-06-15T22:59:13.177022"
+ }
+ }
+ ],
+ "value_specs": [],
+ "groundings": [
+ {
+ "grounding_text": "Van",
+ "grounding_id": "geonames:298117",
+ "source": [],
+ "score": 1.0,
+ "provenance": {
+ "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)",
+ "timestamp": "2023-06-15T22:59:13.177022"
+ }
+ },
+ {
+ "grounding_text": "Sanaa",
+ "grounding_id": "geonames:71137",
+ "source": [],
+ "score": 1.0,
+ "provenance": {
+ "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)",
+ "timestamp": "2023-06-15T22:59:13.177022"
+ }
+ }
+ ]
+ }
+ },
+ {
+ "type": "anchored_extraction",
+ "payload": {
+ "id": {
+ "id": "E:392549189"
+ },
+ "names": [
+ {
+ "id": {
+ "id": "T:-24678027"
+ },
+ "name": "asym frac",
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 142,
+ "char_end": 151,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.975270"
+ }
+ },
+ {
+ "id": {
+ "id": "v18"
+ },
+ "name": "asym_frac",
+ "extraction_source": null,
+ "provenance": {
+ "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)",
+ "timestamp": "2023-06-15T22:59:13.177022"
+ }
+ }
+ ],
+ "descriptions": [
+ {
+ "id": {
+ "id": "T:1244663286"
+ },
+ "source": "percentage of infections",
+ "grounding": [
+ {
+ "grounding_text": "percentage of cases",
+ "grounding_id": "cemo:percentage_of_cases",
+ "source": [],
+ "score": 0.8812347650527954,
+ "provenance": {
+ "method": "SKEMA-TR-Embedding",
+ "timestamp": "2023-06-15T22:59:11.975340"
+ }
+ }
+ ],
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 94,
+ "char_end": 118,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.975270"
+ }
+ },
+ {
+ "id": {
+ "id": "v18"
+ },
+ "source": " Fraction of infections that are asymptomatic",
+ "grounding": null,
+ "extraction_source": null,
+ "provenance": {
+ "method": "MIT extractor V1.0 - text, dataset, formula annotation (chunwei@mit.edu)",
+ "timestamp": "2023-06-15T22:59:13.177022"
+ }
+ }
+ ],
+ "value_specs": [],
+ "groundings": []
+ }
+ },
+ {
+ "type": "anchored_extraction",
+ "payload": {
+ "id": {
+ "id": "E:-1790112729"
+ },
+ "names": [
+ {
+ "id": {
+ "id": "T:-24678027"
+ },
+ "name": "asym frac",
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 142,
+ "char_end": 151,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.975409"
+ }
+ }
+ ],
+ "descriptions": [
+ {
+ "id": {
+ "id": "T:1244663286"
+ },
+ "source": "percentage of infections",
+ "grounding": [
+ {
+ "grounding_text": "percentage of cases",
+ "grounding_id": "cemo:percentage_of_cases",
+ "source": [],
+ "score": 0.8812347650527954,
+ "provenance": {
+ "method": "SKEMA-TR-Embedding",
+ "timestamp": "2023-06-15T22:59:11.975479"
+ }
+ }
+ ],
+ "extraction_source": {
+ "page": 0,
+ "block": 0,
+ "char_start": 94,
+ "char_end": 118,
+ "document_reference": {
+ "id": "buckymodel_webdocs.pdf"
+ }
+ },
+ "provenance": {
+ "method": "Skema TR Pipeline rules",
+ "timestamp": "2023-06-15T22:59:11.975409"
+ }
+ }
+ ],
+ "value_specs": [],
+ "groundings": []
+ }
+ }
+ ]
}
}
\ No newline at end of file
diff --git a/tests/scenarios/basic/config.yaml b/tests/scenarios/basic/config.yaml
index 9761661..a2119cf 100644
--- a/tests/scenarios/basic/config.yaml
+++ b/tests/scenarios/basic/config.yaml
@@ -1,4 +1,6 @@
---
+name: "Fake Scenario"
+description: "A fake scenario to test if basic integration is working."
enabled:
- pdf_extraction
- pdf_to_text
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index 2a37aac..e78f1fb 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -4,9 +4,10 @@
import pytest
import logging
+from rq.job import Job
from lib.settings import settings
-from tests.test_utils import get_parameterizations
+from tests.utils import get_parameterizations, AMR
logger = logging.getLogger(__name__)
@@ -121,11 +122,17 @@ def test_code_to_amr(context_dir, http_mock, client, worker, gen_tds_artifact, f
worker.work(burst=True)
status_response = client.get(f"/status/{job_id}")
+ job = Job.fetch(job_id, connection=worker.connection)
+ amr_instance = AMR(job.result["amr"])
+
#### ASSERT ####
assert results.get("status") == "queued"
assert status_response.status_code == 200
assert status_response.json().get("status") == "finished"
+ assert (
+ amr_instance.is_valid()
+ ), f"AMR failed to validate to its provided schema: {amr_instance.validation_error}"
@pytest.mark.parametrize("resource", params["equations_to_amr"])
def test_equations_to_amr(context_dir, http_mock, client, worker, file_storage):
@@ -157,12 +164,19 @@ def test_equations_to_amr(context_dir, http_mock, client, worker, file_storage):
job_id = results.get("id")
worker.work(burst=True)
status_response = client.get(f"/status/{job_id}")
+
+ job = Job.fetch(job_id, connection=worker.connection)
+ amr_instance = AMR(job.result["amr"])
#### ASSERT ####
assert results.get("status") == "queued"
assert status_response.status_code == 200
assert status_response.json().get("status") == "finished"
+ assert (
+ amr_instance.is_valid()
+ ), f"AMR failed to validate to its provided schema: {amr_instance.validation_error}"
+
@pytest.mark.parametrize("resource", params["profile_dataset"])
def test_profile_dataset(context_dir, http_mock, client, worker, gen_tds_artifact, file_storage):
diff --git a/tests/test_utils.py b/tests/utils.py
similarity index 54%
rename from tests/test_utils.py
rename to tests/utils.py
index 7c087a4..1ac7cb4 100644
--- a/tests/test_utils.py
+++ b/tests/utils.py
@@ -24,47 +24,27 @@ def get_parameterizations():
class AMR:
def __init__(self, json_data):
self.json_data = json_data
- self.schema_url = self._transform_url(self.json_data.get("schema", None))
- self.schema = self._fetch_schema() # Fetch the schema during initialization
- self.validation_error = None # Store the validation error if any
-
- def _transform_url(self, url):
- """Transforms a GitHub URL into its raw format."""
- if not url:
- return None
-
- if "raw.githubusercontent.com" in url:
- return url
-
- return url.replace("github.com", "raw.githubusercontent.com").replace(
- "/blob", ""
- )
-
- def _fetch_schema(self):
- """Private method to fetch the JSON schema from the specified URL."""
- if not self.schema_url:
+ self.header = json_data["header"]
+ try:
+ self.schema_url = self.header["schema"]
+ except KeyError:
raise ValueError("No schema URL specified in the input JSON.")
-
+ if "raw.githubusercontent.com" not in self.schema_url:
+ self.schema_url = self.schema_url.replace("github.com", "raw.githubusercontent.com").replace(
+ "/blob", ""
+ )
response = requests.get(self.schema_url)
response.raise_for_status()
- return response.json()
+ self.schema = response.json()
+ self.validation_error = None
+
def is_valid(self):
"""Validates the original JSON against the fetched JSON schema."""
- if not self.schema:
- raise ValueError(
- "Schema is not available. Fetching might have failed during initialization."
- )
-
try:
validate(instance=self.json_data, schema=self.schema)
- return True
except ValidationError as e:
- self.validation_error = e
+ self.validation_error = str(e)
return False
-
- def get_validation_error(self):
- """Retrieve the validation error message."""
- if not self.validation_error:
- return None
- return str(self.validation_error)
+ else:
+ return True