
Add quality check report #29

Merged: 32 commits (Sep 5, 2023)

Commits:
0270911
Parameterize tests
fivegrant Aug 28, 2023
07a015e
Check resource existence
fivegrant Aug 28, 2023
a5d4cd8
Rename LIVE -> MOCK_TA1
fivegrant Aug 28, 2023
d6bc675
Include minor details about adding scenarios
fivegrant Aug 28, 2023
308af64
Add MOCK_TA1 to sample
fivegrant Aug 28, 2023
a316a88
Assert valid AMR
fivegrant Aug 28, 2023
fce25f8
Add precommit dependency
fivegrant Aug 28, 2023
853e628
Remove linting
fivegrant Aug 28, 2023
4da0374
renamed TA1-service to Knowledge-Middleware
dgauldie Aug 29, 2023
b09e589
Write qualitative CSV
fivegrant Aug 29, 2023
184e2fa
Add some JSON reporting
fivegrant Aug 29, 2023
1a4c4ff
Merge branch '24-rename-ta1-service-to-knowledge-middleware' into fea…
fivegrant Aug 29, 2023
ab2a617
Merge branch 'feat/parameterize-tests' into feat/qualitative
fivegrant Aug 29, 2023
0716099
Generate report of quality
fivegrant Aug 29, 2023
24232d5
Update to quality_check
fivegrant Aug 29, 2023
582dba3
Report bools
fivegrant Aug 29, 2023
764e412
Incorporate updates from `main`
fivegrant Aug 29, 2023
027f22d
Fix lockfile
fivegrant Aug 30, 2023
54e32af
Merge branch 'main' into feat/qualitative
fivegrant Aug 30, 2023
16fb7ee
Merge .gitignore fixes from `main`
fivegrant Aug 30, 2023
60a8aed
Experiment with table gen
fivegrant Aug 30, 2023
087dc9a
Generate separate tables
fivegrant Aug 30, 2023
37aef32
Finish v0 of html generator
fivegrant Aug 31, 2023
e466828
Reach parity with streamlit
fivegrant Aug 31, 2023
befcb30
Make streamlit multipage
fivegrant Aug 31, 2023
706f7bb
Use execution time
fivegrant Sep 1, 2023
10cbcc1
added OPEN_API_KEY to env.sample
brandomr Sep 3, 2023
cef2776
added OPEN_API_KEY to env.sample
brandomr Sep 3, 2023
51f3eb0
updated README
brandomr Sep 3, 2023
f3be208
updates to streamlit/test report
brandomr Sep 3, 2023
ba91bed
Remove try-finally blocks from tests
fivegrant Sep 5, 2023
75c3188
Add additional test scenarios (#37)
brandomr Sep 5, 2023
7 changes: 6 additions & 1 deletion README.md
@@ -50,11 +50,16 @@ To add additional scenarios, create a new directory in `tests/scenarios`. The di
will be specified in `enabled`.

The `.env` file specifies the `MOCK_TA1` setting as well as the appropriate endpoints, and can be passed into the test suite with:

```
poetry shell && export $(cat .env | xargs) && pytest -s
```

Run `poetry run poe report` to generate `tests/output/report.json`, which contains the status of each scenario and operation.

Once the report has been generated, run `poetry run streamlit run tests/Home.py` to launch the web interface for the test suite, which will be available at `http://localhost:8501`.

> Note: if any test fails, the `poe report` task will exit without generating a report. To work around this, run `pytest --json-report --json-report-file=tests/output/tests.json` and then `python tests/report.py` manually.
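Because `pyproject.toml` maps the `gen-report` script to `tests.report:report`, the same fallback can also be invoked from Python. A minimal sketch, assuming the project environment is active:

```
# Sketch: call the report generator directly (equivalent to `gen-report`).
# Assumes `pytest --json-report` has already written tests/output/tests.json.
from tests.report import report

report()
```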

## License

[Apache License 2.0](LICENSE)
3 changes: 2 additions & 1 deletion env.sample
@@ -5,4 +5,5 @@ SKEMA_RS_URL=http://skema-rs.staging.terarium.ai
TA1_UNIFIED_URL=http://skema-unified.staging.terarium.ai
MIT_TR_URL=http://mit-tr.staging.terarium.ai
LOG_LEVEL=INFO
MOCK_TA1=True
MOCK_TA1=True
OPENAI_API_KEY=foo
1,190 changes: 917 additions & 273 deletions poetry.lock

Large diffs are not rendered by default.

14 changes: 13 additions & 1 deletion pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
packages = [{include = "lib"}, {include = "worker"}, {include = "api"}, {include = "tests"}]

[tool.poetry.dependencies]
python = "^3.9"
python = "^3.10"
redis = "^4.6.0"
rq = "^1.15.1"
pydantic = "^2.0.2"
@@ -16,6 +16,7 @@ python-multipart = "^0.0.6"
pandas = "^2.0.3"
requests = "^2.31.0"
pydantic-settings = "^2.0.3"
streamlit = "^1.26.0"


[tool.poetry.group.api]
@@ -37,9 +38,20 @@ poethepoet = "^0.22.0"
requests-mock = "^1.11.0"
pyyaml = "^6.0.1"
pre-commit = "^3.3.3"
pytest-json-report = "^1.5.0"


[tool.poetry.scripts]
gen-report = "tests.report:report"

[tool.poe.tasks]
_test = "pytest --json-report --json-report-file=tests/output/tests.json"
_report = "gen-report"
report = ["_test", "_report"]

[tool.pytest.ini_options]
markers = ["resource"]
python_files = ["tests/test_e2e.py"]

[build-system]
requires = ["poetry-core"]
23 changes: 23 additions & 0 deletions tests/Home.py
@@ -0,0 +1,23 @@
import json
from functools import reduce
from collections import defaultdict

import streamlit as st
import pandas as pd

"""
# Integration Healthchecks

## TODOs
- [x] TA1
- [ ] TA3

"""

st.sidebar.markdown("""
# Status of Integrations

This app contains information about integration
with various TAs.
""")

11 changes: 11 additions & 0 deletions tests/conftest.py
@@ -6,6 +6,7 @@
import re
from urllib.parse import urlparse, parse_qs, quote_plus
import json
import time
from collections import namedtuple
from io import BytesIO
from itertools import count
@@ -136,3 +137,13 @@ def generate(code=False):
        http_mock.put(artifact_url)
        return artifact
    return generate

@pytest.hookimpl(tryfirst=True)
def pytest_runtest_protocol(item, nextitem):
    """
    Runs between tests; introduces a delay between tests when
    external (non-mocked) TA1 calls are being made.
    """
    if not settings.MOCK_TA1:
        if nextitem:  # Only sleep if another test follows
            time.sleep(90)  # Sleep for 90 seconds between external calls
44 changes: 44 additions & 0 deletions tests/create_table.py
@@ -0,0 +1,44 @@
import json

with open("tests/output/report.json", "r") as f:
    data_dict = json.load(f)

tests = set()
operations = set()

for scenario, values in data_dict.items():
    for operation, raw_tests in values['operations'].items():
        operations.add(operation)
        for test in raw_tests:
            tests.add(test)

html = ""

for test in sorted(tests):
    table = f'<h2>{test}</h2>\n'
    table += '<table border="1">\n'

    table += '<tr>\n<th>Scenarios</th>\n'
    for operation in sorted(operations):
        table += f'<th>{operation}</th>\n'
    table += '</tr>\n'

    for scenario in sorted(data_dict.keys()):
        table += f'<tr>\n<td>{scenario}</td>\n'
        for operation in sorted(operations):
            operation_data = data_dict[scenario]['operations'].get(operation, {}).get(test, None)
            if operation_data is not None:
                if isinstance(operation_data, bool):
                    table += f"<td>{'✅' if operation_data else '❌'}</td>\n"
                else:
                    table += f"<td>{operation_data}</td>\n"
            else:
                table += "<td>⚠️</td>\n"  # Indicating not applicable/missing
        table += '</tr>\n'

    table += '</table>\n'

    html += table

with open("tests/output/report.html", "w") as file:
    file.write(html)
91 changes: 91 additions & 0 deletions tests/pages/1_TA1.py
@@ -0,0 +1,91 @@
import json
import datetime
import os
import re
from functools import reduce
from collections import defaultdict

import streamlit as st
import pandas as pd


def custom_title(s):
    # List of words you want to fully capitalize
    FULL_CAPS = ['pdf', 'amr']

    words = s.replace('_', ' ').split()
    capitalized_words = [word.upper() if word in FULL_CAPS else word.title() for word in words]
    return ' '.join(capitalized_words)

# Get a list of all report files with timestamp
report_dir = "tests/output/"
report_files = [f for f in os.listdir(report_dir) if re.match(r'report_\d{8}_\d{6}\.json', f)]
report_files.sort(reverse=True)  # Sort the files so the most recent is on top

def format_timestamp_from_filename(filename):
    # Extract timestamp from filename
    match = re.search(r'report_(\d{8})_(\d{6})\.json', filename)
    if match:
        date_part, time_part = match.groups()
        # Convert to datetime object
        dt = datetime.datetime.strptime(f"{date_part}{time_part}", '%Y%m%d%H%M%S')
        # Return formatted string
        return dt.strftime('%Y-%m-%d %H:%M:%S')
    return None

# Create a mapping of formatted timestamp to filename
timestamp_to_filename = {format_timestamp_from_filename(f): f for f in report_files}

# Let the user select a report based on formatted timestamps
st.title("TA1 Integration Dashboard")
selected_timestamp = st.selectbox("Select a report", list(timestamp_to_filename.keys()))

# Map back to the original file name
selected_report = timestamp_to_filename[selected_timestamp]

# Open the selected report
with open(os.path.join(report_dir, selected_report)) as file:
    report = json.load(file)

test_results = defaultdict(lambda: defaultdict())

for scenario, content in report.items():
    for operation, tests in content["operations"].items():
        for name, result in tests.items():
            test_results[name][(content["name"], operation)] = result

scenarios = [report[scenario]["name"] for scenario in report.keys()]
operations = list(reduce(lambda left, right: left.union(right), [set(content["operations"].keys()) for content in report.values()], set()))
tests = sorted([i for i in test_results.keys() if i != "Logs"], reverse=True)
tests.append("Logs")


dataframes = {name: pd.DataFrame(index=scenarios, columns=operations) for name in tests}

st.sidebar.markdown("""
# TA1

TA1 integration status and quality metrics.

The current metrics are:
- Status of `knowledge-middleware` integration
- F-score for conversion of code/equations to AMR
- Execution time
- Application logs
""")
st.write("### Scenario Overview")
scenarios_overview = ""
for kk, vv in sorted(report.items(), key=lambda item: item[1]['name']):
    scenarios_overview += f"- **{vv['name']}**: {vv['description']}\n"
st.write(scenarios_overview)

for test in tests:
    df = dataframes[test]
    results = test_results[test]
    for (scenario_name, operation), result in results.items():
        df.at[scenario_name, operation] = result
    st.write(f"### {test}")
    df.replace({False: "❌", True: "✅", None: ""}, inplace=True)
    df.columns = [custom_title(col) for col in df.columns]
    df = df.sort_index()
    df
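The dashboard above discovers reports by filename. A quick, illustrative check (not part of this PR) that the name written by `tests/report.py` matches the pattern `1_TA1.py` lists:

```
# Illustrative only: report.py writes report_<YYYYMMDD>_<HHMMSS>.json,
# and 1_TA1.py looks for files matching r'report_\d{8}_\d{6}\.json'.
import datetime
import re

timestamp = datetime.datetime(2023, 9, 5, 14, 30, 0).strftime('%Y%m%d_%H%M%S')
filename = f"report_{timestamp}.json"                    # as written by tests/report.py
assert re.match(r'report_\d{8}_\d{6}\.json', filename)   # as matched by tests/pages/1_TA1.py
```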
50 changes: 50 additions & 0 deletions tests/report.py
@@ -0,0 +1,50 @@
import json
import csv
import datetime
import os
import re
from collections import defaultdict

import yaml

def report():
    # TODO: Make this into a predefined struct
    report = defaultdict(lambda: {"operations": defaultdict(dict)})
    if os.path.exists("tests/output/qual.csv"):
        with open("tests/output/qual.csv", "r", newline="") as file:
            qual = csv.reader(file)
            for scenario, operation, test, result in qual:
                report[scenario]["operations"][operation][test] = result

    with open("tests/output/tests.json", "r") as file:
        raw_tests = json.load(file)["tests"]
    def add_case(testobj):
        full_name = testobj["nodeid"].split("::")[-1]
        # Don't worry we're not actually checking if brackets match
        pattern = r"test_([a-z0-9_]+)\[([a-z0-9_]+)\]"
        match_result = re.match(pattern, full_name, re.IGNORECASE)
        operation, scenario = match_result[1], match_result[2]
        passed = testobj["outcome"] == "passed"
        duration = round(testobj["call"]["duration"], 2)
        report[scenario]["operations"][operation]["Integration Status"] = passed
        report[scenario]["operations"][operation]["Execution Time"] = duration
        try:
            logs = testobj["call"]["stderr"]
            report[scenario]["operations"][operation]["Logs"] = logs
        except Exception as e:
            print(f"Unable to obtain logs for {full_name}: {e}")
    for testobj in raw_tests: add_case(testobj)

    for scenario in report:
        with open(f"tests/scenarios/{scenario}/config.yaml") as file:
            spec = yaml.load(file, yaml.CLoader)
        report[scenario]["name"] = spec["name"]
        report[scenario]["description"] = spec["description"]

    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"tests/output/report_{timestamp}.json"
    with open(filename, "w") as file:
        json.dump(report, file, indent=2)

if __name__ == "__main__":
    report()
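For orientation, `report()` emits one entry per scenario shaped roughly like the Python literal below. `Integration Status`, `Execution Time`, and `Logs` come from the pytest JSON report; any additional keys (for example an F-score) are assumptions about what `tests/output/qual.csv` might contribute.

```
# Illustrative shape of tests/output/report_<timestamp>.json (values made up).
{
    "12_month_eval_scenario_1": {
        "operations": {
            "code_to_amr": {
                "Integration Status": True,
                "Execution Time": 12.34,
                "Logs": "...",
                # hypothetical qualitative row from tests/output/qual.csv:
                "F-score": "0.8",
            },
        },
        "name": "12 Month Eval Scenario #1",
        "description": "12 month evaluation scenario 1: SEIRHD model",
    },
}
```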
10 changes: 10 additions & 0 deletions tests/scenarios/12_month_eval_scenario_1/code.py
@@ -0,0 +1,10 @@
def SEIRHD_Model(y, t, N, beta, r_I_to_R, r_I_to_H, r_E_to_I, r_H_to_R, r_H_to_D, p_I_to_H, p_I_to_R, p_H_to_D, p_H_to_R):
    S, E, I, R, H, D = y

    dSdt = -beta * I * S / N
    dEdt = beta * I * S / N - r_E_to_I * E
    dIdt = r_E_to_I * E - (r_I_to_H * p_I_to_H) * I - (r_I_to_R * p_I_to_R * I)
    dRdt = (r_I_to_R * p_I_to_R * I) + (r_H_to_R * p_H_to_R * H)
    dHdt = (r_I_to_H * p_I_to_H * I) - (r_H_to_D * p_H_to_D * H) - (r_H_to_R * p_H_to_R * H)
    dDdt = r_H_to_D * p_H_to_D * H
    return dSdt, dEdt, dIdt, dRdt, dHdt, dDdt
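The scenario only enables `code_to_amr`, but the extracted model can also be integrated directly as a sanity check. A minimal sketch with made-up parameter values; `scipy` and `numpy` are assumed to be available and are not dependencies added by this PR:

```
# Sketch: integrate SEIRHD_Model (defined above) with scipy; values are illustrative.
import numpy as np
from scipy.integrate import odeint

N = 1_000_000
y0 = (N - 10, 0, 10, 0, 0, 0)   # S, E, I, R, H, D
t = np.linspace(0, 180, 181)    # days
# args order: N, beta, r_I_to_R, r_I_to_H, r_E_to_I, r_H_to_R, r_H_to_D,
#             p_I_to_H, p_I_to_R, p_H_to_D, p_H_to_R
params = (N, 0.3, 0.1, 0.05, 0.2, 0.1, 0.05, 0.1, 0.9, 0.1, 0.9)
solution = odeint(SEIRHD_Model, y0, t, args=params)
```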
5 changes: 5 additions & 0 deletions tests/scenarios/12_month_eval_scenario_1/config.yaml
@@ -0,0 +1,5 @@
---
name: "12 Month Eval Scenario #1"
description: "12 month evaluation scenario 1: SEIRHD model"
enabled:
  - code_to_amr