diff --git a/.github/workflows/test_with_docker.yml b/.github/workflows/test_with_docker.yml new file mode 100644 index 0000000..c8406a6 --- /dev/null +++ b/.github/workflows/test_with_docker.yml @@ -0,0 +1,36 @@ +# This is a basic workflow to help you get started with Actions + +name: test-with-docker + +# Controls when the action will run. Triggers the workflow on push or pull request +# events but only for the master branch +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + schedule: + # * is a special character in YAML so you have to quote this string + - cron: '5 4 * * 0' + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "build" + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - name: Checkout + uses: actions/checkout@v2 + + - name: Make sure that the workflow works + run: echo Smoke test + + - name: Run the tests using docker-compose + working-directory: .github/workflows + run: | + docker compose -f ../../docker-compose.tests.yml build + docker compose -f ../../docker-compose.tests.yml up --exit-code-from notebook-server diff --git a/docker-compose.tests.yml b/docker-compose.tests.yml new file mode 100644 index 0000000..6ed827e --- /dev/null +++ b/docker-compose.tests.yml @@ -0,0 +1,51 @@ +version: "3" +services: + dashboard: + image: em-pub-dash-dev/frontend + build: + context: frontend + dockerfile: docker/Dockerfile.dev + depends_on: + - db + ports: + # DASH in numbers + - "3274:6060" + volumes: + - ./frontend:/public + - ./plots:/public/plots + networks: + - emission + notebook-server: + image: em-pub-dash-dev/viz-scripts + build: + context: viz_scripts + dockerfile: docker/Dockerfile.test + args: + SERVER_IMAGE_TAG: 
${SERVER_IMAGE_TAG} + depends_on: + - db + environment: + - DB_HOST=db + - WEB_SERVER_HOST=0.0.0.0 + - CRON_MODE= + - STUDY_CONFIG=stage-program + ports: + # ipynb in numbers + - "47962:47962" + networks: + - emission + volumes: + - ./viz_scripts:/usr/src/app/saved-notebooks + - ./plots:/plots + db: + image: mongo:4.4.0 + volumes: + - mongo-data:/data/db + networks: + - emission + +networks: + emission: + +volumes: + mongo-data: diff --git a/viz_scripts/docker/Dockerfile.test b/viz_scripts/docker/Dockerfile.test new file mode 100644 index 0000000..d9a84ea --- /dev/null +++ b/viz_scripts/docker/Dockerfile.test @@ -0,0 +1,19 @@ +# python 3 +ARG SERVER_IMAGE_TAG +FROM shankari/e-mission-server:master_${SERVER_IMAGE_TAG} + +VOLUME /plots + +ADD docker/environment36.dashboard.additions.yml / + +WORKDIR /usr/src/app + +RUN /bin/bash -c "source setup/activate.sh && conda env update --name emission --file setup/environment36.notebook.additions.yml" +RUN /bin/bash -c "source setup/activate.sh && conda env update --name emission --file /environment36.dashboard.additions.yml" + +ADD docker/start_tests.sh /usr/src/app/.docker/start_tests.sh +RUN chmod u+x /usr/src/app/.docker/start_tests.sh + +EXPOSE 8888 + +CMD ["/bin/bash", "/usr/src/app/.docker/start_tests.sh"] diff --git a/viz_scripts/docker/environment36.dashboard.additions.yml b/viz_scripts/docker/environment36.dashboard.additions.yml index 59d26eb..49d927f 100644 --- a/viz_scripts/docker/environment36.dashboard.additions.yml +++ b/viz_scripts/docker/environment36.dashboard.additions.yml @@ -4,6 +4,8 @@ channels: - defaults dependencies: - seaborn=0.11.1 +- pytest +- coverage - pip: - nbparameterise==0.6 - devcron==0.4 diff --git a/viz_scripts/docker/start_tests.sh b/viz_scripts/docker/start_tests.sh new file mode 100644 index 0000000..2ae0c46 --- /dev/null +++ b/viz_scripts/docker/start_tests.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e # Exit on error + +# change python environment +pwd +source setup/activate.sh || exit 1 
+conda env list +cd saved-notebooks/tests || exit 1 + +echo "Starting unit tests..." +PYTHONPATH=../.. coverage run -m pytest . -v + +coverage report diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py index 34e26bc..4c35465 100644 --- a/viz_scripts/plots.py +++ b/viz_scripts/plots.py @@ -9,7 +9,17 @@ sns.set_style("whitegrid") sns.set() -get_ipython().run_line_magic('matplotlib', 'inline') + +try: + # Import the function + from IPython import get_ipython + # Check if running in an IPython environment (like Jupyter Notebook) + if get_ipython() is not None: + get_ipython().run_line_magic('matplotlib', 'inline') +except ImportError: + # Handle the case where IPython is not installed + # We are running in regular Python (likely pytest), not Jupyter/IPython + pass # Module for pretty-printing outputs (e.g. head) to help users # understand what is going on diff --git a/viz_scripts/tests/test_plots.py b/viz_scripts/tests/test_plots.py new file mode 100644 index 0000000..26d26e3 --- /dev/null +++ b/viz_scripts/tests/test_plots.py @@ -0,0 +1,58 @@ +import pytest as pytest +import pandas as pd +import numpy as np +# Using import_module, as we have saved-notebooks as the directory +import importlib +plots = importlib.import_module('saved-notebooks.plots') + +# Test Data Fixtures +@pytest.fixture +def sample_labels(): + return ['Car', 'Bus', 'Train', 'Walk'] + +@pytest.fixture +def sample_values(): + return [100, 50, 3, 1] + +@pytest.fixture +def sample_labels_no_small(): + return ['Car', 'Bus'] + + +@pytest.fixture +def sample_values_no_small(): + return [100, 100] + +class TestCalculatePct: + def test_calculate_pct_basic(self, sample_labels, sample_values): + labels, values, pcts = plots.calculate_pct(sample_labels, sample_values) + assert len(labels) == len(sample_labels) + assert len(values) == len(sample_values) + assert sum(pcts) == pytest.approx(100.0, abs=0.1) + + def test_calculate_pct_empty(self): + labels, values, pcts = plots.calculate_pct([],[]) + assert 
len(labels) == 0 + assert len(values) == 0 + assert len(pcts) == 0 + + def test_calculate_pct_single(self): + labels, values, pcts = plots.calculate_pct(['Car'], [100]) + assert pcts == [100.0] + +class TestMergeSmallEntries: + def test_merge_small_entries_basic(self, sample_labels, sample_values): + labels, values, pcts = plots.merge_small_entries(sample_labels, sample_values) + assert all(pct > 2.0 for pct in pcts) + + def test_merge_small_entries_no_small(self, sample_labels_no_small, sample_values_no_small): + result_labels, result_values, result_pcts = plots.merge_small_entries(sample_labels_no_small, sample_values_no_small) + assert len(result_labels) == 2 + assert 'other' not in result_labels + assert 'OTHER' not in result_labels + + def test_merge_small_entries_some_small(self, sample_labels, sample_values): + result_labels, result_values, result_pcts = plots.merge_small_entries(sample_labels, sample_values) + print(result_labels) + assert len(result_labels) == 3 + assert result_labels[0] in ['Car', 'Bus','other', 'OTHER'] diff --git a/viz_scripts/tests/test_scaffolding.py b/viz_scripts/tests/test_scaffolding.py new file mode 100644 index 0000000..8ce0877 --- /dev/null +++ b/viz_scripts/tests/test_scaffolding.py @@ -0,0 +1,378 @@ +import unittest.mock as mock +import emission.core.wrapper.localdate as ecwl +import emission.storage.timeseries.tcquery as esttc +import importlib +import pandas as pd +import numpy as np +import collections as colls +import pytest +import asyncio +import matplotlib.pyplot as plt +import emcommon.util as emcu + +# Dynamically import saved-notebooks.plots +scaffolding = importlib.import_module('saved-notebooks.scaffolding') + +def test_get_time_query(): + # Test with both year and month + result = scaffolding.get_time_query(2022, 6) + assert result is not None + assert isinstance(result, esttc.TimeComponentQuery) + + # Test with year and no month + result = scaffolding.get_time_query(2023, None) + assert result is not None + 
assert isinstance(result, esttc.TimeComponentQuery) + + # Test with month and no year + with pytest.raises(Exception) as e_info: + result = scaffolding.get_time_query(None, 12) + + # Test with no year or month + result = scaffolding.get_time_query(None, None) + assert result is None + +@pytest.fixture +def dynamic_labels(): + return { + "MODE": [ + {"value":"gas_car", "base_mode": "CAR", + "baseMode":"CAR", "met_equivalent":"IN_VEHICLE", "kgCo2PerKm": 0.22031}, + {"value":"motorcycle", "base_mode": "MOPED", "footprint": { "gasoline": { "wh_per_km": 473.17 }}, + "baseMode":"MOPED", "met_equivalent":"IN_VEHICLE", "kgCo2PerKm": 0.113143309}, + {"value":"walk", "base_mode": "WALKING", + "baseMode":"WALKING", "met_equivalent":"WALKING", "kgCo2PerKm": 0} + ], + "PURPOSE": [ + {"value":"home"}, + {"value":"shopping"}, + {"value":"meal"} + ], + "REPLACED_MODE": [ + {"value":"no_travel"}, + {"value":"bike"}, + {"value":"taxi"} + ], + "translations": { + "en": { + "walk": "Walk", + "motorcycle":"Motorcycle", + "bike": "Bicycle", + "gas_car": "Car", + "taxi": "Taxi", + "no_travel": "No Travel", + "home": "Home", + "meal": "Meal", + "shopping": "Shopping" + }, + "es": { + "walk": "Caminando", + "motorcycle":"Motocicleta", + "bike":"Bicicleta", + "gas_car":"Coche de gasolina", + "taxi":"Taxi", + "no_travel":"No viajar", + "home":"Casa", + "meal":"Comida", + "shopping":"Compras" + } + } + } + +def test_mapping_labels(dynamic_labels): + result_mode = scaffolding.mapping_labels(dynamic_labels, "MODE") + result_purpose = scaffolding.mapping_labels(dynamic_labels, "PURPOSE") + result_replaced = scaffolding.mapping_labels(dynamic_labels, "REPLACED_MODE") + + expected_result_mode = colls.defaultdict(lambda: 'Other', { + "gas_car": "Car", + "motorcycle": "Motorcycle", + "walk": "Walk" + }) + + expected_result_purpose = colls.defaultdict(lambda: 'Other', { + "home": "Home", + "shopping": "Shopping", + "meal": "Meal" + }) + + expected_result_replaced = colls.defaultdict(lambda: 'Other', 
{ + "no_travel": "No Travel", + "bike": "Bicycle", + "taxi": "Taxi" + }) + assert result_mode == expected_result_mode + assert result_purpose == expected_result_purpose + assert result_replaced == expected_result_replaced + +def test_mapping_color_surveys(): + dic_options = { + 'yes': 'Yes', + 'no': 'No', + '1': 'Disagree (1)', + '2': '2', + '3': 'Neutral (3)', + '4': '4', + '5': 'Agree (5)', + 'unsure': 'Unsure' + } + + result = scaffolding.mapping_color_surveys(dic_options) + + # Check that result is a dictionary + assert isinstance(result, dict) + + # Check unique values have unique colors, with an Other + unique_values = list(colls.OrderedDict.fromkeys(dic_options.values())) + assert len(result) == len(unique_values) + 1 + + # Check colors are from plt.cm.tab10 + for color in result.values(): + assert color in plt.cm.tab10.colors + + # Specific checks for this example + assert result['Yes'] == plt.cm.tab10.colors[0] + assert result['No'] == plt.cm.tab10.colors[1] + assert result['Disagree (1)'] == plt.cm.tab10.colors[2] + assert result['2'] == plt.cm.tab10.colors[3] + assert result['Neutral (3)'] == plt.cm.tab10.colors[4] + assert result['4'] == plt.cm.tab10.colors[5] + assert result['Agree (5)'] == plt.cm.tab10.colors[6] + assert result['Unsure'] == plt.cm.tab10.colors[7] + assert result['Other'] == plt.cm.tab10.colors[8] + +def test_mapping_color_surveys_empty(): + # Test with an empty dictionary + with pytest.raises(Exception): + scaffolding.mapping_color_surveys({}) + +@pytest.fixture +def before_df(): + return pd.DataFrame({ + "user_id":["user_1", "user_1", "user_1", "user_2", "user_2", "user_3", "user_4", "user_5"], + "mode_confirm":["own_car", "own_car", "walk", "bus", "walk", "car", "motorcycle", "bike"], + "Mode_confirm":["Other", "Other", "Walk", "Bus", "Walk", "Car", "Bike", "Bike"], + "raw_trip":["trip_0", "trip_1", "trip_2", "trip_3", "trip_4", "trip_5", "trip_6", "trip_7"], + "start_ts":[1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 
1.690e+09, 1.690e+09], + "duration": [1845.26, 1200.89, 1000.56, 564.54, 456.456, 156.45, 1564.456, 156.564], + "distance": [100, 150, 600, 500, 300, 200, 50, 20] + }) + +@pytest.fixture +def after_df(): + return pd.DataFrame({ + "user_id":["user_1", "user_1", "user_4", "user_5"], + "mode_confirm":["own_car", "own_car", "motorcycle", "bike"], + "Mode_confirm":["Other", "Other", "Bike", "Bike"], + "raw_trip":["trip_0", "trip_1", "trip_6", "trip_7"], + "start_ts":[1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09], + "duration": [1845.26, 1200.89, 1564.456, 156.564], + "distance": [100, 150, 50, 20] + }) + +def test_get_quality_text(before_df, after_df): + result = scaffolding.get_quality_text(before_df, after_df) + assert result == "Based on 4 confirmed trips from 3 users\nof 8 total trips from 5 users (50.00%)" + +def test_get_quality_text_include_test_users(before_df, after_df): + result = scaffolding.get_quality_text(before_df, after_df, include_test_users = True) + assert result == "Based on 4 confirmed trips from 3 testers and participants\nof 8 total trips from 5 users (50.00%)" + +def test_get_quality_text_include_mode_of_interest(before_df, after_df): + result = scaffolding.get_quality_text(before_df, after_df, mode_of_interest = "Motorcycle") + assert result == "Based on 4 confirmed Motorcycle trips from 3 users\nof 8 total confirmed trips from 5 users (50.00%)" + +@pytest.fixture +def sensed_df(): + return pd.DataFrame({ + "user_id":["user_1", "user_1", "user_1", "user_2", "user_2", "user_3", "user_4", "user_5"], + "primary_mode":["IN_VEHICLE", "IN_VEHICLE", "IN_VEHICLE", "IN_VEHICLE", "IN_VEHICLE", "IN_VEHICLE", "IN_VEHICLE", "IN_VEHICLE"], + "raw_trip":["trip_0", "trip_1", "trip_2", "trip_3", "trip_4", "trip_5", "trip_6", "trip_7"], + "start_ts":[1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09], + "duration": [1845.26, 1200.89, 1000.56, 564.54, 456.456, 156.45, 1564.456, 156.564] + }) + +def 
test_get_quality_text_sensed(sensed_df): + result = scaffolding.get_quality_text_sensed(sensed_df) + assert result == "Based on 8 trips from 5 users" + +def test_get_quality_text_sensed_include_test_users(sensed_df): + result = scaffolding.get_quality_text_sensed(sensed_df, include_test_users=True) + assert result == "Based on 8 trips from 5 testers and participants" + +def test_get_quality_text_numerator(sensed_df): + result = scaffolding.get_quality_text_sensed(sensed_df) + assert result == "Based on 8 trips from 5 users" + +def test_get_quality_text_numerator_include_test_users(sensed_df): + result = scaffolding.get_quality_text_sensed(sensed_df, include_test_users=True) + assert result == "Based on 8 trips from 5 testers and participants" + +def test_get_file_suffix(): + year = 2024 + month = 12 + program = "default" + result = scaffolding.get_file_suffix(year, month, program) + assert result == "_2024_12_default" + +def test_unit_conversions(before_df): + test_df = before_df.copy() + scaffolding.unit_conversions(test_df) + assert 'distance_miles' in test_df.columns + assert 'distance_kms' in test_df.columns + + np.testing.assert_almost_equal( + test_df['distance_miles'], + [0.062, 0.093, 0.373, 0.311, 0.186, 0.124, 0.031, 0.012], + decimal=2 + ) + + np.testing.assert_almost_equal( + test_df['distance_kms'], + [0.1, 0.15, 0.6, 0.5, 0.3, 0.2, 0.05, 0.02], + decimal=2 + ) + +def test_filter_labeled_trips_with_labeled_trips(): + mixed_trip_df = pd.DataFrame({ + 'user_input':[ + {'purpose_confirm': 'work', 'mode_confirm':'own_car'}, + {'mode_confirm':'bus'}, + {'purpose_confirm': 'school'}, + {}, + {'purpose_confirm': 'shopping', 'mode_confirm':'car'}, + {}, + {} + ], + "distance": [100, 150, 50, 20, 50, 10, 60] + }) + + labeled_ct = scaffolding.filter_labeled_trips(mixed_trip_df) + + # Assert the length of the dataframe, which does not have user_input + assert len(labeled_ct) == 4 + assert all(labeled_ct['user_input'].apply(bool)) + +def 
test_filter_labeled_trips_empty_dataframe(): + # Create an empty DataFrame + mixed_trip_df = pd.DataFrame(columns=['user_input']) + + labeled_ct = scaffolding.filter_labeled_trips(mixed_trip_df) + + # Assert the returned DataFrame is empty + assert len(labeled_ct) == 0 + +def test_filter_labeled_trips_no_labeled_trips(): + # Create a DataFrame with only unlabeled trips + mixed_trip_df = pd.DataFrame({ + 'user_input': [{}, {}, {}], + "distance": [100, 150, 50] + }) + + labeled_ct = scaffolding.filter_labeled_trips(mixed_trip_df) + + # Assert the returned DataFrame is empty + assert len(labeled_ct) == 0 + +@pytest.fixture +def labeled_ct(): + return pd.DataFrame({ + 'user_input':[ + {'purpose_confirm': 'work', 'mode_confirm':'own_car'}, + {'mode_confirm':'bus'}, + {'purpose_confirm': 'school'}, + {'purpose_confirm': 'at_work', 'mode_confirm': 'own_car'}, + {'purpose_confirm': 'access_recreation', 'mode_confirm':'car'}, + {'mode_confirm':'bike', 'purpose_confirm':'pick_drop_person'}, + {'purpose_confirm':'work', 'mode_confirm':'bike'} + ], + "distance": [100, 150, 50, 20, 50, 10, 60], + "user_id":["user_1", "user_1", "user_1", "user_2", "user_2", "user_3", "user_4"], + "raw_trip":["trip_0", "trip_1", "trip_2", "trip_3", "trip_4", "trip_5", "trip_6"], + "start_ts":[1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09, 1.690e+09], + "duration": [1845.26, 1200.89, 1000.56, 564.54, 456.456, 156.45, 1564.456], + "distance": [100, 150, 600, 500, 300, 200, 50] + }) + +def test_expand_userinputs(labeled_ct): + expanded_ct = scaffolding.expand_userinputs(labeled_ct) + + # Assert the length of the dataframe is not changed + # Assert the columns have increased with labels_per_trip + labels_per_trip = len(pd.DataFrame(labeled_ct.user_input.to_list()).columns) + + assert len(expanded_ct) == len(labeled_ct) + assert labels_per_trip == 2 + assert len(expanded_ct.columns) == len(labeled_ct.columns) + labels_per_trip + + # Assert new columns and their values + assert 
'mode_confirm' in expanded_ct.columns + assert 'purpose_confirm' in expanded_ct.columns + assert expanded_ct['purpose_confirm'].fillna('NaN').tolist() == ['work', 'NaN', 'school', 'at_work', 'access_recreation', 'pick_drop_person', 'work'] + assert expanded_ct['mode_confirm'].fillna('NaN').tolist() == ['own_car', 'bus', 'NaN', 'own_car', 'car', 'bike', 'bike'] + +# Testing with just dynamic_labels since PR#164 Unify calls to read json resource from e-mission-common in generate_plots.py would make sure we have labels passed into this file, instead of fetching the label-options.json file here +@pytest.mark.asyncio +async def test_translate_values_to_labels_english(dynamic_labels): + # Call the function with our predefined labels + mode_translations, purpose_translations, replaced_translations = await scaffolding.translate_values_to_labels(dynamic_labels) + + expected_mode_translations = colls.defaultdict(lambda: 'Other', { + "gas_car": "Car", + "motorcycle": "Motorcycle", + "walk": "Walk" + }) + + expected_purpose_translations = colls.defaultdict(lambda: 'Other', { + "home": "Home", + "shopping": "Shopping", + "meal": "Meal" + }) + + expected_replaced_translations = colls.defaultdict(lambda: 'Other', { + "no_travel": "No Travel", + "bike": "Bicycle", + "taxi": "Taxi" + }) + assert mode_translations == expected_mode_translations + assert purpose_translations == expected_purpose_translations + assert replaced_translations == expected_replaced_translations + +# TODO:: Implement language specific changes in mapping_translations +@pytest.mark.skip(reason="Implementation limited only for english translations") +@pytest.mark.asyncio +async def test_translate_values_to_labels_spanish(dynamic_labels, language="es"): + # Call the function with our predefined labels + mode_translations_es, purpose_translations_es, replaced_translations_es = await scaffolding.translate_values_to_labels(dynamic_labels) + + expected_mode_translations_es = colls.defaultdict(lambda: 'Other', { + 
"gas_car":"Coche de gasolina", + "motorcycle":"Motocicleta", + "walk": "Caminando" + }) + + expected_purpose_translations_es = colls.defaultdict(lambda: 'Other', { + "home":"Casa", + "shopping":"Compras", + "meal":"Comida" + }) + + expected_replaced_translations_es = colls.defaultdict(lambda: 'Other', { + "no_travel":"No viajar", + "bike":"Bicicleta", + "taxi": "Taxi" + }) + assert mode_translations_es == expected_mode_translations_es + assert purpose_translations_es == expected_purpose_translations_es + assert replaced_translations_es == expected_replaced_translations_es + +@pytest.mark.asyncio +async def test_translate_values_to_labels_empty_input(): + # Test with empty input + mode_translations, purpose_translations, replaced_translations = await scaffolding.translate_values_to_labels([]) + + # Verify that the function can handle empty input + # The exact behavior depends on how your function handles this + assert isinstance(mode_translations, dict) + assert isinstance(purpose_translations, dict) + assert isinstance(replaced_translations, dict)