
Commit ed168b3
Merge pull request #621 from IATI/refactor-and-tests
Automated URL route testing, URL redirects and some minor refactor tidy-ups
Bjwebb authored Nov 7, 2024
2 parents 3b24364 + bbdd8a5 commit ed168b3
Showing 14 changed files with 344 additions and 91 deletions.
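
The headline change is automated testing of the Dashboard's URL routes and redirects using Django's test framework (see the README changes below for how to run the tests). As a rough sketch of what such a route test can look like (the paths, class name and status codes are illustrative assumptions, not the actual tests added in this commit):

```
# A hypothetical sketch of a Django URL route/redirect test. The paths and
# expected status codes are assumptions for illustration; the real tests
# live in the dashboard package's test modules.
from django.test import SimpleTestCase


class UrlRouteTests(SimpleTestCase):
    def test_page_route_resolves(self):
        # Assumes the dashboard serves a page at this path.
        response = self.client.get('/headlines/')
        self.assertEqual(response.status_code, 200)

    def test_legacy_url_redirects(self):
        # Assumes an old-style URL is permanently redirected to a new one.
        response = self.client.get('/index.html')
        self.assertEqual(response.status_code, 301)
```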
21 changes: 18 additions & 3 deletions README.md
@@ -30,7 +30,7 @@ The IATI Dashboard is mostly written in Python but also has some helper Bash scr
3. Build the static graphs and other data that will be served via the Dashboard.
4. Run the web server.

-Paths to different directories are set in `./src/config.py`.
+Paths to different directories are set in `./dashboard/config.py`.

### 1. Setup environment

@@ -64,7 +64,7 @@ Bash scripts are used to fetch the data that the Dashboard will present. They w

```
mkdir out
-cd src
+cd dashboard
python make_plots.py
python make_csv.py
python speakers_kit.py
@@ -75,7 +75,7 @@ cp ../img/publishers static/

### 4. Run the webserver.

-From `./src/`:
+From `./dashboard/`:

```
python manage.py runserver
@@ -86,6 +86,21 @@ The Dashboard will now be accessible from `localhost:8000/`.

## Development

+### Automated tests
+There are some unit tests written using `pytest`, and site tests written using Django's own testing framework.
+
+Once the development dependencies have been installed, the unit tests can be run with:
+
+```
+pytest
+```
+
+The Django site tests can be run from the `dashboard/` directory with:
+
+```
+python manage.py test -v 2
+```

### Calculating your own statistics

The IATI Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above. This can also be calculated using [Code for IATI Stats](http://github.com/codeforIATI/IATI-Stats) where `stats-calculated` corresponds to the `gitout` directory generated by [`git.sh` in IATI-Stats](https://github.com/codeforIATI/IATI-Stats#running-for-every-commit-in-the-data-directory).
4 changes: 2 additions & 2 deletions dashboard/common.py
@@ -2,10 +2,10 @@
import data
import json

-import config
+import filepaths

# Import organisation_type_codelist as a global, then delete when used to save memory
-with open(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh:
+with open(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh:
organisation_type_codelist = json.load(fh)
organisation_type_dict = {c['code']: c['name'] for c in organisation_type_codelist['data']}
del organisation_type_codelist
4 changes: 2 additions & 2 deletions dashboard/coverage.py
@@ -6,7 +6,7 @@
from data import publisher_name
from data import publishers_ordered_by_title
from data import secondary_publishers
-import config
+import filepaths


def is_number(s):
@@ -180,7 +180,7 @@ def table():


# Compile a list of Development finance institutions (DFIs)
-with open(config.join_base_path('dfi_publishers.csv'), 'r') as csv_file:
+with open(filepaths.join_base_path('dfi_publishers.csv'), 'r') as csv_file:
reader = csv.reader(csv_file, delimiter=',')
dfi_publishers = []
for line in reader:
46 changes: 23 additions & 23 deletions dashboard/data.py
@@ -8,7 +8,7 @@

import xmlschema

-import config
+import filepaths


# Modified from:
@@ -120,7 +120,7 @@ def get_publisher_name(self):

# Loop over this list and return the publisher name if it is found within the historic list of publishers
for x in path_components:
-if x in JSONDir(config.join_stats_path('current/aggregated-publisher')).keys():
+if x in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).keys():
return x

# If got to the end of the loop and nothing found, this folder does not relate to a single publisher
@@ -133,7 +133,7 @@ def get_publisher_stats(publisher, stats_type='aggregated'):
is not found.
"""
try:
-return JSONDir(config.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher)))
+return JSONDir(filepaths.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher)))
except IOError:
return {}

@@ -145,7 +145,7 @@ def get_registry_id_matches():
"""

# Load registry IDs for publishers who have changed their registry ID
-with open(config.join_base_path('registry_id_relationships.csv')) as f:
+with open(filepaths.join_base_path('registry_id_relationships.csv')) as f:
reader = csv.DictReader(f)
# Load this data into a dictonary
registry_matches = {
@@ -184,33 +184,33 @@ def deep_merge(obj1, obj2):


current_stats = {
-'aggregated': JSONDir(config.join_stats_path('current/aggregated')),
-'aggregated_file': JSONDir(config.join_stats_path('current/aggregated-file')),
-'inverted_publisher': JSONDir(config.join_stats_path('current/inverted-publisher')),
-'inverted_file': JSONDir(config.join_stats_path('current/inverted-file')),
-'inverted_file_publisher': JSONDir(config.join_stats_path('current/inverted-file-publisher')),
+'aggregated': JSONDir(filepaths.join_stats_path('current/aggregated')),
+'aggregated_file': JSONDir(filepaths.join_stats_path('current/aggregated-file')),
+'inverted_publisher': JSONDir(filepaths.join_stats_path('current/inverted-publisher')),
+'inverted_file': JSONDir(filepaths.join_stats_path('current/inverted-file')),
+'inverted_file_publisher': JSONDir(filepaths.join_stats_path('current/inverted-file-publisher')),
'download_errors': []
}
-ckan_publishers = JSONDir(config.join_data_path('ckan_publishers'))
-github_issues = JSONDir(config.join_data_path('github/publishers'))
-ckan = json.load(open(config.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict)
+ckan_publishers = JSONDir(filepaths.join_data_path('ckan_publishers'))
+github_issues = JSONDir(filepaths.join_data_path('github/publishers'))
+ckan = json.load(open(filepaths.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict)
dataset_to_publisher_dict = {
dataset: publisher
for publisher, publisher_dict in ckan.items()
for dataset in publisher_dict.keys()
}
-metadata = json.load(open(config.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict)
-with open(config.join_data_path('downloads/errors')) as fp:
+metadata = json.load(open(filepaths.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict)
+with open(filepaths.join_data_path('downloads/errors')) as fp:
for line in fp:
if line != '.\n':
current_stats['download_errors'].append(line.strip('\n').split(' ', 3))

sources105 = [
-config.join_data_path('schemas/1.05/iati-activities-schema.xsd'),
-config.join_data_path('schemas/1.05/iati-organisations-schema.xsd')]
+filepaths.join_data_path('schemas/1.05/iati-activities-schema.xsd'),
+filepaths.join_data_path('schemas/1.05/iati-organisations-schema.xsd')]
sources203 = [
-config.join_data_path('schemas/2.03/iati-activities-schema.xsd'),
-config.join_data_path('schemas/2.03/iati-organisations-schema.xsd')]
+filepaths.join_data_path('schemas/2.03/iati-activities-schema.xsd'),
+filepaths.join_data_path('schemas/2.03/iati-organisations-schema.xsd')]
schema105 = xmlschema.XMLSchema(sources105)
schema203 = xmlschema.XMLSchema(sources203)

@@ -246,7 +246,7 @@ def transform_codelist_mapping_keys(codelist_mapping):

def create_codelist_mapping(major_version):
codelist_mapping = {}
-for x in json.load(open(config.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))):
+for x in json.load(open(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))):
if 'condition' in x:
pref, attr = x['path'].rsplit('/', 1)
path = '{0}[{1}]/{2}'.format(
@@ -264,12 +264,12 @@ def create_codelist_mapping(major_version):
# Create a big dictionary of all codelist values by version and codelist name
codelist_sets = {
major_version: {
-cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items()
+cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items()
} for major_version in MAJOR_VERSIONS}

codelist_lookup = {
major_version: {
-cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items()
+cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items()
} for major_version in MAJOR_VERSIONS}

# Simple look up to map publisher id to a publishers given name (title)
Expand All @@ -279,11 +279,11 @@ def create_codelist_mapping(major_version):
publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower())

# List of publishers who report all their activities as a secondary publisher
-secondary_publishers = [publisher for publisher, stats in JSONDir(config.join_stats_path('current/aggregated-publisher')).items()
+secondary_publishers = [publisher for publisher, stats in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).items()
if int(stats['activities']) == len(stats['activities_secondary_reported']) and int(stats['activities']) > 0]

try:
-dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(config.join_data_path('dac2012.csv')))}
+dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(filepaths.join_data_path('dac2012.csv')))}
except IOError:
dac2012 = {}

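For orientation: `data.py` builds the module-level `current_stats` snapshot and the per-publisher helpers from the downloaded stats directory, now resolving every path through `filepaths` rather than `config`. Below is a minimal, hypothetical usage sketch; the publisher id is made up, and it assumes the `stats-calculated` and `data` directories have been fetched as the README describes.

```
# Hypothetical usage sketch, not code from this commit. Assumes the working
# directory is dashboard/ and the stats data has been downloaded.
from data import current_stats, get_publisher_stats

# 'example-publisher' is a made-up registry id for illustration.
stats = get_publisher_stats('example-publisher')  # returns {} if not found
if stats:
    print(int(stats['activities']))  # aggregated activity count

# current_stats exposes the full snapshot, e.g. every known publisher id:
publishers = list(current_stats['inverted_publisher']['activities'].keys())
print(len(publishers))
```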
14 changes: 5 additions & 9 deletions dashboard/config.py → dashboard/filepaths.py
@@ -6,32 +6,28 @@

import os.path

-
-STATS_DIRECTORY = "../stats-calculated"
-DATA_DIRECTORY = "../data"
-BASE_DIRECTORY = "../"
-OUT_DIRECTORY = "../out"
+import ui.settings


def join_stats_path(p: str) -> str:
"""Make a path to a file or directory within the downloaded stats directory
"""
-return os.path.join(STATS_DIRECTORY, p)
+return os.path.join(ui.settings.DASHBOARD_STATS_DIRECTORY, p)


def join_data_path(p: str) -> str:
"""Make a path to a file or directory within the downloaded data directory
"""
-return os.path.join(DATA_DIRECTORY, p)
+return os.path.join(ui.settings.DASHBOARD_DATA_DIRECTORY, p)


def join_base_path(p: str) -> str:
"""Make a path to a file or directory relative to the base of the dashboard directory
"""
-return os.path.join(BASE_DIRECTORY, p)
+return os.path.join(ui.settings.DASHBOARD_BASE_DIRECTORY, p)


def join_out_path(p: str) -> str:
"""Make a path to a file or directory relative to the base of the out directory
"""
-return os.path.join(OUT_DIRECTORY, p)
+return os.path.join(ui.settings.DASHBOARD_OUT_DIRECTORY, p)
24 changes: 12 additions & 12 deletions dashboard/make_csv.py
@@ -12,7 +12,7 @@
import comprehensiveness
import summary_stats
import humanitarian
-import config
+import filepaths


logger = logging.getLogger(__name__)
@@ -51,10 +51,10 @@ def main():
logger.addHandler(logging.StreamHandler(sys.stdout))

logger.info("Generating CSV files")
-os.makedirs(config.join_out_path('data/csv'), exist_ok=True)
+os.makedirs(filepaths.join_out_path('data/csv'), exist_ok=True)

logger.info("Generating publishers.csv")
-with open(config.join_out_path('data/csv/publishers.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/publishers.csv'), 'w') as fp:
writer = csv.DictWriter(fp, [
'Publisher Name',
'Publisher Registry Id',
Expand All @@ -76,23 +76,23 @@ def main():

logger.info("Generating elements.csv")
publishers = list(data.current_stats['inverted_publisher']['activities'].keys())
-with open(config.join_out_path('data/csv/elements.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/elements.csv'), 'w') as fp:
writer = csv.DictWriter(fp, ['Element'] + publishers)
writer.writeheader()
for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items():
publisher_dict['Element'] = element
writer.writerow(publisher_dict)

logger.info("Generating elements_total.csv")
-with open(config.join_out_path('data/csv/elements_total.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/elements_total.csv'), 'w') as fp:
writer = csv.DictWriter(fp, ['Element'] + publishers)
writer.writeheader()
for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items():
publisher_dict['Element'] = element
writer.writerow(publisher_dict)

logger.info("Generating registry.csv")
-with open(config.join_out_path('data/csv/registry.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/registry.csv'), 'w') as fp:
keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness']
writer = csv.DictWriter(fp, keys)
writer.writeheader()
@@ -101,29 +101,29 @@ def main():

logger.info("Generating timeliness_frequency.csv")
previous_months = timeliness.previous_months_reversed
-with open(config.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp:
writer = csv.writer(fp)
writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published'])
for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted():
writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band])

logger.info("Generating timeliness_timelag.csv")
-with open(config.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp:
writer = csv.writer(fp)
writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time lag'])
for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted():
writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment])

logger.info("Generating forwardlooking.csv")
-with open(config.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp:
writer = csv.writer(fp)
writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years])
for row in forwardlooking.table():
writer.writerow([row['publisher_title'], row['publisher']] + [year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years])

for tab in comprehensiveness.columns.keys():
logger.info("Generating comprehensiveness_{}.csv".format(tab))
-with open(config.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp:
writer = csv.writer(fp)
if tab == 'financials':
writer.writerow(['Publisher Name', 'Publisher Registry Id'] +
Expand All @@ -145,7 +145,7 @@ def main():
[row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]])

logger.info("Generating summary_stats.csv")
-with open(config.join_out_path('data/csv/summary_stats.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/summary_stats.csv'), 'w') as fp:
writer = csv.writer(fp)
# Add column headers
writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns])
Expand All @@ -154,7 +154,7 @@ def main():
writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns])

logger.info("Generating humanitarian.csv")
-with open(config.join_out_path('data/csv/humanitarian.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/humanitarian.csv'), 'w') as fp:
writer = csv.writer(fp)
# Add column headers
writer.writerow([
(Diffs for the remaining 8 changed files are not shown here.)
