
Commit ed168b3
Merge pull request #621 from IATI/refactor-and-tests
Automated URL route testing, URL redirects and some minor refactor tidy-ups
Bjwebb authored Nov 7, 2024
2 parents 3b24364 + bbdd8a5 commit ed168b3
Showing 14 changed files with 344 additions and 91 deletions.
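
The headline change is automated testing of the Dashboard's URL routes and redirects using Django's test framework (see the README changes below for how to run the tests). As a rough sketch of what such a route test can look like (the paths, class name and status codes are illustrative assumptions, not the actual tests added in this commit):

```
# A hypothetical sketch of a Django URL route/redirect test. The paths and
# expected status codes are assumptions for illustration; the real tests
# live in the dashboard package's test modules.
from django.test import SimpleTestCase


class UrlRouteTests(SimpleTestCase):
    def test_page_route_resolves(self):
        # Assumes the dashboard serves a page at this path.
        response = self.client.get('/headlines/')
        self.assertEqual(response.status_code, 200)

    def test_legacy_url_redirects(self):
        # Assumes an old-style URL is permanently redirected to a new one.
        response = self.client.get('/index.html')
        self.assertEqual(response.status_code, 301)
```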
21 changes: 18 additions & 3 deletions README.md
@@ -30,7 +30,7 @@ The IATI Dashboard is mostly written in Python but also has some helper Bash scr
3. Build the static graphs and other data that will be served via the Dashboard.
4. Run the web server.

-Paths to different directories are set in `./src/config.py`.
+Paths to different directories are set in `./dashboard/config.py`.

### 1. Setup environment

@@ -64,7 +64,7 @@ Bash scripts are used to fetch the data that the Dashboard will present. They w

```
mkdir out
-cd src
+cd dashboard
python make_plots.py
python make_csv.py
python speakers_kit.py
@@ -75,7 +75,7 @@ cp ../img/publishers static/

### 4. Run the webserver.

-From `./src/`:
+From `./dashboard/`:

```
python manage.py runserver
@@ -86,6 +86,21 @@ The Dashboard will now be accessible from `localhost:8000/`.

## Development

+### Automated tests
+There are some unit tests written using `pytest`, and site tests written using Django's own testing framework.
+
+Once the development dependencies have been installed, the unit tests can be run with:
+
+```
+pytest
+```
+
+The Django site tests can be run from the `dashboard/` directory with:
+
+```
+python manage.py test -v 2
+```

### Calculating your own statistics

The IATI Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above. This can also be calculated using [Code for IATI Stats](http://github.com/codeforIATI/IATI-Stats) where `stats-calculated` corresponds to the `gitout` directory generated by [`git.sh` in IATI-Stats](https://github.com/codeforIATI/IATI-Stats#running-for-every-commit-in-the-data-directory).
4 changes: 2 additions & 2 deletions dashboard/common.py
@@ -2,10 +2,10 @@
import data
import json

-import config
+import filepaths

# Import organisation_type_codelist as a global, then delete when used to save memory
-with open(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh:
+with open(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh:
organisation_type_codelist = json.load(fh)
organisation_type_dict = {c['code']: c['name'] for c in organisation_type_codelist['data']}
del organisation_type_codelist
4 changes: 2 additions & 2 deletions dashboard/coverage.py
@@ -6,7 +6,7 @@
from data import publisher_name
from data import publishers_ordered_by_title
from data import secondary_publishers
-import config
+import filepaths


def is_number(s):
@@ -180,7 +180,7 @@ def table():


# Compile a list of Development finance institutions (DFIs)
-with open(config.join_base_path('dfi_publishers.csv'), 'r') as csv_file:
+with open(filepaths.join_base_path('dfi_publishers.csv'), 'r') as csv_file:
reader = csv.reader(csv_file, delimiter=',')
dfi_publishers = []
for line in reader:
46 changes: 23 additions & 23 deletions dashboard/data.py
@@ -8,7 +8,7 @@

import xmlschema

-import config
+import filepaths


# Modified from:
@@ -120,7 +120,7 @@ def get_publisher_name(self):

# Loop over this list and return the publisher name if it is found within the historic list of publishers
for x in path_components:
-if x in JSONDir(config.join_stats_path('current/aggregated-publisher')).keys():
+if x in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).keys():
return x

# If got to the end of the loop and nothing found, this folder does not relate to a single publisher
@@ -133,7 +133,7 @@ def get_publisher_stats(publisher, stats_type='aggregated'):
is not found.
"""
try:
-return JSONDir(config.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher)))
+return JSONDir(filepaths.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher)))
except IOError:
return {}

@@ -145,7 +145,7 @@ def get_registry_id_matches():
"""

# Load registry IDs for publishers who have changed their registry ID
-with open(config.join_base_path('registry_id_relationships.csv')) as f:
+with open(filepaths.join_base_path('registry_id_relationships.csv')) as f:
reader = csv.DictReader(f)
# Load this data into a dictonary
registry_matches = {
@@ -184,33 +184,33 @@ def deep_merge(obj1, obj2):


current_stats = {
-'aggregated': JSONDir(config.join_stats_path('current/aggregated')),
-'aggregated_file': JSONDir(config.join_stats_path('current/aggregated-file')),
-'inverted_publisher': JSONDir(config.join_stats_path('current/inverted-publisher')),
-'inverted_file': JSONDir(config.join_stats_path('current/inverted-file')),
-'inverted_file_publisher': JSONDir(config.join_stats_path('current/inverted-file-publisher')),
+'aggregated': JSONDir(filepaths.join_stats_path('current/aggregated')),
+'aggregated_file': JSONDir(filepaths.join_stats_path('current/aggregated-file')),
+'inverted_publisher': JSONDir(filepaths.join_stats_path('current/inverted-publisher')),
+'inverted_file': JSONDir(filepaths.join_stats_path('current/inverted-file')),
+'inverted_file_publisher': JSONDir(filepaths.join_stats_path('current/inverted-file-publisher')),
'download_errors': []
}
-ckan_publishers = JSONDir(config.join_data_path('ckan_publishers'))
-github_issues = JSONDir(config.join_data_path('github/publishers'))
-ckan = json.load(open(config.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict)
+ckan_publishers = JSONDir(filepaths.join_data_path('ckan_publishers'))
+github_issues = JSONDir(filepaths.join_data_path('github/publishers'))
+ckan = json.load(open(filepaths.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict)
dataset_to_publisher_dict = {
dataset: publisher
for publisher, publisher_dict in ckan.items()
for dataset in publisher_dict.keys()
}
-metadata = json.load(open(config.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict)
-with open(config.join_data_path('downloads/errors')) as fp:
+metadata = json.load(open(filepaths.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict)
+with open(filepaths.join_data_path('downloads/errors')) as fp:
for line in fp:
if line != '.\n':
current_stats['download_errors'].append(line.strip('\n').split(' ', 3))

sources105 = [
-config.join_data_path('schemas/1.05/iati-activities-schema.xsd'),
-config.join_data_path('schemas/1.05/iati-organisations-schema.xsd')]
+filepaths.join_data_path('schemas/1.05/iati-activities-schema.xsd'),
+filepaths.join_data_path('schemas/1.05/iati-organisations-schema.xsd')]
sources203 = [
-config.join_data_path('schemas/2.03/iati-activities-schema.xsd'),
-config.join_data_path('schemas/2.03/iati-organisations-schema.xsd')]
+filepaths.join_data_path('schemas/2.03/iati-activities-schema.xsd'),
+filepaths.join_data_path('schemas/2.03/iati-organisations-schema.xsd')]
schema105 = xmlschema.XMLSchema(sources105)
schema203 = xmlschema.XMLSchema(sources203)

@@ -246,7 +246,7 @@ def transform_codelist_mapping_keys(codelist_mapping):

def create_codelist_mapping(major_version):
codelist_mapping = {}
-for x in json.load(open(config.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))):
+for x in json.load(open(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))):
if 'condition' in x:
pref, attr = x['path'].rsplit('/', 1)
path = '{0}[{1}]/{2}'.format(
@@ -264,12 +264,12 @@ def create_codelist_mapping(major_version):
# Create a big dictionary of all codelist values by version and codelist name
codelist_sets = {
major_version: {
-cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items()
+cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items()
} for major_version in MAJOR_VERSIONS}

codelist_lookup = {
major_version: {
-cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items()
+cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items()
} for major_version in MAJOR_VERSIONS}

# Simple look up to map publisher id to a publishers given name (title)
Expand All @@ -279,11 +279,11 @@ def create_codelist_mapping(major_version):
publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower())

# List of publishers who report all their activities as a secondary publisher
-secondary_publishers = [publisher for publisher, stats in JSONDir(config.join_stats_path('current/aggregated-publisher')).items()
+secondary_publishers = [publisher for publisher, stats in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).items()
if int(stats['activities']) == len(stats['activities_secondary_reported']) and int(stats['activities']) > 0]

try:
-dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(config.join_data_path('dac2012.csv')))}
+dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(filepaths.join_data_path('dac2012.csv')))}
except IOError:
dac2012 = {}

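For orientation: `data.py` builds the module-level `current_stats` snapshot and the per-publisher helpers from the downloaded stats directory, now resolving every path through `filepaths` rather than `config`. Below is a minimal, hypothetical usage sketch; the publisher id is made up, and it assumes the `stats-calculated` and `data` directories have been fetched as the README describes.

```
# Hypothetical usage sketch, not code from this commit. Assumes the working
# directory is dashboard/ and the stats data has been downloaded.
from data import current_stats, get_publisher_stats

# 'example-publisher' is a made-up registry id for illustration.
stats = get_publisher_stats('example-publisher')  # returns {} if not found
if stats:
    print(int(stats['activities']))  # aggregated activity count

# current_stats exposes the full snapshot, e.g. every known publisher id:
publishers = list(current_stats['inverted_publisher']['activities'].keys())
print(len(publishers))
```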
14 changes: 5 additions & 9 deletions dashboard/config.py → dashboard/filepaths.py
@@ -6,32 +6,28 @@

import os.path

-
-STATS_DIRECTORY = "../stats-calculated"
-DATA_DIRECTORY = "../data"
-BASE_DIRECTORY = "../"
-OUT_DIRECTORY = "../out"
+import ui.settings


def join_stats_path(p: str) -> str:
"""Make a path to a file or directory within the downloaded stats directory
"""
-return os.path.join(STATS_DIRECTORY, p)
+return os.path.join(ui.settings.DASHBOARD_STATS_DIRECTORY, p)


def join_data_path(p: str) -> str:
"""Make a path to a file or directory within the downloaded data directory
"""
-return os.path.join(DATA_DIRECTORY, p)
+return os.path.join(ui.settings.DASHBOARD_DATA_DIRECTORY, p)


def join_base_path(p: str) -> str:
"""Make a path to a file or directory relative to the base of the dashboard directory
"""
-return os.path.join(BASE_DIRECTORY, p)
+return os.path.join(ui.settings.DASHBOARD_BASE_DIRECTORY, p)


def join_out_path(p: str) -> str:
"""Make a path to a file or directory relative to the base of the out directory
"""
-return os.path.join(OUT_DIRECTORY, p)
+return os.path.join(ui.settings.DASHBOARD_OUT_DIRECTORY, p)
24 changes: 12 additions & 12 deletions dashboard/make_csv.py
@@ -12,7 +12,7 @@
import comprehensiveness
import summary_stats
import humanitarian
-import config
+import filepaths


logger = logging.getLogger(__name__)
@@ -51,10 +51,10 @@ def main():
logger.addHandler(logging.StreamHandler(sys.stdout))

logger.info("Generating CSV files")
-os.makedirs(config.join_out_path('data/csv'), exist_ok=True)
+os.makedirs(filepaths.join_out_path('data/csv'), exist_ok=True)

logger.info("Generating publishers.csv")
-with open(config.join_out_path('data/csv/publishers.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/publishers.csv'), 'w') as fp:
writer = csv.DictWriter(fp, [
'Publisher Name',
'Publisher Registry Id',
Expand All @@ -76,23 +76,23 @@ def main():

logger.info("Generating elements.csv")
publishers = list(data.current_stats['inverted_publisher']['activities'].keys())
-with open(config.join_out_path('data/csv/elements.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/elements.csv'), 'w') as fp:
writer = csv.DictWriter(fp, ['Element'] + publishers)
writer.writeheader()
for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items():
publisher_dict['Element'] = element
writer.writerow(publisher_dict)

logger.info("Generating elements_total.csv")
-with open(config.join_out_path('data/csv/elements_total.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/elements_total.csv'), 'w') as fp:
writer = csv.DictWriter(fp, ['Element'] + publishers)
writer.writeheader()
for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items():
publisher_dict['Element'] = element
writer.writerow(publisher_dict)

logger.info("Generating registry.csv")
-with open(config.join_out_path('data/csv/registry.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/registry.csv'), 'w') as fp:
keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness']
writer = csv.DictWriter(fp, keys)
writer.writeheader()
@@ -101,29 +101,29 @@ def main():

logger.info("Generating timeliness_frequency.csv")
previous_months = timeliness.previous_months_reversed
-with open(config.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp:
writer = csv.writer(fp)
writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published'])
for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted():
writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band])

logger.info("Generating timeliness_timelag.csv")
-with open(config.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp:
writer = csv.writer(fp)
writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time lag'])
for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted():
writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment])

logger.info("Generating forwardlooking.csv")
-with open(config.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp:
writer = csv.writer(fp)
writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years])
for row in forwardlooking.table():
writer.writerow([row['publisher_title'], row['publisher']] + [year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years])

for tab in comprehensiveness.columns.keys():
logger.info("Generating comprehensiveness_{}.csv".format(tab))
-with open(config.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp:
writer = csv.writer(fp)
if tab == 'financials':
writer.writerow(['Publisher Name', 'Publisher Registry Id'] +
Expand All @@ -145,7 +145,7 @@ def main():
[row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]])

logger.info("Generating summary_stats.csv")
-with open(config.join_out_path('data/csv/summary_stats.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/summary_stats.csv'), 'w') as fp:
writer = csv.writer(fp)
# Add column headers
writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns])
Expand All @@ -154,7 +154,7 @@ def main():
writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns])

logger.info("Generating humanitarian.csv")
-with open(config.join_out_path('data/csv/humanitarian.csv'), 'w') as fp:
+with open(filepaths.join_out_path('data/csv/humanitarian.csv'), 'w') as fp:
writer = csv.writer(fp)
# Add column headers
writer.writerow([
(Diffs for the remaining 8 changed files are not shown here.)
