From a64ff901e99216ea43433056a965f83317779fe3 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 5 Nov 2024 14:43:07 +0000 Subject: [PATCH 1/4] feat: Added redirects from previous site structure This commit adds a set of URL routes that redirect from the old Dashboard to the new views. It also removes the URL route for the coverage page. --- dashboard/ui/urls.py | 47 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index f10c68bb8..08426d668 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -15,7 +15,7 @@ 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ from django.contrib import admin -from django.urls import path +from django.urls import path, re_path from django.views.generic.base import RedirectView import ui.views @@ -69,18 +69,49 @@ path('publishing-statistics/comprehensiveness/core', ui.views.pubstats_comprehensiveness_core, name="dash-publishingstats-comprehensiveness-core"), path('publishing-statistics/comprehensiveness/financials', ui.views.pubstats_comprehensiveness_financials, name="dash-publishingstats-comprehensiveness-financials"), path('publishing-statistics/comprehensiveness/value-added', ui.views.pubstats_comprehensiveness_valueadded, name="dash-publishingstats-comprehensiveness-valueadded"), - path('publishing-statistics/coverage', lambda x: None, name="dash-publishingstats-coverage"), path('publishing-statistics/summary-statistics', ui.views.pubstats_summarystats, name="dash-publishingstats-summarystats"), path('publishing-statistics/humanitarian-reporting', ui.views.pubstats_humanitarian, name="dash-publishingstats-humanitarian"), # Registration agencies. path('registration-agencies', ui.views.registration_agencies, name="dash-registrationagencies"), - path("registration_agencies.html", RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True)) + path("registration_agencies.html", RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True)), # Redirects to support any users with bookmarks to pages on the old Dashboard.
- # path('timeliness.html', redirect("dash-publishingstats-timeliness")), - # path('index.html', redirect("dash-index")), - # path('summary_stats.html', redirect("dash-publishingstats-summarystats")), - # path('exploring_data.html', redirect("dash-exploringdata")) - + path("index.html", RedirectView.as_view(pattern_name="dash-index", permanent=True)), + path("headlines.html", RedirectView.as_view(pattern_name="dash-headlines", permanent=True)), + path("data_quality.html", RedirectView.as_view(pattern_name="dash-dataquality", permanent=True)), + path("exploring_data.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + path("publishers.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), + path("publishing_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats", permanent=True)), + path("timeliness.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness", permanent=True)), + path("timeliness_timelag.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness-timelag", permanent=True)), + path("forwardlooking.html", RedirectView.as_view(pattern_name="dash-publishingstats-forwardlooking", permanent=True)), + path("comprehensiveness.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness", permanent=True)), + path("comprehensiveness_core.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-core", permanent=True)), + path("comprehensiveness_financials.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-financials", permanent=True)), + path("comprehensiveness_valueadded.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-valueadded", permanent=True)), + path("summary_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats-summarystats", permanent=True)), + path("humanitarian.html", RedirectView.as_view(pattern_name="dash-publishingstats-humanitarian", permanent=True)), + path("files.html", RedirectView.as_view(pattern_name="dash-headlines-files", permanent=True)), + path("activities.html", RedirectView.as_view(pattern_name="dash-headlines-activities", permanent=True)), + path("download.html", RedirectView.as_view(pattern_name="dash-dataquality-download", permanent=True)), + path("xml.html", RedirectView.as_view(pattern_name="dash-dataquality-xml", permanent=True)), + path("validation.html", RedirectView.as_view(pattern_name="dash-dataquality-validation", permanent=True)), + path("versions.html", RedirectView.as_view(pattern_name="dash-dataquality-versions", permanent=True)), + path("organisation.html", RedirectView.as_view(pattern_name="dash-dataquality-organisation", permanent=True)), + path("identifiers.html", RedirectView.as_view(pattern_name="dash-dataquality-identifiers", permanent=True)), + path("reporting_orgs.html", RedirectView.as_view(pattern_name="dash-dataquality-reportingorgs", permanent=True)), + path("elements.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + path("codelists.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True)), + path("booleans.html", RedirectView.as_view(pattern_name="dash-exploringdata-booleans", permanent=True)), + path("dates.html", RedirectView.as_view(pattern_name="dash-exploringdata-dates", permanent=True)), + path("traceability.html", RedirectView.as_view(pattern_name="dash-exploringdata-traceability", permanent=True)), + 
path("org_ids.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), + path("faq.html", RedirectView.as_view(pattern_name="dash-faq", permanent=True)), + path("licenses.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), + re_path(r"license\/\S*.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), + re_path(r"publisher\/\S*.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), + re_path(r"codelist\/\d\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True)), + re_path(r"element\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + re_path(r"org_type\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), ] From c82b2288253d2b69853e101a8b877adda1d7ce21 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 5 Nov 2024 16:14:50 +0000 Subject: [PATCH 2/4] testing: Added basic automated testing of URL routes Using the Django test framework this commit adds some basic automated testing of the Dashboard URL routes, contains some fixes to the view functions to properly return HTTP 404 errors, and updates the documentation (including a small fix for the file paths). A small fix was also added to pytest.ini. --- README.md | 21 ++++- dashboard/ui/tests.py | 194 ++++++++++++++++++++++++++++++++++++++++++ dashboard/ui/views.py | 16 +++- pytest.ini | 2 +- 4 files changed, 228 insertions(+), 5 deletions(-) create mode 100644 dashboard/ui/tests.py diff --git a/README.md b/README.md index 4592fe562..9dfc217df 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ The IATI Dashboard is mostly written in Python but also has some helper Bash scr 3. Build the static graphs and other data that will be served via the Dashboard. 4. Run the web server. -Paths to different directories are set in `./src/config.py`. +Paths to different directories are set in `./dashboard/config.py`. ### 1. Setup environment @@ -64,7 +64,7 @@ Bash scripts are used to fetch the data that the Dashboard will present. They w ``` mkdir out -cd src +cd dashboard python make_plots.py python make_csv.py python speakers_kit.py @@ -75,7 +75,7 @@ cp ../img/publishers static/ ### 4. Run the webserver. -From `./src/`: +From `./dashboard/`: ``` python manage.py runserver @@ -86,6 +86,21 @@ The Dashboard will now be accessible from `localhost:8000/`. ## Development +### Automated tests +There are some unit tests written using `pytest` and site testing using Django's own testing framework. + +Once the development dependencies have been installed the unit tests can be run with: + +``` +pytest +``` + +The Django site tests can be run from the `dashboard/` directory with: + +``` +python manage.py test -v 2 +``` + ### Calculating your own statistics The IATI Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above. This can also be calculated using [Code for IATI Stats](http://github.com/codeforIATI/IATI-Stats) where `stats-calculated` corresponds to the `gitout` directory generated by [`git.sh` in IATI-Stats](https://github.com/codeforIATI/IATI-Stats#running-for-every-commit-in-the-data-directory). 
diff --git a/dashboard/ui/tests.py b/dashboard/ui/tests.py new file mode 100644 index 000000000..11d097084 --- /dev/null +++ b/dashboard/ui/tests.py @@ -0,0 +1,194 @@ +from django.test import TestCase +from django.urls import reverse + + +class BasicPageTests(TestCase): + """Perform basic HTTP 200/404 checks on the Dashboard pages + + These are split up into a number of functions because some can + take some time to run and so running with the "-v 2" flag will + list the tests as they run. + """ + + def test_top_pages(self): + """Test the index and top hierarchy pages return a 200 status code """ + + self.assertEqual(self.client.get(reverse("dash-index")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-faq")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-registrationagencies")).status_code, 200) + + def test_headlines(self): + """Test the headlines pages """ + + self.assertEqual(self.client.get(reverse("dash-headlines-files")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-activities")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-publishers")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-publisher-detail", args=("undp", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-publisher-detail", args=("not-a-valid-publisher", ))).status_code, 404) + + def test_dataquality(self): + """Test the data quality pages""" + + self.assertEqual(self.client.get(reverse("dash-dataquality-download")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-download-json")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-xml")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-validation")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-versions")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-organisation")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-identifiers")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-reportingorgs")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-licenses")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-licenses-detail", args=("cc-by", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-licenses-detail", args=("not-a-valid-license", ))).status_code, 404) + + def test_publishingstats_timeliness(self): + """Test timeliness pages in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-timeliness")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-timeliness-timelag")).status_code, 200) + + def test_publishingstats_comprehensiveness(self): + """Test comprehensiveness pages in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness")).status_code, 200) + 
self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-core")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-financials")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-valueadded")).status_code, 200) + + def test_publishingstats_forwardlooking(self): + """Test the forward looking page in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-forwardlooking")).status_code, 200) + + def test_publishingstats_summarystats(self): + """Test the summary statistics page in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-summarystats")).status_code, 200) + + def test_publishingstats_humanitarian(self): + """Test the humanitarian page in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-humanitarian")).status_code, 200) + + def test_exploringdata(self): + """Test the exploring data pages""" + self.assertEqual(self.client.get(reverse("dash-exploringdata-booleans")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("2", "budget_@type", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("2", "not-a-valid-slug", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("3", "budget_@type", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-exploringdata-dates")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-elements")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-elements-detail", args=("iati-activity_activity-date_narrative", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-elements-detail", args=("not-a-valid-element", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-exploringdata-orgids")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("funding_org", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("not-a-valid-org-type", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-exploringdata-traceability")).status_code, 200) + + +class OriginalDashboardRedirectTests(TestCase): + """Perform basic HTTP 301 redirection checks on the Dashboard pages + + These are split up into a number of functions because some can + take some time to run and so running with the "-v 2" flag will + list the tests as they run. + """ + + def _url_and_view_helper(self, urls_and_views_to_check): + """Checks that a set of URLs redirect to matching view functions""" + + for url, view_name in urls_and_views_to_check.items(): + self.assertRedirects(self.client.get(f"/{url}.html"), reverse(view_name), status_code=301) + + def test_headlines_and_misc(self): + """Test headlines and miscellaneous pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. 
+ self._url_and_view_helper({ + "index": "dash-index", + "headlines": "dash-headlines", + "files": "dash-headlines-files", + "activities": "dash-headlines-activities", + "publishers": "dash-headlines-publishers", + "faq": "dash-faq", + "registration_agencies": "dash-registrationagencies" + }) + + def test_dataquality(self): + """Test data quality pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. + self._url_and_view_helper({ + "data_quality": "dash-dataquality", + "download": "dash-dataquality-download", + "xml": "dash-dataquality-xml", + "validation": "dash-dataquality-validation", + "versions": "dash-dataquality-versions", + "organisation": "dash-dataquality-organisation", + "identifiers": "dash-dataquality-identifiers", + "reporting_orgs": "dash-dataquality-reportingorgs", + "licenses": "dash-dataquality-licenses" + }) + + def test_publishingstats(self): + """Test publishing stats pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. + self._url_and_view_helper({ + "publishing_stats": "dash-publishingstats", + "timeliness": "dash-publishingstats-timeliness", + "timeliness_timelag": "dash-publishingstats-timeliness-timelag", + "forwardlooking": "dash-publishingstats-forwardlooking", + "comprehensiveness": "dash-publishingstats-comprehensiveness", + "comprehensiveness_core": "dash-publishingstats-comprehensiveness-core", + "comprehensiveness_financials": "dash-publishingstats-comprehensiveness-financials", + "comprehensiveness_valueadded": "dash-publishingstats-comprehensiveness-valueadded", + "summary_stats": "dash-publishingstats-summarystats", + "humanitarian": "dash-publishingstats-humanitarian" + }) + + def test_exploringdata(self): + """Test exploring data pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. 
+ self._url_and_view_helper({ + "exploring_data": "dash-exploringdata-elements", + "elements": "dash-exploringdata-elements", + "codelists": "dash-exploringdata-codelists", + "booleans": "dash-exploringdata-booleans", + "dates": "dash-exploringdata-dates", + "traceability": "dash-exploringdata-traceability", + "org_ids": "dash-exploringdata-orgids" + }) + + def test_slug_page_redirects(self): + """Test pages with slugs redirect to the section page""" + + self.assertRedirects(self.client.get(r"/publisher/undp.html"), + reverse("dash-headlines-publishers"), + status_code=301) + self.assertRedirects(self.client.get(r"/license/cc-by.html"), + reverse("dash-dataquality-licenses"), + status_code=301) + self.assertRedirects(self.client.get(r"/codelist/2/budget_@type.html"), + reverse("dash-exploringdata-codelists"), + status_code=301) + self.assertRedirects(self.client.get(r"/element/iati-activity_activity-date_narrative.html"), + reverse("dash-exploringdata-elements"), + status_code=301) + self.assertRedirects(self.client.get(r"/org_type/funding_org.html"), + reverse("dash-exploringdata-orgids"), + status_code=301) diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index dc2e81b26..9a013a900 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -321,6 +321,9 @@ def dataquality_licenses(request): def dataquality_licenses_detail(request, license_id=None): template = loader.get_template("license.html") + if license_id not in LICENSE_URLS: + raise Http404("Unknown license") + publishers = [ publisher_name for publisher_name, publisher in ckan.items() @@ -365,6 +368,10 @@ def exploringdata_elements(request): def exploringdata_element_detail(request, element=None): template = loader.get_template("element.html") context = _make_context("elements") + + if element not in slugs['element']['by_slug']: + raise Http404("Unknown element or attribute") + i = slugs['element']['by_slug'][element] context["element"] = list(current_stats['inverted_publisher']['elements'])[i] context["publishers"] = list(current_stats['inverted_publisher']['elements'].values())[i] @@ -378,7 +385,9 @@ def exploringdata_orgids(request): def exploringdata_orgtypes_detail(request, org_type=None): - assert org_type in slugs['org_type']['by_slug'] + if org_type not in slugs['org_type']['by_slug']: + raise Http404("Unknown organisation type") + template = loader.get_template("org_type.html") context = _make_context("org_ids") context["slug"] = org_type @@ -393,6 +402,11 @@ def exploringdata_codelists(request): def exploringdata_codelists_detail(request, major_version=None, attribute=None): template = loader.get_template("codelist.html") + if major_version not in slugs['codelist']: + raise Http404("Unknown major version of the IATI standard") + if attribute not in slugs['codelist'][major_version]['by_slug']: + raise Http404("Unknown attribute") + context = _make_context("codelists") i = slugs['codelist'][major_version]['by_slug'][attribute] element = list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version])[i] diff --git a/pytest.ini b/pytest.ini index cebc5c7e2..20843432d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] -testpaths = src/tests +testpaths = dashboard/tests norecursedirs = *__pycache__* *.pytest_cache* console_output_style = count From a43e003bfef9363ad487081ee667f9c6dbc6ac0a Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 5 Nov 2024 16:32:10 +0000 Subject: [PATCH 3/4] refactor: Moved file path constants into ui settings module This commit moves 
the file path constants from config.py into the ui settings module so that it is more consistent with Django's settings architecture. --- dashboard/config.py | 14 +++++--------- dashboard/ui/settings.py | 8 ++++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/dashboard/config.py b/dashboard/config.py index 0fd3c199f..49570b219 100644 --- a/dashboard/config.py +++ b/dashboard/config.py @@ -6,32 +6,28 @@ import os.path - -STATS_DIRECTORY = "../stats-calculated" -DATA_DIRECTORY = "../data" -BASE_DIRECTORY = "../" -OUT_DIRECTORY = "../out" +import ui.settings def join_stats_path(p: str) -> str: """Make a path to a file or directory within the downloaded stats directory """ - return os.path.join(STATS_DIRECTORY, p) + return os.path.join(ui.settings.DASHBOARD_STATS_DIRECTORY, p) def join_data_path(p: str) -> str: """Make a path to a file or directory within the downloaded data directory """ - return os.path.join(DATA_DIRECTORY, p) + return os.path.join(ui.settings.DASHBOARD_DATA_DIRECTORY, p) def join_base_path(p: str) -> str: """Make a path to a file or directory relative to the base of the dashboard directory """ - return os.path.join(BASE_DIRECTORY, p) + return os.path.join(ui.settings.DASHBOARD_BASE_DIRECTORY, p) def join_out_path(p: str) -> str: """Make a path to a file or directory relative to the base of the out directory """ - return os.path.join(OUT_DIRECTORY, p) + return os.path.join(ui.settings.DASHBOARD_OUT_DIRECTORY, p) diff --git a/dashboard/ui/settings.py b/dashboard/ui/settings.py index d8008da83..c0461c3d1 100644 --- a/dashboard/ui/settings.py +++ b/dashboard/ui/settings.py @@ -136,3 +136,11 @@ # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' + +# +# Relative (to dashboard/) paths to IATI data and output directories. +# +DASHBOARD_STATS_DIRECTORY = "../stats-calculated" +DASHBOARD_DATA_DIRECTORY = "../data" +DASHBOARD_BASE_DIRECTORY = "../" +DASHBOARD_OUT_DIRECTORY = "../out" From bbdd8a51afac6d86b5001e00a2569d292721d46c Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 6 Nov 2024 23:17:55 +0000 Subject: [PATCH 4/4] refactor: Moved file path functions into a more sensibly named module After moving the configuration constants into settings.py, the config.py module was poorly named; this commit renames it to filepaths.py and refactors the other code to use the new name.
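Together with the previous commit, the effect on calling code is sketched below (an illustrative snippet, not part of the diff itself): the directory constants now live in `ui/settings.py`, and callers build paths through the renamed `filepaths` helpers.

```
# Illustrative use of the renamed module from code under dashboard/.
import filepaths

# join_stats_path() joins DASHBOARD_STATS_DIRECTORY ("../stats-calculated",
# defined in ui/settings.py) with a relative path, here giving
# "../stats-calculated/ckan.json".
stats_file = filepaths.join_stats_path("ckan.json")
```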
--- dashboard/common.py | 4 +-- dashboard/coverage.py | 4 +-- dashboard/data.py | 46 +++++++++++++-------------- dashboard/{config.py => filepaths.py} | 0 dashboard/make_csv.py | 24 +++++++------- dashboard/make_plots.py | 20 ++++++------ dashboard/speakers_kit.py | 14 ++++---- dashboard/timeliness.py | 8 ++--- dashboard/ui/views.py | 13 +++----- 9 files changed, 64 insertions(+), 69 deletions(-) rename dashboard/{config.py => filepaths.py} (100%) diff --git a/dashboard/common.py b/dashboard/common.py index a7922798b..41f773b8c 100644 --- a/dashboard/common.py +++ b/dashboard/common.py @@ -2,10 +2,10 @@ import data import json -import config +import filepaths # Import organisation_type_codelist as a global, then delete when used to save memory -with open(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh: +with open(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh: organisation_type_codelist = json.load(fh) organisation_type_dict = {c['code']: c['name'] for c in organisation_type_codelist['data']} del organisation_type_codelist diff --git a/dashboard/coverage.py b/dashboard/coverage.py index 35ae7549e..58885685a 100644 --- a/dashboard/coverage.py +++ b/dashboard/coverage.py @@ -6,7 +6,7 @@ from data import publisher_name from data import publishers_ordered_by_title from data import secondary_publishers -import config +import filepaths def is_number(s): @@ -180,7 +180,7 @@ def table(): # Compile a list of Development finance institutions (DFIs) -with open(config.join_base_path('dfi_publishers.csv'), 'r') as csv_file: +with open(filepaths.join_base_path('dfi_publishers.csv'), 'r') as csv_file: reader = csv.reader(csv_file, delimiter=',') dfi_publishers = [] for line in reader: diff --git a/dashboard/data.py b/dashboard/data.py index ba5967312..01edc56ff 100644 --- a/dashboard/data.py +++ b/dashboard/data.py @@ -8,7 +8,7 @@ import xmlschema -import config +import filepaths # Modified from: @@ -120,7 +120,7 @@ def get_publisher_name(self): # Loop over this list and return the publisher name if it is found within the historic list of publishers for x in path_components: - if x in JSONDir(config.join_stats_path('current/aggregated-publisher')).keys(): + if x in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).keys(): return x # If got to the end of the loop and nothing found, this folder does not relate to a single publisher @@ -133,7 +133,7 @@ def get_publisher_stats(publisher, stats_type='aggregated'): is not found. 
""" try: - return JSONDir(config.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher))) + return JSONDir(filepaths.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher))) except IOError: return {} @@ -145,7 +145,7 @@ def get_registry_id_matches(): """ # Load registry IDs for publishers who have changed their registry ID - with open(config.join_base_path('registry_id_relationships.csv')) as f: + with open(filepaths.join_base_path('registry_id_relationships.csv')) as f: reader = csv.DictReader(f) # Load this data into a dictonary registry_matches = { @@ -184,33 +184,33 @@ def deep_merge(obj1, obj2): current_stats = { - 'aggregated': JSONDir(config.join_stats_path('current/aggregated')), - 'aggregated_file': JSONDir(config.join_stats_path('current/aggregated-file')), - 'inverted_publisher': JSONDir(config.join_stats_path('current/inverted-publisher')), - 'inverted_file': JSONDir(config.join_stats_path('current/inverted-file')), - 'inverted_file_publisher': JSONDir(config.join_stats_path('current/inverted-file-publisher')), + 'aggregated': JSONDir(filepaths.join_stats_path('current/aggregated')), + 'aggregated_file': JSONDir(filepaths.join_stats_path('current/aggregated-file')), + 'inverted_publisher': JSONDir(filepaths.join_stats_path('current/inverted-publisher')), + 'inverted_file': JSONDir(filepaths.join_stats_path('current/inverted-file')), + 'inverted_file_publisher': JSONDir(filepaths.join_stats_path('current/inverted-file-publisher')), 'download_errors': [] } -ckan_publishers = JSONDir(config.join_data_path('ckan_publishers')) -github_issues = JSONDir(config.join_data_path('github/publishers')) -ckan = json.load(open(config.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict) +ckan_publishers = JSONDir(filepaths.join_data_path('ckan_publishers')) +github_issues = JSONDir(filepaths.join_data_path('github/publishers')) +ckan = json.load(open(filepaths.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict) dataset_to_publisher_dict = { dataset: publisher for publisher, publisher_dict in ckan.items() for dataset in publisher_dict.keys() } -metadata = json.load(open(config.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict) -with open(config.join_data_path('downloads/errors')) as fp: +metadata = json.load(open(filepaths.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict) +with open(filepaths.join_data_path('downloads/errors')) as fp: for line in fp: if line != '.\n': current_stats['download_errors'].append(line.strip('\n').split(' ', 3)) sources105 = [ - config.join_data_path('schemas/1.05/iati-activities-schema.xsd'), - config.join_data_path('schemas/1.05/iati-organisations-schema.xsd')] + filepaths.join_data_path('schemas/1.05/iati-activities-schema.xsd'), + filepaths.join_data_path('schemas/1.05/iati-organisations-schema.xsd')] sources203 = [ - config.join_data_path('schemas/2.03/iati-activities-schema.xsd'), - config.join_data_path('schemas/2.03/iati-organisations-schema.xsd')] + filepaths.join_data_path('schemas/2.03/iati-activities-schema.xsd'), + filepaths.join_data_path('schemas/2.03/iati-organisations-schema.xsd')] schema105 = xmlschema.XMLSchema(sources105) schema203 = xmlschema.XMLSchema(sources203) @@ -246,7 +246,7 @@ def transform_codelist_mapping_keys(codelist_mapping): def create_codelist_mapping(major_version): codelist_mapping = {} - for x in json.load(open(config.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))): + for x in 
json.load(open(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))): if 'condition' in x: pref, attr = x['path'].rsplit('/', 1) path = '{0}[{1}]/{2}'.format( @@ -264,12 +264,12 @@ def create_codelist_mapping(major_version): # Create a big dictionary of all codelist values by version and codelist name codelist_sets = { major_version: { - cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() + cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() } for major_version in MAJOR_VERSIONS} codelist_lookup = { major_version: { - cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() + cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() } for major_version in MAJOR_VERSIONS} # Simple look up to map publisher id to a publishers given name (title) @@ -279,11 +279,11 @@ def create_codelist_mapping(major_version): publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower()) # List of publishers who report all their activities as a secondary publisher -secondary_publishers = [publisher for publisher, stats in JSONDir(config.join_stats_path('current/aggregated-publisher')).items() +secondary_publishers = [publisher for publisher, stats in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).items() if int(stats['activities']) == len(stats['activities_secondary_reported']) and int(stats['activities']) > 0] try: - dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(config.join_data_path('dac2012.csv')))} + dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(filepaths.join_data_path('dac2012.csv')))} except IOError: dac2012 = {} diff --git a/dashboard/config.py b/dashboard/filepaths.py similarity index 100% rename from dashboard/config.py rename to dashboard/filepaths.py diff --git a/dashboard/make_csv.py b/dashboard/make_csv.py index f34e2f9e3..74e076efe 100644 --- a/dashboard/make_csv.py +++ b/dashboard/make_csv.py @@ -12,7 +12,7 @@ import comprehensiveness import summary_stats import humanitarian -import config +import filepaths logger = logging.getLogger(__name__) @@ -51,10 +51,10 @@ def main(): logger.addHandler(logging.StreamHandler(sys.stdout)) logger.info("Generating CSV files") - os.makedirs(config.join_out_path('data/csv'), exist_ok=True) + os.makedirs(filepaths.join_out_path('data/csv'), exist_ok=True) logger.info("Generating publishers.csv") - with open(config.join_out_path('data/csv/publishers.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/publishers.csv'), 'w') as fp: writer = csv.DictWriter(fp, [ 'Publisher Name', 'Publisher Registry Id', @@ -76,7 +76,7 @@ def main(): logger.info("Generating elements.csv") publishers = list(data.current_stats['inverted_publisher']['activities'].keys()) - with open(config.join_out_path('data/csv/elements.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/elements.csv'), 'w') as fp: writer = csv.DictWriter(fp, ['Element'] + publishers) writer.writeheader() for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items(): @@ -84,7 +84,7 @@ def 
main(): writer.writerow(publisher_dict) logger.info("Generating elements_total.csv") - with open(config.join_out_path('data/csv/elements_total.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/elements_total.csv'), 'w') as fp: writer = csv.DictWriter(fp, ['Element'] + publishers) writer.writeheader() for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items(): @@ -92,7 +92,7 @@ def main(): writer.writerow(publisher_dict) logger.info("Generating registry.csv") - with open(config.join_out_path('data/csv/registry.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/registry.csv'), 'w') as fp: keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness'] writer = csv.DictWriter(fp, keys) writer.writeheader() @@ -101,21 +101,21 @@ def main(): logger.info("Generating timeliness_frequency.csv") previous_months = timeliness.previous_months_reversed - with open(config.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published']) for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted(): writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band]) logger.info("Generating timeliness_timelag.csv") - with open(config.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time lag']) for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted(): writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) logger.info("Generating forwardlooking.csv") - with open(config.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) for row in forwardlooking.table(): @@ -123,7 +123,7 @@ def main(): for tab in comprehensiveness.columns.keys(): logger.info("Generating comprehensiveness_{}.csv".format(tab)) - with open(config.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp: + with open(filepaths.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp: writer = csv.writer(fp) if tab == 'financials': writer.writerow(['Publisher Name', 'Publisher Registry Id'] + @@ -145,7 +145,7 @@ def main(): [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]]) logger.info("Generating summary_stats.csv") - 
with open(config.join_out_path('data/csv/summary_stats.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/summary_stats.csv'), 'w') as fp: writer = csv.writer(fp) # Add column headers writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) @@ -154,7 +154,7 @@ def main(): writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) logger.info("Generating humanitarian.csv") - with open(config.join_out_path('data/csv/humanitarian.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/humanitarian.csv'), 'w') as fp: writer = csv.writer(fp) # Add column headers writer.writerow([ diff --git a/dashboard/make_plots.py b/dashboard/make_plots.py index 70d266597..cdbe27640 100644 --- a/dashboard/make_plots.py +++ b/dashboard/make_plots.py @@ -13,7 +13,7 @@ from tqdm import tqdm import common import data -import config +import filepaths from vars import expected_versions # noqa: F401 import matplotlib as mpl mpl.use('Agg') @@ -96,7 +96,7 @@ def make_plot(stat_path, git_stats, img_prefix=''): else: fig_legend.legend(plots.values(), plots.keys(), loc='center', ncol=4) fig_legend.set_size_inches(600.0 / dpi, 100.0 / dpi) - fig_legend.savefig(config.join_out_path('{0}{1}{2}_legend.png'.format(img_prefix, stat_name, stat_path[2]))) + fig_legend.savefig(filepaths.join_out_path('{0}{1}{2}_legend.png'.format(img_prefix, stat_name, stat_path[2]))) else: keys = None ax.plot(x_values, y_values) @@ -123,10 +123,10 @@ def make_plot(stat_path, git_stats, img_prefix=''): ax.ticklabel_format(axis='y', style='plain', useOffset=False) - fig.savefig(config.join_out_path('{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else '')), dpi=dpi) + fig.savefig(filepaths.join_out_path('{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else '')), dpi=dpi) plt.close('all') - fn = config.join_out_path('{0}{1}.csv'.format(img_prefix, stat_name)) + fn = filepaths.join_out_path('{0}{1}.csv'.format(img_prefix, stat_name)) with open(fn, 'w') as fp: writer = csv.writer(fp) if keys: @@ -148,15 +148,15 @@ def main(): args = parser.parse_args() # Load data required for loading stats. - failed_downloads = csv.reader(open(config.join_data_path('downloads/history.csv'))) - gitaggregate_publisher = data.JSONDir(config.join_stats_path('gitaggregate-publisher-dated')) + failed_downloads = csv.reader(open(filepaths.join_data_path('downloads/history.csv'))) + gitaggregate_publisher = data.JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')) # Generate plots for aggregated stats for all data. logger.info("Generating plots for all aggregated data") - git_stats = AugmentedJSONDir(config.join_stats_path('gitaggregate-dated'), + git_stats = AugmentedJSONDir(filepaths.join_stats_path('gitaggregate-dated'), failed_downloads, gitaggregate_publisher) - os.makedirs(config.join_out_path('img/aggregate'), exist_ok=True) + os.makedirs(filepaths.join_out_path('img/aggregate'), exist_ok=True) _paths = [ 'activities', @@ -192,10 +192,10 @@ def main(): # Generate plots for each publisher. 
logger.info("Generating plots for all publishers") - git_stats_publishers = AugmentedJSONDir(config.join_stats_path('gitaggregate-publisher-dated/'), + git_stats_publishers = AugmentedJSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated/'), failed_downloads, gitaggregate_publisher) - os.makedirs(config.join_out_path('img/publishers'), exist_ok=True) + os.makedirs(filepaths.join_out_path('img/publishers'), exist_ok=True) with tqdm(total=len(git_stats_publishers)) as pbar: if args.verbose: diff --git a/dashboard/speakers_kit.py b/dashboard/speakers_kit.py index b6e517d5f..0fb9b4bde 100644 --- a/dashboard/speakers_kit.py +++ b/dashboard/speakers_kit.py @@ -4,7 +4,7 @@ from collections import defaultdict from itertools import zip_longest -import config +import filepaths def codelist_dict(codelist_path): @@ -12,11 +12,11 @@ def codelist_dict(codelist_path): return {c['code']: c['name'] for c in codelist_json['data']} -organisation_type_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) -country_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/Country.json')) -region_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/Region.json')) +organisation_type_dict = codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) +country_dict = codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/Country.json')) +region_dict = codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/Region.json')) -aggregated_publisher = data.JSONDir(config.join_stats_path('current/aggregated-publisher/')) +aggregated_publisher = data.JSONDir(filepaths.join_stats_path('current/aggregated-publisher/')) activities_by = defaultdict(lambda: defaultdict(int)) publishers_by = defaultdict(lambda: defaultdict(int)) @@ -48,7 +48,7 @@ def codelist_dict(codelist_path): fieldnames = ['publisher_type', 'publishers_by_type', '', 'publisher_country', 'publishers_by_country', '', 'date', 'publishers_quarterly', '', 'activity_country', 'activities_by_country', '', 'activity_region', 'activities_by_region'] publishers_quarterly = [] -publishers_by_date = json.load(open(config.join_stats_path('gitaggregate-dated/publishers.json'))) +publishers_by_date = json.load(open(filepaths.join_stats_path('gitaggregate-dated/publishers.json'))) for date, publishers in sorted(publishers_by_date.items()): if (date[8:10] == '30' and date[5:7] in ['06', '09']) or (date[8:10] == '31' and date[5:7] in ['03', '12']): publishers_quarterly.append((date, publishers)) @@ -58,7 +58,7 @@ def sort_second(x): return sorted(x, key=lambda y: y[1], reverse=True) -with open(config.join_out_path('speakers_kit.csv'), 'w') as fp: +with open(filepaths.join_out_path('speakers_kit.csv'), 'w') as fp: writer = csv.DictWriter(fp, fieldnames) writer.writeheader() for publishers_by_type, publishers_by_country, publishers_quarterly_, activities_by_country, activities_by_region in zip_longest( diff --git a/dashboard/timeliness.py b/dashboard/timeliness.py index 96f8f0079..d1074ceca 100644 --- a/dashboard/timeliness.py +++ b/dashboard/timeliness.py @@ -4,7 +4,7 @@ import datetime from dateutil.relativedelta import relativedelta from collections import defaultdict, Counter -import config +import filepaths def short_month(month_str): @@ -59,7 +59,7 @@ def publisher_frequency(): """ # Load all the data from 'gitaggregate-publisher-dated' into memory - gitaggregate_publisher = 
JSONDir(config.join_stats_path('gitaggregate-publisher-dated')) + gitaggregate_publisher = JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')) # Loop over each publisher - i.e. a publisher folder within 'gitaggregate-publisher-dated' for publisher, agg in gitaggregate_publisher.items(): @@ -172,7 +172,7 @@ def first_published_band_index(first_published_band): def publisher_timelag(): - return [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir(config.join_stats_path('current/aggregated-publisher')).items()] + return [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).items()] def publisher_timelag_sorted(): @@ -205,7 +205,7 @@ def has_future_transactions(publisher): if transaction_date and transaction_date > datetime.date.today(): return 2 - gitaggregate_publisher = JSONDir(config.join_stats_path('gitaggregate-publisher-dated')).get(publisher, {}) + gitaggregate_publisher = JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')).get(publisher, {}) mindate = datetime.date(today.year - 1, today.month, 1) for date, activity_blacklist in gitaggregate_publisher.get('activities_with_future_transactions', {}).items(): if parse_iso_date(date) >= mindate and activity_blacklist: diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 9a013a900..7cc3dd7ee 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -1,10 +1,5 @@ """Views for the IATI Dashboard""" -# Note: in the page views I am unsure where "rulesets" and "registration_agencies" should -# belong - they exist in text.page_tiles but I can't find the route to these in make_html.py -# so not sure where they should fit. I've not included them in the page_view_names so hopefully -# an exception will be raised if they are referenced somewhere. - import datetime import dateutil.parser import subprocess @@ -15,7 +10,7 @@ from django.template import loader import comprehensiveness -import config +import filepaths import forwardlooking import humanitarian import text @@ -43,15 +38,15 @@ COMMIT_HASH = subprocess.run('git show --format=%H --no-patch'.split(), - cwd=config.join_base_path(""), + cwd=filepaths.join_base_path(""), capture_output=True).stdout.decode().strip() STATS_COMMIT_HASH = subprocess.run('git -C stats-calculated show --format=%H --no-patch'.split(), - cwd=config.join_base_path(""), + cwd=filepaths.join_base_path(""), capture_output=True).stdout.decode().strip() STATS_GH_URL = 'https://github.com/codeforIATI/IATI-Stats-public/tree/' + STATS_COMMIT_HASH # Load all the licenses and generate data for each licence and publisher. -with open(config.join_stats_path('licenses.json')) as handler: +with open(filepaths.join_stats_path('licenses.json')) as handler: LICENSE_URLS = json.load(handler) LICENSES = [