From 3460ae57864f2fdaa1b5c83764728ad5fc5ff298 Mon Sep 17 00:00:00 2001
From: Kevin Ngai
Date: Fri, 28 Jun 2024 16:06:16 +0000
Subject: [PATCH] Order query results of get-bps-metadata.sh

- Add an ORDER BY clause to every metadata query so repeated extractions
  produce rows in the same order.
- STATIONS_QUERY sorts on every selected column after station_id, so the
  row kept by DISTINCT ON (station_id) is the same on every run.
- CONTRIBUTORS_QUERY, INSTRUMENTS_QUERY and NOTIFICATIONS_QUERY carry
  extra sort keys (project_id; contributor, project; title_en) to break
  ties between rows that share the leading keys.
- Add pypandoc to requirements-dev.txt
---
NOTE(review): DISTINCT ON keeps the first row of each station_id group in
ORDER BY order; the ascending tie-break used here is deterministic but
arbitrary. Confirm which platform/GAW row should win when a station has
several. The post-image blob id on the "index" line below predates the
extra sort keys.

 migration/bps/get-bps-metadata.sh | 14 +++++++-------
 requirements-dev.txt              |  1 +
 2 files changed, 8 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 migration/bps/get-bps-metadata.sh

diff --git a/migration/bps/get-bps-metadata.sh b/migration/bps/get-bps-metadata.sh
old mode 100644
new mode 100755
index f22a636b..62a43fd8
--- a/migration/bps/get-bps-metadata.sh
+++ b/migration/bps/get-bps-metadata.sh
@@ -72,19 +72,19 @@ fi
 
 PROJECTS_QUERY="SELECT DISTINCT(project.project_acronym) AS project_id FROM project"
 
-DATASETS_QUERY="SELECT data_category AS dataset_id, data_class, data_level FROM dataset_type_definition"
+DATASETS_QUERY="SELECT data_category AS dataset_id, data_class, data_level FROM dataset_type_definition ORDER BY data_category, data_level"
 
-CONTRIBUTORS_QUERY="SELECT agency.agency_name AS name, agency.acronym AS acronym, country.country_code AS country_id, project.project_acronym AS project_id, country.wmo_region AS wmo_region_id, agency.url, REPLACE(email.email_address, ',', ';') AS email, agency.ftpdir AS ftp_username, DATE(agency.eff_start_datetime) AS start_date, DATE(agency.eff_end_datetime) AS end_date, ST_X(agency.the_geom) AS x, ST_Y(agency.the_geom) AS y FROM agency JOIN country USING (country_id) JOIN email USING (email_id) JOIN project USING (project_id)"
+CONTRIBUTORS_QUERY="SELECT agency.agency_name AS name, agency.acronym AS acronym, country.country_code AS country_id, project.project_acronym AS project_id, country.wmo_region AS wmo_region_id, agency.url, REPLACE(email.email_address, ',', ';') AS email, agency.ftpdir AS ftp_username, DATE(agency.eff_start_datetime) AS start_date, DATE(agency.eff_end_datetime) AS end_date, ST_X(agency.the_geom) AS x, ST_Y(agency.the_geom) AS y FROM agency JOIN country USING (country_id) JOIN email USING (email_id) JOIN project USING (project_id) ORDER BY name, acronym, country_id, project_id"
 
-STATIONS_QUERY="SELECT DISTINCT ON (station_id) platform.woudc_platform_identifier AS station_id, platform.platform_name AS station_name, platform_type AS station_type, gaw.gaw_platform_identifier AS gaw_id, country.country_code AS country_id, country.wmo_region AS wmo_region_id, DATE(platform.eff_start_datetime) AS start_date, DATE(platform.eff_end_datetime) AS end_date, ST_X(gaw.the_geom) AS x, ST_Y(gaw.the_geom) AS y, ST_Z(gaw.the_geom) AS z FROM platform JOIN platform_type_definition USING (platform_type_id) JOIN agency USING (agency_id) JOIN country ON platform.country_id = country.country_id JOIN platform_gaw_properties gaw ON platform.platform_id = gaw.platform_id"
+STATIONS_QUERY="SELECT DISTINCT ON (station_id) platform.woudc_platform_identifier AS station_id, platform.platform_name AS station_name, platform_type AS station_type, gaw.gaw_platform_identifier AS gaw_id, country.country_code AS country_id, country.wmo_region AS wmo_region_id, DATE(platform.eff_start_datetime) AS start_date, DATE(platform.eff_end_datetime) AS end_date, ST_X(gaw.the_geom) AS x, ST_Y(gaw.the_geom) AS y, ST_Z(gaw.the_geom) AS z FROM platform JOIN platform_type_definition USING (platform_type_id) JOIN agency USING (agency_id) JOIN country ON platform.country_id = country.country_id JOIN platform_gaw_properties gaw ON platform.platform_id = gaw.platform_id ORDER BY station_id, station_name, station_type, gaw_id, country_id, wmo_region_id, start_date, end_date, x, y, z"
 
-STATION_NAMES_QUERY="(SELECT DISTINCT woudc_platform_identifier AS station_id, data_payload.platform_name AS name FROM data_payload FULL JOIN platform ON data_payload.platform_id = platform.platform_id WHERE data_payload.platform_name IS NOT NULL) UNION (SELECT DISTINCT woudc_platform_identifier AS station_id, platform_name AS name FROM platform) ORDER BY station_id"
+STATION_NAMES_QUERY="(SELECT DISTINCT woudc_platform_identifier AS station_id, data_payload.platform_name AS name FROM data_payload FULL JOIN platform ON data_payload.platform_id = platform.platform_id WHERE data_payload.platform_name IS NOT NULL) UNION (SELECT DISTINCT woudc_platform_identifier AS station_id, platform_name AS name FROM platform) ORDER BY station_id, name"
 
-INSTRUMENTS_QUERY="SELECT platform.woudc_platform_identifier AS station_id, dtd.data_category AS dataset_id, dtd.data_level AS data_level, itd.instrument_type AS name, im.instrument_model AS model, instrument.instrument_serial_number AS serial, agency.acronym AS contributor, project_acronym AS project, DATE(instrument.eff_start_datetime) AS start_date, DATE(instrument.eff_end_datetime) AS end_date, ST_X(instrument.the_geom) AS x, ST_Y(instrument.the_geom) AS y, ST_Z(instrument.the_geom) AS z FROM instrument JOIN platform USING (platform_id) JOIN agency USING (agency_id) JOIN project USING (project_id) JOIN dataset_type_definition dtd USING (dataset_type_id) JOIN instrument_model im USING (instrument_model_id) JOIN instrument_type_definition itd USING (instrument_type_id)"
+INSTRUMENTS_QUERY="SELECT platform.woudc_platform_identifier AS station_id, dtd.data_category AS dataset_id, dtd.data_level AS data_level, itd.instrument_type AS name, im.instrument_model AS model, instrument.instrument_serial_number AS serial, agency.acronym AS contributor, project_acronym AS project, DATE(instrument.eff_start_datetime) AS start_date, DATE(instrument.eff_end_datetime) AS end_date, ST_X(instrument.the_geom) AS x, ST_Y(instrument.the_geom) AS y, ST_Z(instrument.the_geom) AS z FROM instrument JOIN platform USING (platform_id) JOIN agency USING (agency_id) JOIN project USING (project_id) JOIN dataset_type_definition dtd USING (dataset_type_id) JOIN instrument_model im USING (instrument_model_id) JOIN instrument_type_definition itd USING (instrument_type_id) ORDER BY station_id, dataset_id, data_level, name, model, serial, contributor, project"
 
-DEPLOYMENTS_QUERY="SELECT platform.woudc_platform_identifier AS station_id, CONCAT(agency.acronym, ':', project.project_acronym) AS contributor_id, DATE(platform.eff_start_datetime) AS start_date, DATE(platform.eff_end_datetime) AS end_date FROM agency JOIN platform USING (agency_id) JOIN project USING (project_id)"
+DEPLOYMENTS_QUERY="SELECT platform.woudc_platform_identifier AS station_id, CONCAT(agency.acronym, ':', project.project_acronym) AS contributor_id, DATE(platform.eff_start_datetime) AS start_date, DATE(platform.eff_end_datetime) AS end_date FROM agency JOIN platform USING (agency_id) JOIN project USING (project_id) ORDER BY station_id, contributor_id"
 
-NOTIFICATIONS_QUERY="SELECT title_en, title_fr, description_en, description_fr, tags_en, tags_fr, published, banner, visible, ST_X(the_geom) AS x, ST_Y(the_geom) AS y FROM notifications"
+NOTIFICATIONS_QUERY="SELECT title_en, title_fr, description_en, description_fr, tags_en, tags_fr, published, banner, visible, ST_X(the_geom) AS x, ST_Y(the_geom) AS y FROM notifications ORDER BY published, title_en"
 
 echo "Extracting metadata from woudc-archive"
 export PGPASSWORD=$WOUDC_ARCHIVE_PASSWORD
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 04f09f62..a90ae266 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,4 @@
+pypandoc
 alembic
 coverage
 flake8