Skip to content

Commit

Permalink
update kobo survey retrieval according to new API version
Browse files Browse the repository at this point in the history
  • Loading branch information
langbart committed Oct 13, 2024
1 parent a41d5ff commit 7d3c21b
Show file tree
Hide file tree
Showing 33 changed files with 249 additions and 721 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/data-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ on:
- cron: '0 0 */2 * *'

env:
PESKAS_KOBO_TOKEN_2: ${{ secrets.PESKAS_KOBO_TOKEN_2 }}
PESKAS_KOBO_TOKEN_1: ${{ secrets.PESKAS_KOBO_TOKEN_1 }}
KOBO_PASSWORD: ${{ secrets.KOBO_PASSWORD }}
KOBO_USERNAME: ${{ secrets.KOBO_USERNAME }}
KOBO_PESKAS1: ${{ secrets.KOBO_PESKAS1 }}
KOBO_PESKAS2: ${{ secrets.KOBO_PESKAS2 }}
KOBO_PESKAS3: ${{ secrets.KOBO_PESKAS3 }}
GCP_SA_KEY: ${{ secrets.PESKAS_DATAINGESTION_GCS_KEY }}
AIRTABLE_KEY: ${{ secrets.AIRTABLE_KEY }}
PDS_TOKEN: ${{ secrets.PESKAS_PDS_TOKEN }}
Expand Down
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: peskas.timor.data.pipeline
Title: Functions to Implement the Timor Small Scale Fisheries
Data Pipeline
Version: 3.2.0
Version: 3.3.0
Authors@R:
c(person(given = "Lorenzo",
family = "Longobardi",
Expand Down Expand Up @@ -40,7 +40,8 @@ Imports:
tidyr,
rlang (>= 0.1.2),
googlesheets4,
reticulate
reticulate,
httr2
Suggests:
covr,
pkgdown,
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ RUN install2.r --error --skipinstalled \
tibble \
tidyr \
rlang \
googlesheets4
googlesheets4 \
httr2

# Install suggests
RUN install2.r --error --skipinstalled \
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile.prod
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ RUN install2.r --error --skipinstalled \
tibble \
tidyr \
rlang \
googlesheets4
googlesheets4 \
httr2

# Install suggests
RUN install2.r --error --skipinstalled \
Expand Down
7 changes: 1 addition & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export(get_fao_composition)
export(get_fish_length)
export(get_full_tracks)
export(get_full_trips)
export(get_host_url)
export(get_kobo_data)
export(get_merged_landings)
export(get_merged_trips)
export(get_models)
Expand All @@ -54,14 +54,12 @@ export(get_validation_sheet)
export(ingest_complete_tracks)
export(ingest_kepler_tracks)
export(ingest_landings)
export(ingest_legacy_landings)
export(ingest_metadata_tables)
export(ingest_pds_map)
export(ingest_pds_matched_trips)
export(ingest_pds_tracks)
export(ingest_pds_trips)
export(ingest_rfish_table)
export(ingest_updated_landings)
export(ingest_validation_tables)
export(insistent_download_cloud_file)
export(insistent_upload_cloud_file)
Expand Down Expand Up @@ -92,9 +90,6 @@ export(retrieve_pds_tracks)
export(retrieve_pds_tracks_data)
export(retrieve_pds_trips)
export(retrieve_pds_trips_data)
export(retrieve_survey)
export(retrieve_survey_data)
export(retrieve_survey_metadata)
export(send_sites_report)
export(send_validation_mail)
export(sym)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# peskas.timor.data.pipeline 3.3.0

### Breaking changes

- Kobo surveys are now retrieved according to the new v2 API. The function `retrieve_survey_data()` has been replaced by `get_kobo_data()`, which has been updated to retrieve data from the new API. The function `retrieve_survey_metadata()` has been deprecated.


# peskas.timor.data.pipeline 3.2.0

## Improvements
Expand Down
166 changes: 33 additions & 133 deletions R/ingest-landings.R
Original file line number Diff line number Diff line change
@@ -1,81 +1,28 @@
#' Ingest Landings Survey data (Peskas 2)
#'
#' Downloads updated (Peskas 2) landings information that has been collected using Kobo Toolbox and
#' uploads it to cloud storage services.
#'
#' This function downloads the survey metadata (survey information) as well as
#' the survey responses. Afterwards it uploads this information to cloud
#' services. File names used contain a
#' versioning string that includes the date-time and, if available, the first 7
#' digits of the git commit sha. This is acomplished using [add_version()]
#'
#' The parameters needed in `conf.yml` are:
#'
#' ```
#' surveys:
#' landings_3:
#' api:
#' survey_id:
#' token:
#' file_prefix:
#' storage:
#' storage_name:
#' key:
#' options:
#' project:
#' bucket:
#' service_account_key:
#' ```
#'
#' Progress through the function is tracked using the package *logger*.
#'
#'
#' @param log_threshold The (standard Apache logj4) log level used as a
#' threshold for the logging infrastructure. See [logger::log_levels] for more
#' details
#'
#' @keywords workflow
#'
#' @return No output. This funcrion is used for it's side effects
#' @export
#'
ingest_updated_landings <- function(log_threshold = logger::DEBUG) {
logger::log_threshold(log_threshold)

pars <- read_config()

file_list <- retrieve_survey(pars$surveys$landings_3$file_prefix,
api = pars$surveys$landings_3$api,
id = pars$surveys$landings_3$survey_id,
token = pars$surveys$landings_3$token
)

logger::log_info("Uploading files to cloud...")
# Iterate over multiple storage providers if there are more than one
purrr::map(pars$storage, ~ upload_cloud_file(file_list, .$key, .$options))
logger::log_success("File upload succeded")
}

#' Ingest Landings Survey data
#'
#' Downloads landings information that has been collected using Kobo Toolbox and
#' Downloads landings information for multiple surveys collected using Kobo Toolbox and
#' uploads it to cloud storage services.
#'
#' This function downloads the survey metadata (survey information) as well as
#' the survey responses. Afterwards it uploads this information to cloud
#' services. File names used contain a
#' versioning string that includes the date-time and, if available, the first 7
#' digits of the git commit sha. This is acomplished using [add_version()]
#' This function downloads the survey metadata and responses for multiple landing surveys
#' (landings_1, landings_2, landings_3). It then uploads this information to cloud
#' services. File names contain a versioning string that includes the date-time and,
#' if available, the first 7 digits of the git commit sha, accomplished using [add_version()].
#'
#' The parameters needed in `conf.yml` are:
#'
#' ```
#' surveys:
#' kobo_username:
#' kobo_password:
#' landings_1:
#' file_prefix:
#' asset_id:
#' landings_2:
#' api:
#' survey_id:
#' token:
#' file_prefix:
#' asset_id:
#' landings_3:
#' file_prefix:
#' asset_id:
#' storage:
#' storage_name:
#' key:
Expand All @@ -87,87 +34,40 @@ ingest_updated_landings <- function(log_threshold = logger::DEBUG) {
#'
#' Progress through the function is tracked using the package *logger*.
#'
#'
#' @param log_threshold The (standard Apache log4j) log level used as a
#' threshold for the logging infrastructure. See [logger::log_levels] for more
#' details
#' threshold for the logging infrastructure. See [logger::log_levels] for more details
#'
#' @keywords workflow
#'
#' @return No output. This funcrion is used for it's side effects
#' @return No output. This function is used for its side effects
#' @export
#'
ingest_landings <- function(log_threshold = logger::DEBUG) {
logger::log_threshold(log_threshold)

pars <- read_config()

file_list <- retrieve_survey(pars$surveys$landings_2$file_prefix,
api = pars$surveys$landings_2$api,
id = pars$surveys$landings_2$survey_id,
token = pars$surveys$landings_2$token
)

logger::log_info("Uploading files to cloud...")
# Iterate over multiple storage providers if there are more than one
purrr::map(pars$storage, ~ upload_cloud_file(file_list, .$key, .$options))
logger::log_success("File upload succeded")
}
# Create a vector of landing numbers
landing_numbers <- 1:3

#' Ingest legacy Landings Survey data
#'
#' Downloads legacy landings information that has been collected using Kobo Toolbox and
#' uploads it to cloud storage services.
#'
#' This function downloads the survey metadata (survey information) as well as
#' the survey responses. Afterwards it uploads this information to cloud
#' services. File names used contain a
#' versioning string that includes the date-time and, if available, the first 7
#' digits of the git commit sha. This is acomplished using [add_version()]
#'
#' The parameters needed in `conf.yml` are:
#'
#' ```
#' surveys:
#' landings_1:
#' api:
#' survey_id:
#' token:
#' file_prefix:
#' storage:
#' storage_name:
#' key:
#' options:
#' project:
#' bucket:
#' service_account_key:
#' ```
#'
#' Progress through the function is tracked using the package *logger*.
#'
#'
#' @param log_threshold The (standard Apache logj4) log level used as a
#' threshold for the logging infrastructure. See [logger::log_levels] for more
#' details
#'
#' @keywords workflow
#'
#' @return No output. This funcrion is used for it's side effects
#' @export
#'
ingest_legacy_landings <- function(log_threshold = logger::DEBUG) {
logger::log_threshold(log_threshold)
# Use purrr::map to apply get_kobo_data() to each landing number
file_lists <- purrr::map(landing_numbers, function(i) {
survey_name <- paste0("landings_", i)
logger::log_info("Retrieving data for {survey_name}...")

pars <- read_config()
get_kobo_data(
prefix = pars$surveys[[survey_name]]$file_prefix,
uname = pars$surveys$kobo_username,
pwd = pars$surveys$kobo_password,
assetid = pars$surveys[[survey_name]]$asset_id
)
})

file_list <- retrieve_survey(pars$surveys$landings_1$file_prefix,
api = pars$surveys$landings_1$api,
id = pars$surveys$landings_1$survey_id,
token = pars$surveys$landings_1$token
)
# Combine all file lists
all_files <- unlist(file_lists, recursive = FALSE)

logger::log_info("Uploading files to cloud...")
# Iterate over multiple storage providers if there are more than one
purrr::map(pars$storage, ~ upload_cloud_file(file_list, .$key, .$options))
logger::log_success("File upload succeded")
purrr::walk(pars$storage, ~ upload_cloud_file(all_files, .$key, .$options))
logger::log_success("File upload succeeded")
}
4 changes: 2 additions & 2 deletions R/preprocess-landings.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#' Progress through the function is tracked using the package *logger*.
#'
#' @param log_threshold
#' @inheritParams ingest_updated_landings
#' @inheritParams ingest_landings
#' @keywords workflow
#' @return no outputs. This function is used for it's side effects
#' @export
Expand Down Expand Up @@ -333,7 +333,7 @@ preprocess_landings_step_2 <- function(log_threshold = logger::DEBUG) {
#' Progress through the function is tracked using the package *logger*.
#'
#' @param log_threshold
#' @inheritParams ingest_legacy_landings
#' @inheritParams ingest_landings
#' @keywords workflow
#' @return no outputs. This function is used for its side effects
#' @export
Expand Down
Loading

0 comments on commit 7d3c21b

Please sign in to comment.