From aee47c212cabb49898895be6f7c1f3c82e4ba316 Mon Sep 17 00:00:00 2001 From: Jorrit Poelen Date: Mon, 7 Oct 2024 11:57:02 -0500 Subject: [PATCH 1/2] add GloBI index config; related to https://github.com/globalbioticinteractions/globalbioticinteractions/issues/777; fyi @zedomel --- .github/workflows/review.yml | 41 ++++ .gitignore | 33 +--- README.md | 44 +---- globi.json | 364 +++++++++++++++++++++++++++++++++++ 4 files changed, 411 insertions(+), 71 deletions(-) create mode 100644 .github/workflows/review.yml create mode 100644 globi.json diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml new file mode 100644 index 0000000..1f17fbd --- /dev/null +++ b/.github/workflows/review.yml @@ -0,0 +1,41 @@ +# This workflow will review a GloBI indexed dataset. +# For more information see: https://globalbioticinteractions.org + +name: GloBI review by Elton + +on: + push: + branches: [ '*' ] + pull_request: + branches: [ '*' ] + schedule: + - cron: "0 0 * * 1" + +jobs: + review: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up JDK 1.8 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '8' + - name: download review script + run: curl --silent -L "https://raw.githubusercontent.com/globalbioticinteractions/globinizer/master/check-dataset.sh" > check-dataset.sh + - name: download network compiler script + run: | + curl --silent -L "https://raw.githubusercontent.com/globalbioticinteractions/globinizer/master/compile-network.sh" > compile-network.sh + chmod +x compile-network.sh + - name: review dataset + run: bash check-dataset.sh "${GITHUB_REPOSITORY}" + - name: Share review report + uses: actions/upload-artifact@v4 + with: + name: review-report + path: | + README.txt + datasets/ + index.* + indexed-* + review* diff --git a/.gitignore b/.gitignore index 5b284bf..9172584 100644 --- a/.gitignore +++ b/.gitignore @@ -1,30 +1,3 @@ -# History files -.Rhistory -.Rapp.history -# Session Data files -.RData 
-# Example code in package build process -*-Ex.R -# Output files from R CMD build -/*.tar.gz -# Output files from R CMD check -/*.Rcheck/ -# RStudio files -.Rproj.user/ -# produced vignettes -vignettes/*.html -vignettes/*.pdf -# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 -.httr-oauth -# knitr and R markdown default cache directories -/*_cache/ -/cache/ -# Temporary files created by R markdown -*.utf8.md -*.knit.md -#Custom view -.DS_Store -Thumbs.db -OBservData.Rproj -#Embargo Folder -Processing_files/Datasets_Processing/Embargo/ +.elton/ +add_travis_artifact_upload_keys.sh +datasets/ \ No newline at end of file diff --git a/README.md b/README.md index 1758a21..83726c1 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,5 @@ -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7932602.svg)](https://doi.org/10.5281/zenodo.7932602) +[![GloBI Review by Elton](../../actions/workflows/review.yml/badge.svg)](../../actions/workflows/review.yml) [![GloBI](https://api.globalbioticinteractions.org/interaction.svg?accordingTo=globi:ibartomeus/OBservData&refutes=true&refutes=false)](https://globalbioticinteractions.org/?accordingTo=globi:ibartomeus/OBservData) -# Crop Pollination Database - -## How to use the database: - -The database is stored as three `.csv` files you can find in `Final_Data` folder. Here you will find the latest version (See `News.md`). Stable releases can be found in Zenodo: DOI: [10.5281/zenodo.4311291](https://zenodo.org/badge/latestdoi/240485535) - -- `CropPol_field_level_data.csv` contains one row per field and study system with summary values per field incuding field coordinates, pollintor richness, yield, etc... -- `CropPol_sampling_data.csv` contains pollinator sampling data on which pollinators where collected per field. -- `CropPol_data_ownership.csv` contains information on data authors, affiliations and funding. 
- -Please, check Metadata [here](http://htmlpreview.github.io/?https://github.com/ibartomeus/OBservData/blob/master/Metadata/docs/index_Rev.html) and understand the data limitations [here](https://github.com/ibartomeus/OBservData/blob/master/Manuscript/OBSerData_Final.pdf) - -If you want to dig into raw data files, all reproducible data and code to build the database is under the `Processing_files` folder. - -## How to cite the database: - -Please cite CropPol database when the data are used in bulk in publications and cite individual studies when pertinent. - -- Version 1.1.1 is published as a Data Paper in Ecology: Allen-Perkins et al. 2022. CropPol: A Dynamic, Open and Global Database on Crop Pollination. Ecology 103(3):e3614. https://doi.org/10.1002/ecy.3614 - -- Bugs are updated in the third digit (e.g. v1.0.1), new datasets in the second digit (e.g. v1.1.0) and major upgrades in the first digit (e.g. v2.0.0). You can see how to cite more recent versions with updated data and contributing authors in: https://github.com/ibartomeus/OBservData/releases and in Zenodo. - -## How to contribute? - -We encourage you to add new data to this database. If you are github user, just clone the repo, and follow the instrucions in `Your_new_study` folder. When ready, make us a pull request and we will add the new data. - -If you are not familiar with github, don't worry, just [download the template](https://github.com/ibartomeus/OBservData/raw/master/Template/Template_example_V9.ods), fill it up and open an [issue](https://github.com/ibartomeus/OBservData/issues/new?assignees=AlfonsoAllen&labels=New+data&template=new-data.md&title=%5Bdata%5D) to let us know where we can access the template. - -This is the workflow for github users (A) and non-users (B). - -![](Your_new_study/workflow.jpeg) - - -## Spotted any error? 
- -Please let us know in an [issue](https://github.com/ibartomeus/OBservData/issues/new?assignees=AlfonsoAllen&labels=bug&template=bug_report.md&title=%5Bbug%5D) - -### Thanks to: - -We started using livedat **Template Repo** designed to assist in setting up a repository for regularly-updated data. Details in [this PLOS Biology paper](https://doi.org/10.1371/journal.pbio.3000125). Instructions for creating an updating data workflow can be found at this companion website: [UpdatingData.org](https://www.updatingdata.org/). +Configuration to help Global Biotic Interactions (GloBI, https://globalbioticinteractions.org) index: +Allen-Perkins, Alfonso, Magrach, Ainhoa, Dainese, Matteo, Garibaldi, Lucas A., Kleijn, David, Rader, Romina, Reilly, James R., et al. 2022. “ CropPol: A Dynamic, Open and Global Database on Crop Pollination.” Ecology 103(3): e3614. https://doi.org/10.1002/ecy.3614 \ No newline at end of file diff --git a/globi.json b/globi.json new file mode 100644 index 0000000..90b7a61 --- /dev/null +++ b/globi.json @@ -0,0 +1,364 @@ +{ + "@context" : [ "http://www.w3.org/ns/csvw", { + "@language" : "en" + } ], + "rdfs:comment" : [ "inspired by https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/" ], + "tables" : [ { + "@context" : [ "http://www.w3.org/ns/csvw", { + "@language" : "en" + } ], + "rdfs:comment" : [ "inspired by https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/" ], + "interactionTypeName": "pollinatedBy", + "interactionTypeId": "http://purl.obolibrary.org/obo/RO_0002456", + "url" : "Final_Data/CropPol_sampling_data.csv", + "dcterms:bibliographicCitation" : "Allen-Perkins, Alfonso, Magrach, Ainhoa, Dainese, Matteo, Garibaldi, Lucas A., Kleijn, David, Rader, Romina, Reilly, James R., et al. 2022. \"CropPol: A Dynamic, Open and Global Database on Crop Pollination.\" Ecology 103(3): e3614. 
https://doi.org/10.1002/ecy.3614", + "delimiter" : ",", + "headerRowCount" : 1, + "null" : [ "" ], + "tableSchema" : { + "columns" : [ { + "name" : "study_id", + "titles" : "study_id", + "datatype" : "string" + }, { + "name" : "site_id", + "titles" : "site_id", + "datatype" : "string" + }, { + "name" : "sampling_method", + "titles" : "sampling_method", + "datatype" : "string" + }, { + "name" : "targetTaxonName", + "titles" : "pollinator", + "datatype" : "string" + }, { + "name" : "identified_to", + "titles" : "identified_to", + "datatype" : "string" + }, { + "name" : "guild", + "titles" : "guild", + "datatype" : "string" + }, { + "name" : "abundance", + "titles" : "abundance", + "datatype" : "string" + }, { + "name" : "total_sampled_area", + "titles" : "total_sampled_area", + "datatype" : "string" + }, { + "name" : "total_sampled_time", + "titles" : "total_sampled_time", + "datatype" : "string" + }, { + "name" : "total_sampled_flowers", + "titles" : "total_sampled_flowers", + "datatype" : "string" + }, { + "name" : "description", + "titles" : "description", + "datatype" : "string" + }, { + "name" : "notes", + "titles" : "notes", + "datatype" : "string" + } ], + "foreignKeys": [ + { + "columnReference": "study_id", + "reference": { + "columnReference": "study_id" + } + } + ] + } + }, { + "@context" : [ "http://www.w3.org/ns/csvw", { + "@language" : "en" + } ], + "rdfs:comment" : [ "inspired by https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/" ], + "url" : "Final_Data/CropPol_field_level_data.csv", + "dcterms:bibliographicCitation" : "Allen-Perkins, Alfonso, Magrach, Ainhoa, Dainese, Matteo, Garibaldi, Lucas A., Kleijn, David, Rader, Romina, Reilly, James R., et al. 2022. \"CropPol: A Dynamic, Open and Global Database on Crop Pollination.\" Ecology 103(3): e3614. 
https://doi.org/10.1002/ecy.3614", + "delimiter" : ",", + "headerRowCount" : 1, + "null" : [ "" ], + "tableSchema" : { + "columns" : [ { + "name" : "study_id", + "titles" : "study_id", + "datatype" : "string" + }, { + "name" : "study_id2", + "titles" : "study_id2", + "datatype" : "string" + }, { + "name" : "site_id", + "titles" : "site_id", + "datatype" : "string" + }, { + "name" : "sourceTaxonName", + "titles" : "crop", + "datatype" : "string" + }, { + "name" : "variety", + "titles" : "variety", + "datatype" : "string" + }, { + "name" : "management", + "titles" : "management", + "datatype" : "string" + }, { + "name" : "sourceCountryName", + "titles" : "country", + "datatype" : "string" + }, { + "name" : "decimalLatitude", + "titles" : "latitude", + "datatype" : "string" + }, { + "name" : "decimalLongitude", + "titles" : "longitude", + "datatype" : "string" + }, { + "name" : "X_UTM", + "titles" : "X_UTM", + "datatype" : "string" + }, { + "name" : "Y_UTM", + "titles" : "Y_UTM", + "datatype" : "string" + }, { + "name" : "zone_UTM", + "titles" : "zone_UTM", + "datatype" : "string" + }, { + "name" : "sampling_start_month", + "titles" : "sampling_start_month", + "datatype" : "string" + }, { + "name" : "sampling_end_month", + "titles" : "sampling_end_month", + "datatype" : "string" + }, { + "name" : "http://rs.tdwg.org/dwc/terms/eventDate", + "titles" : "sampling_year", + "datatype" : "string" + }, { + "name" : "field_size", + "titles" : "field_size", + "datatype" : "string" + }, { + "name" : "yield", + "titles" : "yield", + "datatype" : "string" + }, { + "name" : "yield_units", + "titles" : "yield_units", + "datatype" : "string" + }, { + "name" : "yield2", + "titles" : "yield2", + "datatype" : "string" + }, { + "name" : "yield2_units", + "titles" : "yield2_units", + "datatype" : "string" + }, { + "name" : "yield_treatments_no_pollinators", + "titles" : "yield_treatments_no_pollinators", + "datatype" : "string" + }, { + "name" : "yield_treatments_pollen_supplement", + 
"titles" : "yield_treatments_pollen_supplement", + "datatype" : "string" + }, { + "name" : "yield_treatments_no_pollinators2", + "titles" : "yield_treatments_no_pollinators2", + "datatype" : "string" + }, { + "name" : "yield_treatments_pollen_supplement2", + "titles" : "yield_treatments_pollen_supplement2", + "datatype" : "string" + }, { + "name" : "fruits_per_plant", + "titles" : "fruits_per_plant", + "datatype" : "string" + }, { + "name" : "fruit_weight", + "titles" : "fruit_weight", + "datatype" : "string" + }, { + "name" : "plant_density", + "titles" : "plant_density", + "datatype" : "string" + }, { + "name" : "seeds_per_fruit", + "titles" : "seeds_per_fruit", + "datatype" : "string" + }, { + "name" : "seeds_per_plant", + "titles" : "seeds_per_plant", + "datatype" : "string" + }, { + "name" : "seed_weight", + "titles" : "seed_weight", + "datatype" : "string" + }, { + "name" : "taxa_recorded", + "titles" : "taxa_recorded", + "datatype" : "string" + }, { + "name" : "use_visits_or_abundance", + "titles" : "use_visits_or_abundance", + "datatype" : "string" + }, { + "name" : "sampling_richness", + "titles" : "sampling_richness", + "datatype" : "string" + }, { + "name" : "observed_pollinator_richness", + "titles" : "observed_pollinator_richness", + "datatype" : "string" + }, { + "name" : "other_pollinator_richness", + "titles" : "other_pollinator_richness", + "datatype" : "string" + }, { + "name" : "other_richness_estimator_method", + "titles" : "other_richness_estimator_method", + "datatype" : "string" + }, { + "name" : "richness_restriction", + "titles" : "richness_restriction", + "datatype" : "string" + }, { + "name" : "sampling_abundance", + "titles" : "sampling_abundance", + "datatype" : "string" + }, { + "name" : "abundance", + "titles" : "abundance", + "datatype" : "string" + }, { + "name" : "ab_honeybee", + "titles" : "ab_honeybee", + "datatype" : "string" + }, { + "name" : "ab_bombus", + "titles" : "ab_bombus", + "datatype" : "string" + }, { + "name" : 
"ab_wildbees", + "titles" : "ab_wildbees", + "datatype" : "string" + }, { + "name" : "ab_syrphids", + "titles" : "ab_syrphids", + "datatype" : "string" + }, { + "name" : "ab_humbleflies", + "titles" : "ab_humbleflies", + "datatype" : "string" + }, { + "name" : "ab_other_flies", + "titles" : "ab_other_flies", + "datatype" : "string" + }, { + "name" : "ab_beetles", + "titles" : "ab_beetles", + "datatype" : "string" + }, { + "name" : "ab_lepidoptera", + "titles" : "ab_lepidoptera", + "datatype" : "string" + }, { + "name" : "ab_nonbee_hymenoptera", + "titles" : "ab_nonbee_hymenoptera", + "datatype" : "string" + }, { + "name" : "ab_others", + "titles" : "ab_others", + "datatype" : "string" + }, { + "name" : "total_sampled_area", + "titles" : "total_sampled_area", + "datatype" : "string" + }, { + "name" : "total_sampled_time", + "titles" : "total_sampled_time", + "datatype" : "string" + }, { + "name" : "sampling_visitation", + "titles" : "sampling_visitation", + "datatype" : "string" + }, { + "name" : "visitation_rate_units", + "titles" : "visitation_rate_units", + "datatype" : "string" + }, { + "name" : "visitation_rate", + "titles" : "visitation_rate", + "datatype" : "string" + }, { + "name" : "visit_honeybee", + "titles" : "visit_honeybee", + "datatype" : "string" + }, { + "name" : "visit_bombus", + "titles" : "visit_bombus", + "datatype" : "string" + }, { + "name" : "visit_wildbees", + "titles" : "visit_wildbees", + "datatype" : "string" + }, { + "name" : "visit_syrphids", + "titles" : "visit_syrphids", + "datatype" : "string" + }, { + "name" : "visit_humbleflies", + "titles" : "visit_humbleflies", + "datatype" : "string" + }, { + "name" : "visit_other_flies", + "titles" : "visit_other_flies", + "datatype" : "string" + }, { + "name" : "visit_beetles", + "titles" : "visit_beetles", + "datatype" : "string" + }, { + "name" : "visit_lepidoptera", + "titles" : "visit_lepidoptera", + "datatype" : "string" + }, { + "name" : "visit_nonbee_hymenoptera", + "titles" : 
"visit_nonbee_hymenoptera", + "datatype" : "string" + }, { + "name" : "visit_others", + "titles" : "visit_others", + "datatype" : "string" + }, { + "name" : "referenceDoi", + "titles" : "Publication", + "datatype" : "string" + }, { + "name" : "referenceCitation", + "titles" : "Credit", + "datatype" : "string" + }, { + "name" : "Email_contact", + "titles" : "Email_contact", + "datatype" : "string" + }, { + "name" : "notes", + "titles" : "notes", + "datatype" : "string" + } ], + "primaryKey": "study_id" + } + } ] +} From fd3d97a9009c69189f11b33b3d0cd04c822f13ea Mon Sep 17 00:00:00 2001 From: Jorrit Poelen Date: Mon, 7 Oct 2024 14:35:50 -0500 Subject: [PATCH 2/2] append GloBI indexing info to bottom of readme --- README.md | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 83726c1..5bc3583 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,50 @@ +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7932602.svg)](https://doi.org/10.5281/zenodo.7932602) + +# Crop Pollination Database + +## How to use the database: + +The database is stored as three `.csv` files you can find in `Final_Data` folder. Here you will find the latest version (See `News.md`). Stable releases can be found in Zenodo: DOI: [10.5281/zenodo.4311291](https://zenodo.org/badge/latestdoi/240485535) + +- `CropPol_field_level_data.csv` contains one row per field and study system with summary values per field including field coordinates, pollinator richness, yield, etc... +- `CropPol_sampling_data.csv` contains pollinator sampling data on which pollinators were collected per field. +- `CropPol_data_ownership.csv` contains information on data authors, affiliations and funding. 
+ +Please, check Metadata [here](http://htmlpreview.github.io/?https://github.com/ibartomeus/OBservData/blob/master/Metadata/docs/index_Rev.html) and understand the data limitations [here](https://github.com/ibartomeus/OBservData/blob/master/Manuscript/OBSerData_Final.pdf) + +If you want to dig into raw data files, all reproducible data and code to build the database is under the `Processing_files` folder. + +## How to cite the database: + +Please cite CropPol database when the data are used in bulk in publications and cite individual studies when pertinent. + +- Version 1.1.1 is published as a Data Paper in Ecology: Allen-Perkins et al. 2022. CropPol: A Dynamic, Open and Global Database on Crop Pollination. Ecology 103(3):e3614. https://doi.org/10.1002/ecy.3614 + +- Bugs are updated in the third digit (e.g. v1.0.1), new datasets in the second digit (e.g. v1.1.0) and major upgrades in the first digit (e.g. v2.0.0). You can see how to cite more recent versions with updated data and contributing authors in: https://github.com/ibartomeus/OBservData/releases and in Zenodo. + +## How to contribute? + +We encourage you to add new data to this database. If you are a github user, just clone the repo, and follow the instructions in `Your_new_study` folder. When ready, make us a pull request and we will add the new data. + +If you are not familiar with github, don't worry, just [download the template](https://github.com/ibartomeus/OBservData/raw/master/Template/Template_example_V9.ods), fill it up and open an [issue](https://github.com/ibartomeus/OBservData/issues/new?assignees=AlfonsoAllen&labels=New+data&template=new-data.md&title=%5Bdata%5D) to let us know where we can access the template. + +This is the workflow for github users (A) and non-users (B). + +![](Your_new_study/workflow.jpeg) + + +## Spotted any error? 
+ +Please let us know in an [issue](https://github.com/ibartomeus/OBservData/issues/new?assignees=AlfonsoAllen&labels=bug&template=bug_report.md&title=%5Bbug%5D) + +### Thanks to: + +We started using livedat **Template Repo** designed to assist in setting up a repository for regularly-updated data. Details in [this PLOS Biology paper](https://doi.org/10.1371/journal.pbio.3000125). Instructions for creating an updating data workflow can be found at this companion website: [UpdatingData.org](https://www.updatingdata.org/). + +## Indexing + [![GloBI Review by Elton](../../actions/workflows/review.yml/badge.svg)](../../actions/workflows/review.yml) [![GloBI](https://api.globalbioticinteractions.org/interaction.svg?accordingTo=globi:ibartomeus/OBservData&refutes=true&refutes=false)](https://globalbioticinteractions.org/?accordingTo=globi:ibartomeus/OBservData) Configuration to help Global Biotic Interactions (GloBI, https://globalbioticinteractions.org) index: -Allen-Perkins, Alfonso, Magrach, Ainhoa, Dainese, Matteo, Garibaldi, Lucas A., Kleijn, David, Rader, Romina, Reilly, James R., et al. 2022. “ CropPol: A Dynamic, Open and Global Database on Crop Pollination.” Ecology 103(3): e3614. https://doi.org/10.1002/ecy.3614 \ No newline at end of file +Allen-Perkins, Alfonso, Magrach, Ainhoa, Dainese, Matteo, Garibaldi, Lucas A., Kleijn, David, Rader, Romina, Reilly, James R., et al. 2022. “ CropPol: A Dynamic, Open and Global Database on Crop Pollination.” Ecology 103(3): e3614. https://doi.org/10.1002/ecy.3614