Skip to content

Commit

Permalink
[ENH] Merge the add-description changes (#35)
Browse files Browse the repository at this point in the history
* Add "Description" for Neurobagel CLI

* Add missing values for "nan"

* Auto add missing values to age

We don't have the real values yet,
so we just add them as a temporary safeguard

* Fix adding missing values

* Also clean up after ourselves

* Prevent duplicates in added missing values

* Update portalURI and session path

* remove empty json

* remove venv activation from bagel CLI runner script

---------

Co-authored-by: Alyssa Dai <alyssa.ydai@gmail.com>
  • Loading branch information
surchs and alyssadai authored Oct 30, 2023
1 parent d581d48 commit a6dfa82
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 5 deletions.
29 changes: 29 additions & 0 deletions add_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import json
from pathlib import Path
import logging

import typer


logger = logging.getLogger(__name__)


def main(in_json: Path):
    """Add a placeholder description to data dictionary entries lacking one.

    Every entry of the JSON data dictionary that is a JSON object without a
    "Description" key receives a fixed placeholder description. The file is
    rewritten in place only when at least one entry was modified.

    Args:
        in_json: Path to the JSON data dictionary; updated in place.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(in_json, "r", encoding="utf-8") as f:
        data_dict = json.load(f)

    have_written = False
    for entry in data_dict.values():
        # Only JSON objects can carry a "Description" key. On a string the
        # ``in`` test would be a substring check and the assignment below
        # would raise TypeError, so non-object entries are left untouched.
        if isinstance(entry, dict) and "Description" not in entry:
            entry["Description"] = "added description for Neurobagel"
            have_written = True

    logger.warning("Have written: %s", have_written)

    # Leave the file byte-for-byte untouched when nothing was added.
    if have_written:
        with open(in_json, "w", encoding="utf-8") as f:
            json.dump(data_dict, f, indent=2)


if __name__ == "__main__":
    typer.run(main)
8 changes: 6 additions & 2 deletions process_annotation_to_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,17 @@ def describe_continuous(df: pd.DataFrame) -> dict:
return {
"Annotations": {
**describe_isabout(get_col_rows(df)["controlled_term"].item()),
"Transformation": {"TermURL": t_url, "Label": t_label}
"Transformation": {"TermURL": t_url, "Label": t_label},
"MissingValues": ["", "n/a", " "]
}
}


def get_missing(df: pd.DataFrame) -> list:
    """Return the distinct values annotated as missing, in first-seen order.

    Collects the "value" of every row whose "controlled_term" equals
    "nb:MissingValue". When the string "nan" is among them, the common
    missing-value spellings "n/a", "" and " " are appended as well.
    Duplicates are dropped while keeping the order of first occurrence, so
    the result is deterministic across runs.

    Args:
        df: Annotation rows with "value" and "controlled_term" columns.

    Returns:
        De-duplicated list of missing-value markers.
    """
    missing = [
        row["value"]
        for _, row in df.iterrows()
        if row["controlled_term"] == "nb:MissingValue"
    ]
    if "nan" in missing:
        missing.extend(["n/a", "", " "])
    # dict.fromkeys de-duplicates like set() but preserves insertion order,
    # which keeps the generated data dictionaries reproducible.
    return list(dict.fromkeys(missing))


def describe_discrete(df: pd.DataFrame) -> dict:
Expand Down
12 changes: 9 additions & 3 deletions run_bagel_cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ ldout=outputs/openneuro-jsonld/

ds="$1"
ds_name="$2"
ds_portal=https://openneuro.org/datasets/${ds}
ds_portal=https://github.com/OpenNeuroDatasets-JSONLD/${ds}.git
workdir=`realpath ${ldin}/$ds`
container_dir=/${ds}
out=(${ldout}/${ds}.jsonld)

if [ "$ds_name" == "None" ]; then
Expand All @@ -23,10 +24,15 @@ fi
echo $ds "$ds_name"
if [ ! -e ${out} ]; then

echo Checking data dictionary for descriptions!
python3 add_description.py ${workdir}/participants.json

echo bagel pheno --pheno ${workdir}/participants.tsv --dictionary ${workdir}/participants.json --output ${workdir} --name "$ds_name" --portal $ds_portal
docker run -v ${workdir}:${workdir} neurobagel/bagelcli:latest pheno --pheno ${workdir}/participants.tsv --dictionary ${workdir}/participants.json --output ${workdir} --name "$ds_name" --portal $ds_portal
docker run -v ${workdir}:${workdir} neurobagel/bagelcli:latest bids --jsonld-path ${workdir}/pheno.jsonld --bids-dir ${workdir} --output ${workdir}
docker run -v ${workdir}:${container_dir} neurobagel/bagelcli:latest pheno --pheno ${container_dir}/participants.tsv --dictionary ${container_dir}/participants.json --output ${container_dir} --name "$ds_name" --portal $ds_portal
docker run -v ${workdir}:${container_dir} neurobagel/bagelcli:latest bids --jsonld-path ${container_dir}/pheno.jsonld --bids-dir ${container_dir} --output ${container_dir}

echo Resetting dataset to HEAD
git -C ${workdir} checkout HEAD -- participants.json

cp ${workdir}/pheno_bids.jsonld ${out}
fi
1 change: 1 addition & 0 deletions tests_proc_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def test_describe_continuous(continuous_annotation):
"TermURL": "nb:float",
"Label": "float data",
},
"MissingValues": ["", "n/a", " "]
}}


Expand Down

0 comments on commit a6dfa82

Please sign in to comment.