Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Merge the add-description changes #35

Merged
merged 9 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions add_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import json
from pathlib import Path
import logging

import typer


logger = logging.getLogger(__name__)


def main(in_json: Path):
    """Add a placeholder description to data dictionary entries lacking one.

    Reads the JSON data dictionary at ``in_json`` and gives every entry that
    has no ``"Description"`` key the text
    ``"added description for Neurobagel"``. The file is rewritten in place
    (2-space indent) only when at least one entry was changed, so the logged
    "Have written" flag matches what actually happened on disk.

    Args:
        in_json: Path to the JSON data dictionary; updated in place.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(in_json, "r", encoding="utf-8") as f:
        data_dict = json.load(f)

    have_written = False
    # Assigning to an existing entry does not resize the outer dict, so it is
    # safe to mutate the entries while iterating over them.
    for entry in data_dict.values():
        if "Description" not in entry:
            entry["Description"] = "added description for Neurobagel"
            have_written = True

    logger.warning("Have written: %s", have_written)

    # Leave the file untouched when nothing was added.
    if have_written:
        with open(in_json, "w", encoding="utf-8") as f:
            json.dump(data_dict, f, indent=2)


# Script entry point: when this file is executed directly (not imported),
# hand main() to typer so it runs as a command-line program.
if __name__ == "__main__":
typer.run(main)
8 changes: 6 additions & 2 deletions process_annotation_to_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,17 @@ def describe_continuous(df: pd.DataFrame) -> dict:
return {
"Annotations": {
**describe_isabout(get_col_rows(df)["controlled_term"].item()),
"Transformation": {"TermURL": t_url, "Label": t_label}
"Transformation": {"TermURL": t_url, "Label": t_label},
"MissingValues": ["", "n/a", " "]
}
}


def get_missing(df: pd.DataFrame) -> list:
    """Return the values annotated as missing in an annotation table.

    Collects the ``"value"`` of every row whose ``"controlled_term"`` is
    ``"nb:MissingValue"``. When the string ``"nan"`` is among them, the
    spellings ``"n/a"``, ``""`` and ``" "`` are added as well.

    Args:
        df: Annotation rows with ``"value"`` and ``"controlled_term"``
            columns.

    Returns:
        The missing values without duplicates, in first-seen order.
    """
    missing = [
        row["value"]
        for _, row in df.iterrows()
        if row["controlled_term"] == "nb:MissingValue"
    ]
    # NOTE(review): this matches the literal string "nan" only; a float NaN
    # in the "value" column would not trigger it. Confirm how the table is
    # loaded.
    if "nan" in missing:
        missing.extend(["n/a", "", " "])
    # dict.fromkeys de-duplicates like set() but keeps insertion order, so
    # the result is the same on every run (set order of strings varies with
    # hash randomization).
    return list(dict.fromkeys(missing))


def describe_discrete(df: pd.DataFrame) -> dict:
Expand Down
12 changes: 9 additions & 3 deletions run_bagel_cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ ldout=outputs/openneuro-jsonld/

ds="$1"
ds_name="$2"
ds_portal=https://openneuro.org/datasets/${ds}
ds_portal=https://github.com/OpenNeuroDatasets-JSONLD/${ds}.git
workdir=`realpath ${ldin}/$ds`
container_dir=/${ds}
out=(${ldout}/${ds}.jsonld)

if [ "$ds_name" == "None" ]; then
Expand All @@ -23,10 +24,15 @@ fi
echo $ds "$ds_name"
if [ ! -e ${out} ]; then

echo Checking data dictionary for descriptions!
python3 add_description.py ${workdir}/participants.json
alyssadai marked this conversation as resolved.
Show resolved Hide resolved

echo bagel pheno --pheno ${workdir}/participants.tsv --dictionary ${workdir}/participants.json --output ${workdir} --name "$ds_name" --portal $ds_portal
docker run -v ${workdir}:${workdir} neurobagel/bagelcli:latest pheno --pheno ${workdir}/participants.tsv --dictionary ${workdir}/participants.json --output ${workdir} --name "$ds_name" --portal $ds_portal
docker run -v ${workdir}:${workdir} neurobagel/bagelcli:latest bids --jsonld-path ${workdir}/pheno.jsonld --bids-dir ${workdir} --output ${workdir}
docker run -v ${workdir}:${container_dir} neurobagel/bagelcli:latest pheno --pheno ${container_dir}/participants.tsv --dictionary ${container_dir}/participants.json --output ${container_dir} --name "$ds_name" --portal $ds_portal
docker run -v ${workdir}:${container_dir} neurobagel/bagelcli:latest bids --jsonld-path ${container_dir}/pheno.jsonld --bids-dir ${container_dir} --output ${container_dir}

echo Resetting dataset to HEAD
git -C ${workdir} checkout HEAD -- participants.json

cp ${workdir}/pheno_bids.jsonld ${out}
fi
1 change: 1 addition & 0 deletions tests_proc_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def test_describe_continuous(continuous_annotation):
"TermURL": "nb:float",
"Label": "float data",
},
"MissingValues": ["", "n/a", " "]
}}


Expand Down