diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index fe61374..6aae64f 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -21,10 +21,10 @@ jobs: password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Build and push - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . file: ./Dockerfile diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 00f9460..17bf55d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,6 +35,6 @@ jobs: coverage run -m pytest coverage lcov -o ./coverage/lcov.info - name: Coveralls GitHub Action - uses: coverallsapp/github-action@v2.2.1 + uses: coverallsapp/github-action@v2.2.3 with: github-token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/bagel/cli.py b/bagel/cli.py index d0e548e..47288be 100644 --- a/bagel/cli.py +++ b/bagel/cli.py @@ -9,7 +9,7 @@ import bagel.bids_utils as butil import bagel.pheno_utils as putil from bagel import mappings, models -from bagel.utility import load_json +from bagel.utility import check_overwrite, load_json bagel = typer.Typer() @@ -32,14 +32,6 @@ def pheno( dir_okay=False, resolve_path=True, ), - output: Path = typer.Option( - ..., - help="The directory where outputs should be created.", - exists=True, - file_okay=False, - dir_okay=True, - resolve_path=True, - ), name: str = typer.Option( ..., help="A descriptive name for the dataset the input belongs to. " @@ -51,6 +43,18 @@ def pheno( callback=putil.validate_portal_uri, help="URL (HTTP/HTTPS) to a website or page that describes the dataset and access instructions (if available).", ), + output: Path = typer.Option( + default="pheno.jsonld", + help="The path for the output .jsonld file.", + file_okay=True, + dir_okay=False, + resolve_path=True, + ), + overwrite: bool = typer.Option( + False, + "--overwrite", + help="Overwrite output file if it already exists.", + ), ): """ Process a tabular phenotypic file (.tsv) that has been successfully annotated @@ -61,6 +65,9 @@ def pheno( graph datamodel for the provided phenotypic file in the .jsonld format. You can upload this .jsonld file to the Neurobagel graph. """ + # Check if output file already exists + check_overwrite(output, overwrite) + data_dictionary = load_json(dictionary) pheno_df = pd.read_csv(pheno, sep="\t", keep_default_na=False, dtype=str) putil.validate_inputs(data_dictionary, pheno_df) @@ -140,11 +147,10 @@ def pheno( # TODO: we should revisit this because there may be reasons to have None be meaningful in the future context.update(**dataset.dict(exclude_none=True)) - output_filename = output / "pheno.jsonld" - with open(output_filename, "w") as f: + with open(output, "w") as f: f.write(json.dumps(context, indent=2)) - print(f"Saved output to: {output_filename}") + print(f"Saved output to: {output}") @bagel.command() @@ -166,13 +172,17 @@ def bids( resolve_path=True, ), output: Path = typer.Option( - ..., - help="The directory where outputs should be created", - exists=True, - file_okay=False, - dir_okay=True, + help="The path for the output .jsonld file.", + default="pheno_bids.jsonld", + file_okay=True, + dir_okay=False, resolve_path=True, ), + overwrite: bool = typer.Option( + False, + "--overwrite", + help="Overwrite output file if it already exists.", + ), ): """ Extract imaging metadata from a valid BIDS dataset and combine them @@ -183,6 +193,9 @@ def bids( graph datamodel for the combined metadata in the .jsonld format. You can upload this .jsonld file to the Neurobagel graph. """ + # Check if output file already exists + check_overwrite(output, overwrite) + jsonld = load_json(jsonld_path) layout = BIDSLayout(bids_dir, validate=True) @@ -264,8 +277,7 @@ def bids( merged_dataset = {**context, **pheno_dataset.dict(exclude_none=True)} - output_filename = output / "pheno_bids.jsonld" - with open(output_filename, "w") as f: + with open(output, "w") as f: f.write(json.dumps(merged_dataset, indent=2)) - print(f"Saved output to: {output_filename}") + print(f"Saved output to: {output}") diff --git a/bagel/tests/test_cli_bids.py b/bagel/tests/test_cli_bids.py index b2534af..1812c0c 100644 --- a/bagel/tests/test_cli_bids.py +++ b/bagel/tests/test_cli_bids.py @@ -1,10 +1,21 @@ from pathlib import Path +import pytest + from bagel.cli import bagel +@pytest.fixture(scope="function") +def default_pheno_bids_output_path(tmp_path): + "Return temporary bids command output filepath that uses the default filename." + return tmp_path / "pheno_bids.jsonld" + + def test_bids_valid_inputs_run_successfully( - runner, test_data_upload_path, bids_synthetic, tmp_path + runner, + test_data_upload_path, + bids_synthetic, + default_pheno_bids_output_path, ): """Basic smoke test for the "add-bids" subcommand""" result = runner.invoke( @@ -16,12 +27,12 @@ def test_bids_valid_inputs_run_successfully( "--bids-dir", bids_synthetic, "--output", - tmp_path, + default_pheno_bids_output_path, ], ) assert result.exit_code == 0, f"Errored out. STDOUT: {result.output}" assert ( - tmp_path / "pheno_bids.jsonld" + default_pheno_bids_output_path ).exists(), "The pheno_bids.jsonld output was not created." @@ -29,7 +40,7 @@ def test_bids_sessions_have_correct_labels( runner, test_data_upload_path, bids_synthetic, - tmp_path, + default_pheno_bids_output_path, load_test_json, ): """Check that sessions added to pheno_bids.jsonld have the expected labels.""" @@ -42,11 +53,11 @@ def test_bids_sessions_have_correct_labels( "--bids-dir", bids_synthetic, "--output", - tmp_path, + default_pheno_bids_output_path, ], ) - pheno_bids = load_test_json(tmp_path / "pheno_bids.jsonld") + pheno_bids = load_test_json(default_pheno_bids_output_path) for sub in pheno_bids["hasSamples"]: assert ["ses-01", "ses-02"] == [ ses["hasLabel"] for ses in sub["hasSession"] @@ -56,7 +67,7 @@ def test_bids_sessions_have_correct_labels( def test_bids_data_with_sessions_have_correct_paths( runner, test_data_upload_path, - tmp_path, + default_pheno_bids_output_path, load_test_json, ): """ @@ -72,11 +83,11 @@ def test_bids_data_with_sessions_have_correct_paths( "--bids-dir", Path(__file__).parent / "../../bids-examples/synthetic", "--output", - tmp_path, + default_pheno_bids_output_path, ], ) - pheno_bids = load_test_json(tmp_path / "pheno_bids.jsonld") + pheno_bids = load_test_json(default_pheno_bids_output_path) for sub in pheno_bids["hasSamples"]: for ses in sub["hasSession"]: assert sub["hasLabel"] in ses["hasFilePath"] diff --git a/bagel/tests/test_cli_pheno.py b/bagel/tests/test_cli_pheno.py index 8729e3d..76f811a 100644 --- a/bagel/tests/test_cli_pheno.py +++ b/bagel/tests/test_cli_pheno.py @@ -1,8 +1,16 @@ +import os + import pytest from bagel.cli import bagel +@pytest.fixture(scope="function") +def default_pheno_output_path(tmp_path): + "Return temporary pheno command output filepath that uses the default filename." + return tmp_path / "pheno.jsonld" + + @pytest.mark.parametrize( "example", [ @@ -16,7 +24,11 @@ ], ) def test_pheno_valid_inputs_run_successfully( - runner, test_data, test_data_upload_path, tmp_path, example + runner, + test_data, + test_data_upload_path, + default_pheno_output_path, + example, ): """Basic smoke test for the "pheno" subcommand""" if example == "example_synthetic": @@ -29,7 +41,7 @@ def test_pheno_valid_inputs_run_successfully( "--dictionary", test_data_upload_path / f"{example}.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "synthetic", ], @@ -44,14 +56,14 @@ def test_pheno_valid_inputs_run_successfully( "--dictionary", test_data / f"{example}.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "do not care name", ], ) assert result.exit_code == 0, f"Errored out. STDOUT: {result.output}" assert ( - tmp_path / "pheno.jsonld" + default_pheno_output_path ).exists(), "The pheno.jsonld output was not created." @@ -93,7 +105,12 @@ def test_pheno_valid_inputs_run_successfully( ], ) def test_invalid_inputs_are_handled_gracefully( - runner, test_data, tmp_path, example, expected_exception, expected_message + runner, + test_data, + default_pheno_output_path, + example, + expected_exception, + expected_message, ): """Assures that we handle expected user errors in the input files gracefully""" with pytest.raises(expected_exception) as e: @@ -106,7 +123,7 @@ def test_invalid_inputs_are_handled_gracefully( "--dictionary", test_data / f"{example}.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "do not care name", ], @@ -129,7 +146,7 @@ def test_invalid_inputs_are_handled_gracefully( def test_invalid_portal_uris_produces_error( runner, test_data, - tmp_path, + default_pheno_output_path, portal, ): """Tests that invalid or non-HTTP/HTTPS URLs result in a user-friendly error.""" @@ -142,7 +159,7 @@ def test_invalid_portal_uris_produces_error( "--dictionary", test_data / "example2.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "test dataset 2", "--portal", @@ -162,7 +179,7 @@ def test_invalid_portal_uris_produces_error( def test_missing_bids_levels_raises_warning( runner, test_data, - tmp_path, + default_pheno_output_path, ): with pytest.warns(UserWarning) as w: runner.invoke( @@ -174,7 +191,7 @@ def test_missing_bids_levels_raises_warning( "--dictionary", test_data / "example12.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "testing dataset", ], @@ -190,7 +207,7 @@ def test_missing_bids_levels_raises_warning( def test_bids_neurobagel_levels_mismatch_raises_warning( runner, test_data, - tmp_path, + default_pheno_output_path, ): with pytest.warns(UserWarning) as w: runner.invoke( @@ -202,7 +219,7 @@ def test_bids_neurobagel_levels_mismatch_raises_warning( "--dictionary", test_data / "example13.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "testing dataset", ], @@ -222,7 +239,7 @@ def test_bids_neurobagel_levels_mismatch_raises_warning( def test_unused_missing_values_raises_warning( runner, test_data, - tmp_path, + default_pheno_output_path, ): """ Tests that an informative warning is raised when annotated missing values are not found in the @@ -238,7 +255,7 @@ def test_unused_missing_values_raises_warning( "--dictionary", test_data / "example10.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "testing dataset", ], @@ -256,7 +273,7 @@ def test_unused_missing_values_raises_warning( def test_that_output_file_contains_dataset_level_attributes( - runner, test_data, tmp_path, load_test_json + runner, test_data, default_pheno_output_path, load_test_json ): runner.invoke( bagel, @@ -267,7 +284,7 @@ def test_that_output_file_contains_dataset_level_attributes( "--dictionary", test_data / "example2.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "my_dataset_name", "--portal", @@ -275,14 +292,14 @@ def test_that_output_file_contains_dataset_level_attributes( ], ) - pheno = load_test_json(tmp_path / "pheno.jsonld") + pheno = load_test_json(default_pheno_output_path) assert pheno.get("hasLabel") == "my_dataset_name" assert pheno.get("hasPortalURI") == "http://my_dataset_site.com" def test_diagnosis_and_control_status_handled( - runner, test_data, tmp_path, load_test_json + runner, test_data, default_pheno_output_path, load_test_json ): runner.invoke( bagel, @@ -293,13 +310,13 @@ def test_diagnosis_and_control_status_handled( "--dictionary", test_data / "example6.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "my_dataset_name", ], ) - pheno = load_test_json(tmp_path / "pheno.jsonld") + pheno = load_test_json(default_pheno_output_path) assert ( pheno["hasSamples"][0]["hasDiagnosis"][0]["identifier"] @@ -316,7 +333,11 @@ def test_diagnosis_and_control_status_handled( "attribute", ["hasSex", "hasDiagnosis", "hasAssessment", "isSubjectGroup"] ) def test_controlled_terms_have_identifiers( - attribute, runner, test_data_upload_path, tmp_path, load_test_json + attribute, + runner, + test_data_upload_path, + default_pheno_output_path, + load_test_json, ): runner.invoke( bagel, @@ -327,13 +348,13 @@ def test_controlled_terms_have_identifiers( "--dictionary", test_data_upload_path / "example_synthetic.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "do not care name", ], ) - pheno = load_test_json(tmp_path / "pheno.jsonld") + pheno = load_test_json(default_pheno_output_path) for sub in pheno["hasSamples"]: if attribute in sub.keys(): @@ -346,7 +367,7 @@ def test_controlled_terms_have_identifiers( def test_controlled_term_classes_have_uri_type( - runner, test_data_upload_path, tmp_path, load_test_json + runner, test_data_upload_path, default_pheno_output_path, load_test_json ): """Tests that classes specified as schemaKeys (@type) for subject-level attributes in a .jsonld are also defined in the context.""" runner.invoke( @@ -358,7 +379,7 @@ def test_controlled_term_classes_have_uri_type( "--dictionary", test_data_upload_path / "example_synthetic.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "do not care name", ], @@ -393,7 +414,12 @@ def test_controlled_term_classes_have_uri_type( ], ) def test_assessment_data_are_parsed_correctly( - runner, test_data, tmp_path, load_test_json, assessment, subject + runner, + test_data, + default_pheno_output_path, + load_test_json, + assessment, + subject, ): runner.invoke( bagel, @@ -404,13 +430,13 @@ def test_assessment_data_are_parsed_correctly( "--dictionary", test_data / "example6.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "my_dataset_name", ], ) - pheno = load_test_json(tmp_path / "pheno.jsonld") + pheno = load_test_json(default_pheno_output_path) assert assessment == pheno["hasSamples"][subject].get("hasAssessment") @@ -420,7 +446,12 @@ def test_assessment_data_are_parsed_correctly( [(20.5, 0), (pytest.approx(25.66, 0.01), 1)], ) def test_cli_age_is_processed( - runner, test_data, tmp_path, load_test_json, expected_age, subject + runner, + test_data, + default_pheno_output_path, + load_test_json, + expected_age, + subject, ): runner.invoke( bagel, @@ -431,18 +462,20 @@ def test_cli_age_is_processed( "--dictionary", test_data / "example2.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "my_dataset_name", ], ) - pheno = load_test_json(tmp_path / "pheno.jsonld") + pheno = load_test_json(default_pheno_output_path) assert expected_age == pheno["hasSamples"][subject]["hasAge"] -def test_output_includes_context(runner, test_data, tmp_path, load_test_json): +def test_output_includes_context( + runner, test_data, default_pheno_output_path, load_test_json +): runner.invoke( bagel, [ @@ -452,13 +485,13 @@ def test_output_includes_context(runner, test_data, tmp_path, load_test_json): "--dictionary", test_data / "example2.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "my_dataset_name", ], ) - pheno = load_test_json(tmp_path / "pheno.jsonld") + pheno = load_test_json(default_pheno_output_path) assert pheno.get("@context") is not None assert all( @@ -477,7 +510,7 @@ def test_output_includes_context(runner, test_data, tmp_path, load_test_json): def test_output_excludes_properties_for_missing_vals( runner, test_data_upload_path, - tmp_path, + default_pheno_output_path, load_test_json, sub_id, missing_val_property, @@ -496,16 +529,81 @@ def test_output_excludes_properties_for_missing_vals( "--dictionary", test_data_upload_path / "example_synthetic.json", "--output", - tmp_path, + default_pheno_output_path, "--name", "BIDS synthetic test", ], ) - pheno = load_test_json(tmp_path / "pheno.jsonld") + pheno = load_test_json(default_pheno_output_path) for sub in pheno["hasSamples"]: if sub["hasLabel"] == sub_id: for entry in missing_val_property: assert ( sub.get(entry) is None ), f"{sub_id} output contains value for {entry} where annotated as missing" + + +def test_default_output_filename(runner, test_data_upload_path, tmp_path): + """Tests that the default output filename is used correctly when --output is not set.""" + os.chdir(tmp_path) + + runner.invoke( + bagel, + [ + "pheno", + "--pheno", + test_data_upload_path / "example_synthetic.tsv", + "--dictionary", + test_data_upload_path / "example_synthetic.json", + "--name", + "BIDS synthetic test", + ], + ) + + assert (tmp_path / "pheno.jsonld").exists() + + +@pytest.mark.parametrize( + "overwrite_flag, expected_stdout", + [ + ([], "already exists"), + (["--overwrite"], "Saved output to"), + ], +) +def test_overwrite_flag_behaviour( + runner, test_data_upload_path, tmp_path, overwrite_flag, expected_stdout +): + """Tests that an existing output file is only overwritten if --overwrite is used.""" + runner.invoke( + bagel, + [ + "pheno", + "--pheno", + test_data_upload_path / "example_synthetic.tsv", + "--dictionary", + test_data_upload_path / "example_synthetic.json", + "--name", + "BIDS synthetic test", + "--output", + tmp_path / "synthetic_dataset.jsonld", + ], + ) + + overwrite_result = runner.invoke( + bagel, + [ + "pheno", + "--pheno", + test_data_upload_path / "example_synthetic.tsv", + "--dictionary", + test_data_upload_path / "example_synthetic.json", + "--name", + "BIDS synthetic test", + "--output", + tmp_path / "synthetic_dataset.jsonld", + ] + + overwrite_flag, + ) + + assert expected_stdout in overwrite_result.output diff --git a/bagel/utility.py b/bagel/utility.py index a7f33cb..71d0a97 100644 --- a/bagel/utility.py +++ b/bagel/utility.py @@ -1,8 +1,21 @@ import json from pathlib import Path +import typer + def load_json(input_p: Path) -> dict: """Load a user-specified json type file.""" with open(input_p, "r") as f: return json.load(f) + + +def check_overwrite(output: Path, overwrite: bool): + """Exit program gracefully if an output file already exists but --overwrite has not been set.""" + if output.exists() and not overwrite: + raise typer.Exit( + typer.style( + f"Output file {output} already exists. Use --overwrite to overwrite.", + fg=typer.colors.RED, + ) + ) diff --git a/neurobagel_examples b/neurobagel_examples index 63dc797..05ab0bd 160000 --- a/neurobagel_examples +++ b/neurobagel_examples @@ -1 +1 @@ -Subproject commit 63dc797f8c717bdde7ae94db5f8b0d59e79fba7d +Subproject commit 05ab0bdfee095f2e25797a57f26943eeaff248ec