[ENH] implement one-way ANOVA at group level to compare groups (#1296)

* start adding a 3 group dataset * [ENH] use sub commands for python CLI (#1292) * start adding sub commands * preproc subcommand * deal with create roi parser * deal with default_model sub command * deal with bms action * deal with stats actions * start adapting cli * add test for command building * add test for CLI for preproc, smooth... * add test for command building for stats * add test for command building for bms * rename function * start switching CLI * several fixes * more fix * keep switching * fix * fix * lint * fixes * fixes * minor fixes * additional fixes * fix output ROI * visualize output in circle ci * add temp script for boutiques and bids model graph * start adding tweaks * try 3 groups * rename file * update code and tests * add multi group demo * fixes * adapt example * fix contrasts * deal with results * update doc * minor changes * tmp * refactor * refactor * refactor * several fixes * several fixes * fix and refactor * fix and refactor * fix and refactor * fix models * fix * fix * fix fast tests * fixes * fix and implement F test * run group level node by node * add F test to demo * octave fix
cpp-lln-lab · Jul 30, 2024 · 64b14da · 64b14da
1 parent 9e3d18d
commit 64b14da
Show file tree

Hide file tree

Showing 87 changed files with 2,417 additions and 1,136 deletions.
diff --git a/.flake8 b/.flake8
@@ -9,6 +9,7 @@ exclude =
     tests/*
     _version.py
     demos/*
+    WIP/*
 count = True
 show-source = True
 statistics = True
diff --git a/.github/workflows/run_tests_cli.yml b/.github/workflows/run_tests_cli.yml
@@ -60,10 +60,10 @@ jobs:
                 coverage run --source src -m pytest
                 coverage xml
 
-        -   name: Code coverage
-            uses: codecov/codecov-action@v4
-            with:
-                file: coverage.xml
-                flags: cli
-                name: codecov-cli
-                fail_ci_if_error: false
+        # -   name: Code coverage
+        #     uses: codecov/codecov-action@v4
+        #     with:
+        #         file: coverage.xml
+        #         flags: cli
+        #         name: codecov-cli
+        #         fail_ci_if_error: false
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+* [ENH] add support for one-way ANOVA across groups at the group level #1296 by @Remi-Gau
+* [ENH] allow for 2 sample T-Test, within group T-Test and one-way ANOVA to be more flexible with respect to what participants.tsv column to use to allocate subjects in each group #1296 by @Remi-Gau
 * [ENH] make `addConfoundsToDesignMatrix` a method of `BidsModel` #1294 by @Remi-Gau
 * [ENH] add Apptainer definition #1254 by @Remi-Gau and @monique2208
 * [ENH] allow to copy anat only on raw datasets #1181 by @Remi-Gau
@@ -43,7 +45,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
-
 * [ENH] align specification of F contrasts on the BIDS stats model: they should now be specified as a 2D matrix and not a 1D vector. #1276 @Remi-Gau
 * [DOC] change theme and structure of the documentation #1256 @Remi-Gau
 * [REF] Refactor and update CLI in #1096 @Remi-Gau

diff --git a/WIP/find_data_set_with_3_groups.py b/WIP/find_data_set_with_3_groups.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+
+import pandas as pd
+from pandas.errors import ParserError
+
+openneuro = Path("/home/remi/datalad/datasets.datalad.org/openneuro")
+
+for ds in openneuro.iterdir():
+    file = ds / "participants.tsv"
+
+    if file.exists():
+
+        try:
+            df = pd.read_csv(file, sep="\t")
+        except ParserError:
+            ...
+
+        if len(df) < 10:
+            continue
+        if "group" in df.columns:
+            col = "group"
+        elif "Group" in df.columns:
+            col = "Group"
+        else:
+            continue
+        if len(df[col].value_counts()) < 3:
+            continue
+        if "age" in df.columns and df["age"].mean() < 18:
+            continue
+        bold_files = list(ds.glob("**/*bold*"))
+
+        has_func = len(bold_files) > 0
+        if not has_func:
+            continue
+
+        tasks = {x.name.split("task-")[1].split("_")[0] for x in bold_files}
+        if len(tasks) == 1 and "rest" in tasks:
+            continue
+
+        print()
+        print(file)
+        print(df[col].value_counts())
+        print(tasks)
diff --git a/demos/openneuro/Makefile b/demos/openneuro/Makefile
@@ -74,3 +74,13 @@ data_ds002799:
 	datalad get -d inputs/ds002799 inputs/ds002799/derivatives/fmriprep/sub-292/*/func/*tsv
 	datalad get -d inputs/ds002799 inputs/ds002799/derivatives/fmriprep/sub-30[27]/*/func/*MNI152NLin2009cAsym* -J 2
 	datalad get -d inputs/ds002799 inputs/ds002799/derivatives/fmriprep/sub-30[27]/*/func/*tsv -J 2
+
+data_ds003397: # install ds003397 and its fmriprep derivatives, then get anat/func files for sub-[01][1267]
+	mkdir -p inputs
+	cd inputs && datalad install ///openneuro/ds003397
+	cd inputs && datalad install https://github.com/OpenNeuroDerivatives/ds003397-fmriprep
+	cd inputs/ds003397-fmriprep && datalad get sub-[01][1267]/anat/*MNI152NLin2009cAsym*T1w.nii.gz -J 12
+	cd inputs/ds003397-fmriprep && datalad get sub-[01][1267]/anat/*mask*.nii.gz -J 12
+	cd inputs/ds003397-fmriprep && datalad get sub-[01][1267]/func/*time*tsv -J 12
+	cd inputs/ds003397-fmriprep && datalad get sub-[01][1267]/func/*json -J 12
+	cd inputs/ds003397-fmriprep && datalad get sub-[01][1267]/func/*MNI152NLin2009cAsym*desc-preproc*bold.nii.gz -J 12
diff --git a/demos/openneuro/ds000114_run.m b/demos/openneuro/ds000114_run.m
@@ -1,3 +1,5 @@
+% Demo to compare activations across sessions.
+
 % (C) Copyright 2023 bidspm developers
 
 clear;

diff --git a/demos/openneuro/ds003397_run.m b/demos/openneuro/ds003397_run.m
@@ -0,0 +1,61 @@
+% Run a one-way ANOVA across groups
+%
+% Only a few subjects are run because of the large size of each run.
+
+% (C) Copyright 2024 bidspm developers
+
+clear;
+clc;
+
+% Resolve all paths from this file's location, not the current directory
+root_dir = fileparts(mfilename('fullpath'));
+addpath(fullfile(root_dir, '..', '..'));
+bidspm();
+
+% The directories where the data are located
+bids_dir = fullfile(root_dir, 'inputs', 'ds003397');
+fmriprep_dir = fullfile(root_dir, 'inputs', 'ds003397-fmriprep');
+output_dir = fullfile(root_dir, 'outputs', 'ds003397', 'derivatives');
+
+task = 'checkerboard';
+space = {'MNI152NLin2009cAsym'};
+participant_label = {'01', '06', '11', '12'};
+
+% Copy (& smooth)
+FWHM = 0;
+bidspm(fmriprep_dir, output_dir, 'subject', ...
+       'participant_label', participant_label, ...
+       'action', 'smooth', ...
+       'task', task, ...
+       'space', space, ...
+       'fwhm', FWHM, ...
+       'verbosity', 3);
+
+%% Stats
+preproc_dir = fullfile(output_dir, 'bidspm-preproc');
+
+model_file = fullfile(root_dir, ...
+                      'models', ...
+                      'model-ds003397_smdl.json');
+% Subject level: same FWHM as the smoothing step above
+bidspm(bids_dir, output_dir, 'subject', ...
+       'participant_label', participant_label, ...
+       'action', 'stats', ...
+       'preproc_dir', preproc_dir, ...
+       'model_file', model_file, ...
+       'roi_atlas', 'hcpex', ...
+       'space', space, ...
+       'fwhm', FWHM, ...
+       'skip_validation', true, ...
+       'verbosity', 3);
+% Dataset level: same model file and participants as the subject level
+bidspm(bids_dir, output_dir, 'dataset', ...
+       'participant_label', participant_label, ...
+       'action', 'stats', ...
+       'preproc_dir', preproc_dir, ...
+       'model_file', model_file, ...
+       'roi_atlas', 'hcpex', ...
+       'space', space, ...
+       'fwhm', FWHM, ...
+       'skip_validation', true, ...
+       'verbosity', 3);
diff --git a/demos/openneuro/models/model-ds003397_smdl.json b/demos/openneuro/models/model-ds003397_smdl.json
@@ -0,0 +1,218 @@
+{
+  "Name": "1_way_ANOVA",
+  "BIDSModelVersion": "1.0.0",
+  "Input": {
+    "task": [
+      "checkerboard"
+    ],
+    "space": [
+      "MNI152NLin2009cAsym"
+    ]
+  },
+  "Nodes": [
+    {
+      "Level": "Run",
+      "Name": "run_level",
+      "GroupBy": [
+        "run",
+        "subject"
+      ],
+      "Model": {
+        "Type": "glm",
+        "X": [
+          "trial_type.flashing checkerboard",
+          "trans_?",
+          "rot_?"
+        ],
+        "HRF": {
+          "Variables": [
+            "trial_type.flashing checkerboard"
+          ],
+          "Model": "spm"
+        }
+      },
+      "Contrasts": [
+        {
+          "Name": "flashing checkerboard",
+          "ConditionList": [
+            "trial_type.flashing checkerboard"
+          ],
+          "Weights": [
+            1
+          ],
+          "Test": "t"
+        }
+      ]
+    },
+    {
+      "Level": "Subject",
+      "Name": "subject_level",
+      "GroupBy": [
+        "contrast",
+        "subject"
+      ],
+      "Model": {
+        "Type": "glm",
+        "X": [
+          1
+        ],
+        "Software": {
+          "bidspm": {
+            "Results": [
+              {
+                "name": [
+                  "flashing checkerboard"
+                ],
+                "p": 0.05,
+                "MC": "FWE",
+                "png": true,
+                "binary": false,
+                "nidm": false,
+                "montage": {
+                  "do": true,
+                  "slices": [
+                    -4,
+                    0,
+                    4,
+                    8,
+                    16
+                  ],
+                  "background": {
+                    "suffix": "T1w",
+                    "desc": "preproc",
+                    "modality": "anat"
+                  }
+                }
+              }
+            ]
+          }
+        }
+      },
+      "DummyContrasts": {
+        "Test": "t"
+      }
+    },
+    {
+      "Level": "Dataset",
+      "Name": "dataset_level",
+      "Description": "average across all subjects",
+      "GroupBy": [
+        "contrast"
+      ],
+      "Model": {
+        "Type": "glm",
+        "X": [
+          1
+        ]
+      }
+    },
+    {
+      "Level": "Dataset",
+      "Name": "between_groups",
+      "Description": "one way anova",
+      "GroupBy": [
+        "contrast"
+      ],
+      "Model": {
+        "Type": "glm",
+        "X": [
+          1,
+          "group"
+        ],
+        "Software": {
+          "bidspm": {
+            "Results": [
+              {
+                "name": [
+                  "B > I",
+                  "average across groups"
+                ],
+                "p": 0.01,
+                "MC": "FWE",
+                "png": true,
+                "binary": false,
+                "nidm": false,
+                "montage": {
+                  "do": false
+                }
+              }
+            ]
+          }
+        }
+      },
+      "Contrasts": [
+        {
+          "Name": "B > I",
+          "ConditionList": [
+            "group.B",
+            "group.I"
+          ],
+          "Weights": [
+            1,
+            -1
+          ],
+          "Test": "t"
+        },
+        {
+          "Name": "average across groups",
+          "ConditionList": [
+            "group.B",
+            "group.I",
+            "group.BI"
+          ],
+          "Weights": [
+            1,
+            1,
+            1
+          ],
+          "Test": "t"
+        },
+        {
+          "Name": "some F test",
+          "ConditionList": [
+            "group.B",
+            "group.BI",
+            "group.I"
+          ],
+          "Weights": [
+            [
+              1,
+              0,
+              0
+            ],
+            [
+              0,
+              1,
+              0
+            ],
+            [
+              0,
+              0,
+              1
+            ]
+          ],
+          "Test": "F"
+        }
+      ]
+    }
+  ],
+  "Edges": [
+    {
+      "Source": "run_level",
+      "Destination": "subject_level"
+    },
+    {
+      "Source": "subject_level",
+      "Destination": "dataset_level"
+    },
+    {
+      "Source": "subject_level",
+      "Destination": "between_groups",
+      "Filter": {
+        "contrast": [
+          "flashing checkerboard"
+        ]
+      }
+    }
+  ]
+}