-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #20 from ivadomed/ks/sci-col
Add preprocessing and config files of the sci-colorado dataset
- Loading branch information
Showing 2 changed files with 324 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
{ | ||
"command": "train", | ||
"gpu_ids": [2], | ||
"path_output": "les_out", | ||
"model_name": "ModifiedUnet3d_singleContrast", | ||
"debugging": true, | ||
"object_detection_params": { | ||
"object_detection_path": null, | ||
"safety_factor": [1.0, 1.0, 1.0] | ||
}, | ||
"wandb": { | ||
"wandb_api_key": "", | ||
"project_name": "sci-col", | ||
"group_name": "lesion", | ||
"run_name": "ID1", | ||
"log_grads_every": 100 | ||
}, | ||
"loader_parameters": { | ||
"path_data": ["sci-col_lesion_prep_clean_no-json"], | ||
"subject_selection": {"n": [], "metadata": [], "value": []}, | ||
"target_suffix": ["_lesion-manual"], | ||
"extensions": [".nii.gz"], | ||
"roi_params": { | ||
"suffix": null, | ||
"slice_filter_roi": null | ||
}, | ||
"contrast_params": { | ||
"training_validation": ["T2w"], | ||
"testing": ["T2w"], | ||
"balance": {} | ||
}, | ||
"slice_filter_params": { | ||
"filter_empty_mask": false, | ||
"filter_empty_input": false | ||
}, | ||
"slice_axis": "axial", | ||
"multichannel": false, | ||
"soft_gt": false | ||
}, | ||
"split_dataset": { | ||
"fname_split": null, | ||
"random_seed": 42, | ||
"split_method" : "participant_id", | ||
"data_testing": {"data_type": null, "data_value":[]}, | ||
"balance": null, | ||
"train_fraction": 0.7, | ||
"test_fraction": 0.2 | ||
}, | ||
"training_parameters": { | ||
"batch_size": 8, | ||
"loss": { | ||
"name": "DiceLoss" | ||
}, | ||
"training_time": { | ||
"num_epochs": 300, | ||
"early_stopping_patience": 100, | ||
"early_stopping_epsilon": 0.001 | ||
}, | ||
"scheduler": { | ||
"initial_lr": 1e-3, | ||
"lr_scheduler": { | ||
"name": "CosineAnnealingLR", | ||
"base_lr": 1e-5, | ||
"max_lr": 1e-3 | ||
} | ||
}, | ||
"balance_samples": {"applied": false, "type": "gt"} | ||
}, | ||
"default_model": { | ||
"name": "Unet", | ||
"dropout_rate": 0.3, | ||
"bn_momentum": 0.1, | ||
"final_activation": "sigmoid", | ||
"is_2d": false, | ||
"depth": 6 | ||
}, | ||
"Modified3DUNet": { | ||
"applied": true, | ||
"length_3D": [176, 144, 64], | ||
"stride_3D": [88, 72, 32], | ||
"attention": false, | ||
"n_filters": 4 | ||
}, | ||
"uncertainty": { | ||
"epistemic": false, | ||
"aleatoric": false, | ||
"n_it": 0 | ||
}, | ||
"postprocessing": { | ||
"binarize_prediction": {"thr": 0.5}, | ||
"uncertainty": {"thr": -1, "suffix": "_unc-vox.nii.gz"} | ||
}, | ||
"evaluation_parameters": {}, | ||
"transformation": { | ||
"Resample": { | ||
"wspace": 0.5, | ||
"hspace": 0.5, | ||
"dspace": 1 | ||
}, | ||
"CenterCrop": { | ||
"size": [176, 144, 64] | ||
}, | ||
"RandomAffine": { | ||
"degrees": 10, | ||
"scale": [0.3, 0.3, 0.3], | ||
"translate": [0.1, 0.1, 0.1], | ||
"applied_to": ["im", "gt"], | ||
"dataset_type": ["training"] | ||
}, | ||
"ElasticTransform": { | ||
"alpha_range": [25.0, 35.0], | ||
"sigma_range": [3.5, 4.5], | ||
"p": 0.5, | ||
"applied_to": ["im", "gt"], | ||
"dataset_type": ["training"] | ||
}, | ||
"RandomReverse": { | ||
"applied_to": ["im", "gt"], | ||
"dataset_type": ["training"] | ||
}, | ||
"RandomGamma": { | ||
"log_gamma_range": [-1.5, 1.5], | ||
"p": 0.5, | ||
"applied_to": ["im"], | ||
"dataset_type": ["training"] | ||
}, | ||
"RandomBiasField": { | ||
"coefficients": 0.5, | ||
"order": 3, | ||
"p": 0.3, | ||
"applied_to": ["im"], | ||
"dataset_type": ["training"] | ||
}, | ||
"RandomBlur": { | ||
"sigma_range": [0.0, 1.0], | ||
"p": 0.3, | ||
"applied_to": ["im"], | ||
"dataset_type": ["training"] | ||
}, | ||
"NumpyToTensor": {}, | ||
"NormalizeInstance": {"applied_to": ["im"]} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
#!/bin/bash
#
# Preprocess data.
#
# Dependencies (versions):
# - SCT (5.4.0)
#
# Usage:
#   sct_run_batch -script preprocess_data.sh -path-data <PATH-TO-DATASET> -path-output <PATH-TO-OUTPUT> -jobs <num-cpu-cores>
#
# Manual segmentations or labels should be located under:
#   PATH_DATA/derivatives/labels/SUBJECT/anat/
#
# The following global variables are retrieved from the caller sct_run_batch
# but could be overwritten by uncommenting the lines below:
# PATH_DATA_PROCESSED="~/data_processed"
# PATH_RESULTS="~/results"
# PATH_LOG="~/log"
# PATH_QC="~/qc"

# Global variables
# Method sct_deepseg_sc uses for extracting the centerline: 'svm' or 'cnn'.
CENTERLINE_METHOD="svm"

# Uncomment for full verbose
set -x

# Immediately exit if error
set -e -o pipefail

# Exit if user presses CTRL+C (Linux) or CMD+C (OSX)
trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT
||
segment_if_does_not_exist() {
  ###
  # Check whether a manual spinal cord segmentation file already exists, then:
  #   - If it does, copy it locally and generate a QC report for it.
  #   - If it doesn't, perform automatic spinal cord segmentation (sct_deepseg_sc).
  # This allows you to add manual segmentations on a subject-by-subject basis
  # without disrupting the pipeline.
  #
  # Arguments:
  #   $1 - image file name WITHOUT extension (e.g. "sub-01_T2w")
  #   $2 - contrast passed to 'sct_deepseg_sc -c' (e.g. "t2")
  #   $3 - centerline extraction method: 'svm' or 'cnn'
  # Globals:
  #   PATH_DATA, SUBJECT, PATH_QC (read); FILESEG (written)
  # Exits:
  #   1 if the centerline method is not recognized
  ###
  local file="$1"
  local contrast="$2"
  local centerline_method="$3"
  # Update global variable with segmentation file name
  FILESEG="${file}_seg"
  FILESEGMANUAL="${PATH_DATA}/derivatives/labels/${SUBJECT}/anat/${FILESEG}-manual.nii.gz"
  echo
  echo "Looking for manual segmentation: ${FILESEGMANUAL}"
  if [[ -e "$FILESEGMANUAL" ]]; then
    echo "Found! Using manual segmentation."
    # Quote all paths so the pipeline survives spaces in PATH_DATA/SUBJECT (SC2086).
    rsync -avzh "$FILESEGMANUAL" "${FILESEG}.nii.gz"
    sct_qc -i "${file}.nii.gz" -s "${FILESEG}.nii.gz" -p sct_deepseg_sc -qc "${PATH_QC}" -qc-subject "${SUBJECT}"
  else
    echo "Not found. Proceeding with automatic segmentation."
    # Segment spinal cord based on the specified centerline method
    if [[ "$centerline_method" == "cnn" ]]; then
      sct_deepseg_sc -i "${file}.nii.gz" -c "$contrast" -brain 1 -centerline cnn -qc "${PATH_QC}" -qc-subject "${SUBJECT}"
    elif [[ "$centerline_method" == "svm" ]]; then
      sct_deepseg_sc -i "${file}.nii.gz" -c "$contrast" -centerline svm -qc "${PATH_QC}" -qc-subject "${SUBJECT}"
    else
      # Configuration error: report on stderr (was stdout) and abort the subject.
      echo "Centerline extraction method = ${centerline_method} is not recognized!" >&2
      exit 1
    fi
  fi
}
|
||
# Retrieve input params and other params
# $1 is the subject ID passed by sct_run_batch (may contain 'ses-0X/' subfolder).
SUBJECT="$1"

# get starting time:
start=$(date +%s)

# SCRIPT STARTS HERE
# ==============================================================================
# Display useful info for the log, such as SCT version, RAM and CPU cores available
sct_check_dependencies -short

# Go to folder where data will be copied and processed
cd "$PATH_DATA_PROCESSED"

# Copy BIDS-required files to processed data folder (e.g. list of participants)
# Copy only once; parallel/subsequent subjects reuse the files already there.
if [[ ! -f "participants.tsv" ]]; then
  rsync -avzh "$PATH_DATA/participants.tsv" .
fi
if [[ ! -f "participants.json" ]]; then
  rsync -avzh "$PATH_DATA/participants.json" .
fi
if [[ ! -f "dataset_description.json" ]]; then
  rsync -avzh "$PATH_DATA/dataset_description.json" .
fi
if [[ ! -f "README" ]]; then
  rsync -avzh "$PATH_DATA/README" .
fi

# Copy source images
# Note: we use '/./' in order to include the sub-folder 'ses-0X'
rsync -Ravzh "$PATH_DATA/./$SUBJECT" .

# Copy segmentation ground truths (GT)
mkdir -p derivatives/labels
rsync -Ravzh "$PATH_DATA/derivatives/labels/./$SUBJECT" derivatives/labels/.

# Go to subject folder for source images
cd "$SUBJECT/anat"

# Define variables
# We do a substitution '/' --> '_' in case there is a subfolder 'ses-0X/'
#file="${SUBJECT//[\/]/_}"

# Add suffix corresponding to contrast
# file=${file}_T2w
file="${SUBJECT}_T2w"

# Make sure the image metadata is a valid JSON object
# (creates an empty JSON object if the sidecar is missing or zero-length)
if [[ ! -s "${file}.json" ]]; then
  echo "{}" >> "${file}.json"
fi

# Spinal cord segmentation using the T2w contrast
# Sets the global FILESEG to the name of the segmentation produced/copied.
segment_if_does_not_exist "$file" t2 "$CENTERLINE_METHOD"
file_seg="$FILESEG"

# Dilate spinal cord mask (used below as the cropping mask around the cord)
sct_maths -i "${file_seg}.nii.gz" -dilate 5 -shape ball -o "${file_seg}_dilate.nii.gz"

# Use dilated mask to crop the original image and manual MS segmentations
sct_crop_image -i "${file}.nii.gz" -m "${file_seg}_dilate.nii.gz" -o "${file}_crop.nii.gz"

# Go to subject folder for segmentation GTs
cd "$PATH_DATA_PROCESSED/derivatives/labels/$SUBJECT/anat"

# Define variables
# NOTE(review): file_gt currently points to the SC seg ('_seg-manual'), while the
# training config targets '_lesion-manual' (the commented-out line) — confirm
# which ground truth is intended for this preprocessing run.
file_gt="${file}_seg-manual"
#file_gt="${file}_lesion-manual"

# Redefine variable for final SC segmentation mask as path changed
file_seg_dil="${PATH_DATA_PROCESSED}/${SUBJECT}/anat/${file_seg}_dilate"

# Make sure the first rater metadata is a valid JSON object
if [[ ! -s "${file_gt}.json" ]]; then
  echo "{}" >> "${file_gt}.json"
fi

# Crop the manual seg with the same dilated-cord mask as the image
sct_crop_image -i "${file_gt}.nii.gz" -m "${file_seg_dil}.nii.gz" -o "${file_gt}_crop.nii.gz"

# Go back to the root output path
cd "$PATH_OUTPUT"

# Create and populate clean data processed folder for training
PATH_DATA_PROCESSED_CLEAN="${PATH_DATA_PROCESSED}_clean"

# Create the full target tree up-front: 'mkdir -p' creates missing parents, and
# 'derivatives/' must exist BEFORE the dataset_description.json rsync below
# (previously it was only created after that rsync had already run).
mkdir -p "$PATH_DATA_PROCESSED_CLEAN/${SUBJECT}/anat" \
         "$PATH_DATA_PROCESSED_CLEAN/derivatives/labels/${SUBJECT}/anat"

# Copy over required BIDs files
rsync -avzh "$PATH_DATA_PROCESSED/dataset_description.json" "$PATH_DATA_PROCESSED_CLEAN/"
rsync -avzh "$PATH_DATA_PROCESSED"/participants.* "$PATH_DATA_PROCESSED_CLEAN/"
rsync -avzh "$PATH_DATA_PROCESSED/README" "$PATH_DATA_PROCESSED_CLEAN/"
rsync -avzh "$PATH_DATA_PROCESSED/dataset_description.json" "$PATH_DATA_PROCESSED_CLEAN/derivatives/"

# For lesion segmentation task, copy SC crops as inputs and lesion annotations as targets
rsync -avzh "$PATH_DATA_PROCESSED/${SUBJECT}/anat/${file}_crop.nii.gz" "$PATH_DATA_PROCESSED_CLEAN/${SUBJECT}/anat/${file}.nii.gz"
rsync -avzh "$PATH_DATA_PROCESSED/${SUBJECT}/anat/${file}.json" "$PATH_DATA_PROCESSED_CLEAN/${SUBJECT}/anat/${file}.json"
rsync -avzh "$PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gt}_crop.nii.gz" "$PATH_DATA_PROCESSED_CLEAN/derivatives/labels/${SUBJECT}/anat/${file_gt}.nii.gz"
rsync -avzh "$PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gt}.json" "$PATH_DATA_PROCESSED_CLEAN/derivatives/labels/${SUBJECT}/anat/${file_gt}.json"

# Display useful info for the log
end=$(date +%s)
runtime=$((end - start))
echo
echo "~~~"
echo "SCT version: $(sct_version)"
echo "Ran on: $(uname -nsr)"
echo "Duration: $((runtime / 3600))hrs $(((runtime / 60) % 60))min $((runtime % 60))sec"
echo "~~~"