Commit

Merge pull request #20 from ivadomed/ks/sci-col
Add preprocessing and config files of the sci-colorado dataset
naga-karthik authored Apr 29, 2022
2 parents c709e2e + 056cbd7 commit 0fb71bb
Showing 2 changed files with 324 additions and 0 deletions.
config/config_sci-colorado.json (143 additions, 0 deletions)
@@ -0,0 +1,143 @@
{
"command": "train",
"gpu_ids": [2],
"path_output": "les_out",
"model_name": "ModifiedUnet3d_singleContrast",
"debugging": true,
"object_detection_params": {
"object_detection_path": null,
"safety_factor": [1.0, 1.0, 1.0]
},
"wandb": {
"wandb_api_key": "",
"project_name": "sci-col",
"group_name": "lesion",
"run_name": "ID1",
"log_grads_every": 100
},
"loader_parameters": {
"path_data": ["sci-col_lesion_prep_clean_no-json"],
"subject_selection:": {"n": [], "metadata": [], "value": []},
"target_suffix": ["_lesion-manual"],
"extensions": [".nii.gz"],
"roi_params": {
"suffix": null,
"slice_filter_roi": null
},
"contrast_params": {
"training_validation": ["T2w"],
"testing": ["T2w"],
"balance": {}
},
"slice_filter_params": {
"filter_empty_mask": false,
"filter_empty_input": false
},
"slice_axis": "axial",
"multichannel": false,
"soft_gt": false
},
"split_dataset": {
"fname_split": null,
"random_seed": 42,
"split_method" : "participant_id",
"data_testing": {"data_type": null, "data_value":[]},
"balance": null,
"train_fraction": 0.7,
"test_fraction": 0.2
},
"training_parameters": {
"batch_size": 8,
"loss": {
"name": "DiceLoss"
},
"training_time": {
"num_epochs": 300,
"early_stopping_patience": 100,
"early_stopping_epsilon": 0.001
},
"scheduler": {
"initial_lr": 1e-3,
"lr_scheduler": {
"name": "CosineAnnealingLR",
"base_lr": 1e-5,
"max_lr": 1e-3
}
},
"balance_samples": {"applied": false, "type": "gt"}
},
"default_model": {
"name": "Unet",
"dropout_rate": 0.3,
"bn_momentum": 0.1,
"final_activation": "sigmoid",
"is_2d": false,
"depth": 6
},
"Modified3DUNet": {
"applied": true,
"length_3D": [176, 144, 64],
"stride_3D": [88, 72, 32],
"attention": false,
"n_filters": 4
},
"uncertainty": {
"epistemic": false,
"aleatoric": false,
"n_it": 0
},
"postprocessing": {
"binarize_prediction": {"thr": 0.5},
"uncertainty": {"thr": -1, "suffix": "_unc-vox.nii.gz"}
},
"evaluation_parameters": {},
"transformation": {
"Resample": {
"wspace": 0.5,
"hspace": 0.5,
"dspace": 1
},
"CenterCrop": {
"size": [176, 144, 64]
},
"RandomAffine": {
"degrees": 10,
"scale": [0.3, 0.3, 0.3],
"translate": [0.1, 0.1, 0.1],
"applied_to": ["im", "gt"],
"dataset_type": ["training"]
},
"ElasticTransform": {
"alpha_range": [25.0, 35.0],
"sigma_range": [3.5, 4.5],
"p": 0.5,
"applied_to": ["im", "gt"],
"dataset_type": ["training"]
},
"RandomReverse": {
"applied_to": ["im", "gt"],
"dataset_type": ["training"]
},
"RandomGamma": {
"log_gamma_range": [-1.5, 1.5],
"p": 0.5,
"applied_to": ["im"],
"dataset_type": ["training"]
},
"RandomBiasField": {
"coefficients": 0.5,
"order": 3,
"p": 0.3,
"applied_to": ["im"],
"dataset_type": ["training"]
},
"RandomBlur": {
"sigma_range": [0.0, 1.0],
"p": 0.3,
"applied_to": ["im"],
"dataset_type": ["training"]
},
"NumpyToTensor": {},
"NormalizeInstance": {"applied_to": ["im"]}
}
}
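
For reference, a minimal sketch of how a config like this is consumed, assuming the standard ivadomed command-line entry point (the "command": "train" field in the config selects the training loop):

ivadomed -c config/config_sci-colorado.json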
preprocessing/preprocess_data_sci-colorado.sh (181 additions, 0 deletions)
@@ -0,0 +1,181 @@
#!/bin/bash
#
# Preprocess data.
#
# Dependencies (versions):
# - SCT (5.4.0)
#
# Usage:
# sct_run_batch -script preprocess_data_sci-colorado.sh -path-data <PATH-TO-DATASET> -path-output <PATH-TO-OUTPUT> -jobs <num-cpu-cores>

# Manual segmentations or labels should be located under:
# PATH_DATA/derivatives/labels/SUBJECT/anat/
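# (for example, for a hypothetical subject 'sub-001', the manual cord segmentation would be
# expected at PATH_DATA/derivatives/labels/sub-001/anat/sub-001_T2w_seg-manual.nii.gz)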

# The following global variables are retrieved from the caller sct_run_batch
# but could be overwritten by uncommenting the lines below:
# PATH_DATA_PROCESSED="~/data_processed"
# PATH_RESULTS="~/results"
# PATH_LOG="~/log"
# PATH_QC="~/qc"

# Global variables
CENTERLINE_METHOD="svm" # method sct_deepseg_sc uses for extracting the centerline: 'svm', 'cnn'


# Uncomment for full verbose
set -x

# Immediately exit if error
set -e -o pipefail

# Exit if user presses CTRL+C (Linux) or CMD+C (OSX)
trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT


# CONVENIENCE FUNCTIONS
# ======================================================================================================================

segment_if_does_not_exist() {
###
# This function checks if a manual spinal cord segmentation file already exists, then:
# - If it does, copy it locally.
# - If it doesn't, perform automatic spinal cord segmentation.
# This allows you to add manual segmentations on a subject-by-subject basis without disrupting the pipeline.
###
local file="$1"
local contrast="$2"
local centerline_method="$3"
# Update global variable with segmentation file name
FILESEG="${file}_seg"
FILESEGMANUAL="${PATH_DATA}/derivatives/labels/${SUBJECT}/anat/${FILESEG}-manual.nii.gz"
echo
echo "Looking for manual segmentation: $FILESEGMANUAL"
if [[ -e $FILESEGMANUAL ]]; then
echo "Found! Using manual segmentation."
rsync -avzh $FILESEGMANUAL ${FILESEG}.nii.gz
sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc ${PATH_QC} -qc-subject ${SUBJECT}
else
echo "Not found. Proceeding with automatic segmentation."
# Segment spinal cord based on the specified centerline method
if [[ $centerline_method == "cnn" ]]; then
sct_deepseg_sc -i ${file}.nii.gz -c $contrast -brain 1 -centerline cnn -qc ${PATH_QC} -qc-subject ${SUBJECT}
elif [[ $centerline_method == "svm" ]]; then
sct_deepseg_sc -i ${file}.nii.gz -c $contrast -centerline svm -qc ${PATH_QC} -qc-subject ${SUBJECT}
else
echo "Centerline extraction method = ${centerline_method} is not recognized!"
exit 1
fi
fi
}
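
# Example call (hypothetical file name), mirroring the invocation further below:
#   segment_if_does_not_exist "sub-001_T2w" "t2" "svm"
# On return, the global variable FILESEG holds "sub-001_T2w_seg".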

# Retrieve input params and other params
SUBJECT=$1

# get starting time:
start=`date +%s`


# SCRIPT STARTS HERE
# ==============================================================================
# Display useful info for the log, such as SCT version, RAM and CPU cores available
sct_check_dependencies -short

# Go to folder where data will be copied and processed
cd $PATH_DATA_PROCESSED

# Copy BIDS-required files to processed data folder (e.g. list of participants)
if [[ ! -f "participants.tsv" ]]; then
rsync -avzh $PATH_DATA/participants.tsv .
fi
if [[ ! -f "participants.json" ]]; then
rsync -avzh $PATH_DATA/participants.json .
fi
if [[ ! -f "dataset_description.json" ]]; then
rsync -avzh $PATH_DATA/dataset_description.json .
fi
if [[ ! -f "README" ]]; then
rsync -avzh $PATH_DATA/README .
fi

# Copy source images
# Note: we use '/./' in order to include the sub-folder 'ses-0X'
rsync -Ravzh $PATH_DATA/./$SUBJECT .

# Copy segmentation ground truths (GT)
mkdir -p derivatives/labels
rsync -Ravzh $PATH_DATA/derivatives/labels/./$SUBJECT derivatives/labels/.

# Go to subject folder for source images
cd ${SUBJECT}/anat

# Define variables
# Note: this dataset has no 'ses-0X/' session subfolders, so the subject ID is used directly;
# otherwise a '/' --> '_' substitution (file="${SUBJECT//[\/]/_}") would be needed.

# Add suffix corresponding to the T2w contrast
file="${SUBJECT}_T2w"

# Make sure the image metadata is a valid JSON object
if [[ ! -s ${file}.json ]]; then
echo "{}" >> ${file}.json
fi

# Spinal cord segmentation using the T2w contrast
segment_if_does_not_exist ${file} t2 ${CENTERLINE_METHOD}
file_seg="${FILESEG}"

# Dilate spinal cord mask
sct_maths -i ${file_seg}.nii.gz -dilate 5 -shape ball -o ${file_seg}_dilate.nii.gz

# Use dilated mask to crop the original image (the manual segmentations are cropped further below)
sct_crop_image -i ${file}.nii.gz -m ${file_seg}_dilate.nii.gz -o ${file}_crop.nii.gz

# Go to subject folder for segmentation GTs
cd $PATH_DATA_PROCESSED/derivatives/labels/$SUBJECT/anat

# Define variables
file_gt="${file}_seg-manual"
#file_gt="${file}_lesion-manual"


# Redefine variable for final SC segmentation mask as path changed
file_seg_dil=${PATH_DATA_PROCESSED}/${SUBJECT}/anat/${file_seg}_dilate

# Make sure the first rater metadata is a valid JSON object
if [[ ! -s ${file_gt}.json ]]; then
echo "{}" >> ${file_gt}.json
fi

# Crop the manual seg
sct_crop_image -i ${file_gt}.nii.gz -m ${file_seg_dil}.nii.gz -o ${file_gt}_crop.nii.gz

# Go back to the root output path
cd $PATH_OUTPUT

# Create and populate clean data processed folder for training
PATH_DATA_PROCESSED_CLEAN="${PATH_DATA_PROCESSED}_clean"

# Copy over required BIDS files
mkdir -p $PATH_DATA_PROCESSED_CLEAN $PATH_DATA_PROCESSED_CLEAN/${SUBJECT} $PATH_DATA_PROCESSED_CLEAN/${SUBJECT}/anat
rsync -avzh $PATH_DATA_PROCESSED/dataset_description.json $PATH_DATA_PROCESSED_CLEAN/
rsync -avzh $PATH_DATA_PROCESSED/participants.* $PATH_DATA_PROCESSED_CLEAN/
rsync -avzh $PATH_DATA_PROCESSED/README $PATH_DATA_PROCESSED_CLEAN/
rsync -avzh $PATH_DATA_PROCESSED/dataset_description.json $PATH_DATA_PROCESSED_CLEAN/derivatives/

# For lesion segmentation task, copy SC crops as inputs and lesion annotations as targets
rsync -avzh $PATH_DATA_PROCESSED/${SUBJECT}/anat/${file}_crop.nii.gz $PATH_DATA_PROCESSED_CLEAN/${SUBJECT}/anat/${file}.nii.gz
rsync -avzh $PATH_DATA_PROCESSED/${SUBJECT}/anat/${file}.json $PATH_DATA_PROCESSED_CLEAN/${SUBJECT}/anat/${file}.json
mkdir -p $PATH_DATA_PROCESSED_CLEAN/derivatives $PATH_DATA_PROCESSED_CLEAN/derivatives/labels $PATH_DATA_PROCESSED_CLEAN/derivatives/labels/${SUBJECT} $PATH_DATA_PROCESSED_CLEAN/derivatives/labels/${SUBJECT}/anat/
rsync -avzh $PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gt}_crop.nii.gz $PATH_DATA_PROCESSED_CLEAN/derivatives/labels/${SUBJECT}/anat/${file_gt}.nii.gz
rsync -avzh $PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gt}.json $PATH_DATA_PROCESSED_CLEAN/derivatives/labels/${SUBJECT}/anat/${file_gt}.json

# Display useful info for the log
end=`date +%s`
runtime=$((end-start))
echo
echo "~~~"
echo "SCT version: `sct_version`"
echo "Ran on: `uname -nsr`"
echo "Duration: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec"
echo "~~~"
