lanceur_scalar.slrm
#!/bin/bash
#SBATCH --array=1-NJOBS%1 # NJOBS is the number of jobs in the chain
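# The %1 throttle allows only one array task to run at a time, so the tasks
# execute sequentially as a job chain.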
#SBATCH --account=def-ngrisoua
#SBATCH --gres=gpu:1 # Request GPU "generic resources"
#SBATCH --cpus-per-task=1
#SBATCH --mem=MEMOG
#SBATCH --time=0-ZXZX:00 # DD-HH:MM:SS
#SBATCH --mail-user=yadayadaydada@yadayada.com
#SBATCH --mail-type=ALL
OUTDIR=/scratch/hannn/pix2pix-for-swot/$SLURM_JOB_NAME
CODEDIR=/home/hannn/pix2pix-for-swot/p2p_tf2_hw
CMAP_INS=XXXX # colormap for ssh_ins (to be filled in from master.bash)
CMAP_COS=YYYY # colormap for ssh_cos (to be filled in from master.bash)
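# The other all-caps tokens in this file (NJOBS, MEMOG, ZXZX, ZZZZ, XYXY, LLLL,
# NLNL) look like placeholders of the same kind, presumably substituted by
# master.bash before submission, e.g. with something like (illustrative values only):
#   sed -e "s/NJOBS/5/" -e "s/MEMOG/32G/" -e "s/ZXZX/12/" \
#       lanceur_scalar.slrm > job_to_submit.slrm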
printf "\n%%%% Preparing virtualenv and data organisation %%%%\n"
module load python/3.8
virtualenv --no-download $SLURM_TMPDIR/env
source $SLURM_TMPDIR/env/bin/activate
pip install --no-index tensorflow_gpu==2.3.0 psutil numpy==1.18.4
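# --no-download and --no-index keep the install offline: the venv tooling and
# the wheels above come from what is already available on the cluster, since
# compute nodes typically have no internet access.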
pip freeze > $OUTDIR/requirements.txt
# above is for documentation purposes, to know what is in the venv.
# Prepare data
tar xf $OUTDIR/data.tar -C $SLURM_TMPDIR/. # -x extracts, -f names the archive; -C sets the destination directory
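# Extracting into $SLURM_TMPDIR puts the training data on node-local disk,
# which is usually much faster to read from than the shared /scratch filesystem.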
#rm $OUTDIR/data.tar
cd $SLURM_TMPDIR
input_all=$SLURM_TMPDIR # images to train on
output_dir=$OUTDIR # folder to save trained model
printf "\n\n %%%% Training Phase %%%% \n\n"
tensorboard --logdir=/tmp/logs/testtraining/ --host 0.0.0.0 &
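# TensorBoard runs in the background (&) so the training command below is not
# blocked; viewing it typically requires an SSH tunnel to the compute node.
# In the call below, ZZZZ stands in for the training script name; XYXY, LLLL
# and NLNL presumably set the number of epochs, the pix2pix L1-loss weight and
# the discriminator depth.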
python $CODEDIR/ZZZZ \
--input_dir $input_all \
--output_dir $output_dir \
--max_epochs XYXY \
--batch_size 1 \
--LAMBDA LLLL \
--n_layers NLNL
exit