-
Notifications
You must be signed in to change notification settings - Fork 0
/
07_foldseek_easy-search.sh
134 lines (114 loc) · 4.86 KB
/
07_foldseek_easy-search.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/bin/bash -l
# Slurm batch script header and environment setup.
# Lines starting with exactly "#SBATCH" are scheduler directives; lines
# starting with "##SBATCH" are disabled (plain comments).
#
# Run the job from the current working directory
#SBATCH -D ./
# Export the submitting shell's environment to the job
#SBATCH --export=ALL
# Job name
#SBATCH -J foldseek
# Standard output file. While the job is running, %u is replaced by the user name,
# %N by the name of the node that runs the batch script, and %j by the job ID.
#SBATCH -o foldseek.%u.%N.%j.out
# Standard error file (same placeholders as the output file)
#SBATCH -e foldseek.%u.%N.%j.err
# Partition (queue) to submit to
#SBATCH -p nodes
# Number of nodes
#SBATCH -N 1
# Number of tasks (-n); presumably one core per task on this cluster, i.e. 20 cores - confirm
#SBATCH -n 20
# Time limit in the format a-bb:cc:dd (a = days, bb = hours, cc = minutes, dd = seconds); here 1 day.
#SBATCH -t 1-00:00:00
# Memory per core. Disabled on purpose: PLEASE do not set the memory option,
# the default memory (based on the number of cores) should be used.
##SBATCH --mem-per-cpu=9000M
# E-mail address for notifications. Disabled; to enable, insert your own address
# and keep just one "#" before SBATCH.
##SBATCH --mail-user=username@liverpool.ac.uk
# Send e-mail on all event types.
# NOTE(review): --mail-user above is disabled, so the scheduler's default recipient applies - confirm.
#SBATCH --mail-type=ALL
#
# Set the maximum stack size to unlimited
ulimit -s unlimited
# Set the OpenMP thread count to the number of tasks reported by Slurm (SLURM_NTASKS)
export OMP_NUM_THREADS=$SLURM_NTASKS
# Unload all modules so the job starts from a clean module environment.
# No module is loaded after this point; add any `module load ...` lines here.
# NOTE(review): foldseek is presumably expected to be on PATH already - confirm.
module purge
# Print the currently loaded modules into the job log
module list
#
#
# ---------------------------------------------------------------
# Job banner: log the start time, the host, and the Slurm
# environment of this job. date_start (epoch seconds) is recorded
# here and read again at the end of the script to report run time.
# ---------------------------------------------------------------
banner_rule='========================================================='
printf '%s\n' "$banner_rule"
# $(date) is left unquoted on purpose so the printed text is word-split
# exactly as it always has been.
echo SLURM job: submitted date = $(date)
date_start=$(date +%s)
printf '%s\n' \
  '-------------' \
  'Job output begins' \
  '-----------------' \
  ''
hostname
printf '%s\n' \
  "Print the following environmetal variables:" \
  "Job name : $SLURM_JOB_NAME" \
  "Job ID : $SLURM_JOB_ID" \
  "Job user : $SLURM_JOB_USER" \
  "Job array index : $SLURM_ARRAY_TASK_ID" \
  "Submit directory : $SLURM_SUBMIT_DIR" \
  "Temporary directory : $TMPDIR" \
  "Submit host : $SLURM_SUBMIT_HOST" \
  "Queue/Partition name : $SLURM_JOB_PARTITION" \
  "Node list : $SLURM_JOB_NODELIST" \
  "Hostname of 1st node : $HOSTNAME" \
  "Number of nodes allocated : $SLURM_JOB_NUM_NODES or $SLURM_NNODES" \
  "Number of processes : $SLURM_NTASKS" \
  "Number of processes per node : $SLURM_TASKS_PER_NODE" \
  "Requested tasks per node : $SLURM_NTASKS_PER_NODE" \
  "Requested CPUs per task : $SLURM_CPUS_PER_TASK" \
  "Scheduling priority : $SLURM_PRIO_PROCESS" \
  '' \
  "Running job:" \
  '' \
  "$banner_rule"
# Foldseek easy-search script to perform all vs all comparisons for unique genes and core genes
# Exhaustive search (don't pre-filter list)
# Single-query usage, e.g.: foldseek easy-search ./pdbs/unique_genes/FOXG_06911.pdb ./pdbs/unique_genes/ result_FOXG_06911.m8 tmp --format-output "query,target,alntmscore,lddt,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits" --exhaustive-search
# See file './code/foldseek_testing.sh' for more usage examples
# Tutorial available @ https://github.com/steineggerlab/foldseek
#######################################
# Run an exhaustive all-vs-all Foldseek search inside one gene set.
# Every ./pdbs/<gene_set>/*.pdb file is used as a query against the
# directory ./pdbs/<gene_set>/ itself, and the hits are written to
# ./outputs/structural_alignment/<gene_set>/result_<name>.m8.
# A failed query does not stop the remaining queries.
# Arguments:
#   $1 - gene set name (sub-directory of ./pdbs), e.g. unique_genes
# Outputs:
#   Progress messages on stdout, failure messages on stderr.
# Returns:
#   0 if every query succeeded (or there were no .pdb files), otherwise
#   the exit status of the last foldseek call (or mkdir) that failed.
#######################################
_foldseek_all_vs_all() {
  local gene_set=$1
  local pdb_dir="./pdbs/${gene_set}"
  local out_dir="./outputs/structural_alignment/${gene_set}"
  local columns="query,target,alntmscore,lddt,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits"
  local pdb name rc status=0

  # Create the output directory for this gene set
  mkdir -p "${out_dir}" || return

  for pdb in "${pdb_dir}"/*.pdb; do
    # An unmatched glob stays literal; skip it instead of querying "*.pdb".
    [[ -e "${pdb}" ]] || continue
    name=$(basename "${pdb}" .pdb)
    echo "Querying ${name}.pdb for structural homology"
    # Query and target come from the SAME gene set, and the result is
    # written straight to its final location (no glob-based mv needed).
    foldseek easy-search \
      "${pdb}" \
      "${pdb_dir}/" "${out_dir}/result_${name}.m8" tmp \
      --format-output "${columns}" \
      --exhaustive-search || {
      rc=$?
      echo "foldseek easy-search failed for ${name}.pdb (exit status ${rc})" >&2
      status=${rc}
    }
  done
  return "${status}"
}

# ret is the job's return code, so you can easily spot if a search failed.
# It is captured from the searches themselves, not from a later echo.
ret=0
# Unique genes
_foldseek_all_vs_all unique_genes || ret=$?
# Core genes
_foldseek_all_vs_all core_genes || ret=$?
echo =========================================================
# ---------------------------------------------------------------
# Closing banner: report the finish time and the total run time
# (measured from date_start, in epoch seconds), then exit with the
# status stored in ret.
# ---------------------------------------------------------------
printf '\n%s\n%s\n' '---------------' 'Job output ends'
date_end=$(date +%s)
# Break the elapsed seconds down into hours / minutes / seconds.
elapsed=$(( date_end - date_start ))
hours=$(( elapsed / 3600 ))
minutes=$(( elapsed % 3600 / 60 ))
seconds=$(( elapsed % 60 ))
closing_rule='========================================================='
printf '%s\n' "$closing_rule"
# $(date) is left unquoted on purpose so the printed text is word-split
# exactly as it always has been.
echo SLURM job: finished date = $(date)
printf 'Total run time : %s Hours %s Minutes %s Seconds\n' "$hours" "$minutes" "$seconds"
printf '%s\n' "$closing_rule"
# Unquoted on purpose: an empty ret falls back to a plain `exit`.
exit $ret