-
Notifications
You must be signed in to change notification settings - Fork 1
/
GEObuilder.sh
746 lines (540 loc) · 22.6 KB
/
GEObuilder.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
#!/bin/bash
##########################################################################
# Copyright 2017, Jelena Telenius (jelena.telenius@imm.ox.ac.uk) #
# #
# This file is part of GEObuilder . #
# #
# GEObuilder is free software: you can redistribute it and/or modify #
# it under the terms of the MIT license.
#
#
# #
# GEObuilder is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# MIT license for more details.
# #
# You should have received a copy of the MIT license
# along with GEObuilder.
##########################################################################
printRunStartArrays(){
    # Prints the lists the main loops are about to walk through, one element per line.
    # Reads globals : nameList fileList1 fileList2 runtype SINGLE_END
    # fileList2 is printed only for paired-end FASTQ runs ( runtype FASTQ and SINGLE_END 0 ).
    local lastNameIndex=$(( ${#nameList[@]} - 1 ))
    local lastFile1Index=$(( ${#fileList1[@]} - 1 ))

    printf '\n%s\n\n' "Ready to run ! - here printout of main for loop parameters : "

    for k in $( seq 0 "${lastNameIndex}" ); do
        printf '%s\n' "nameList[$k] ${nameList[$k]}"
    done
    printf '\n'

    for k in $( seq 0 "${lastFile1Index}" ); do
        printf '%s\n' "fileList1[$k] ${fileList1[$k]}"
    done
    printf '\n'

    # The "processedfile" does not have 3rd column - ever.
    [ "${runtype}" == "FASTQ" ] || return 0
    # Single end FASTQ does not have it either.
    [ "${SINGLE_END}" -eq 0 ] || return 0

    local lastFile2Index=$(( ${#fileList2[@]} - 1 ))
    for k in $( seq 0 "${lastFile2Index}" ); do
        printf '%s\n' "fileList2[$k] ${fileList2[$k]}"
    done
    printf '\n'
}
# ------------------------------------------
# Default run settings - these hold until a command line flag or a parameter file says otherwise.

# FASTQ layout ( the FASTQ storing section notes that fastqParameterFileReader sets these three )
SINGLE_END=0
GZIP=0
LANES=1

# Switches driven by the command line flags
PACKED_PROCESSED_FILES=1    # --unpackedProcessedFiles turns this to 0
WINDOW=0                    # --window turns this to 1
ONLYGENERATEPARAMS=0        # --onlyParamFiles turns this to 1
TARRING=1                   # --noTarring turns this to 0
PYRAMIDPIPE=0               # --pyramid turns this to 1

# Parameter file autogeneration - turned off later, if GEO_piperun.txt is not provided
AUTOGENERATE=1

# Day-month-year_hour_minute stamp, used in the names of the stored parameter file folders
timestamp=$( date +%d%b%Y_%H_%M )
#------------------------------------------
# Help requests ..

# True (0) only when called with exactly one argument, and that argument is -h or --help.
# The argument is compared as a quoted string, so an empty argument or one containing
# spaces is simply "not a help request" instead of a syntax error inside the test.
isHelpRequest(){
    [ $# -eq 1 ] || return 1
    [ "$1" == "-h" ] || [ "$1" == "--help" ]
}

if isHelpRequest "$@"
then
    # Only the usage printer is loaded here - it is read from bashHelpers/ next to GEObuilder.sh
    PipeTopPath="$( which "$0" | sed 's/\/GEObuilder.sh$//' )"
    BashHelpersPath="${PipeTopPath}/bashHelpers"
    . "${BashHelpersPath}/usageAndVersion.sh"
    usage
    exit 0
fi
#------------------------------------------
# Start-up banner : what is run, when, from where, on which machine, and with which arguments.
echo "GEObuilder.sh - by Jelena Telenius, 14/02/2017"
echo
timepoint=$( date )
echo "run started : ${timepoint}"
echo
echo "Script located at"
# Quoted, so that a script path containing spaces is still looked up as one path
which "$0"
echo
echo "RUNNING IN MACHINE : "
hostname --long
echo "run called with parameters :"
# Quoted "$@" : the arguments are logged exactly as given (no word splitting, no glob expansion)
echo "GEObuilder.sh" "$@"
echo
# For making sure we know where we are ..
# ( the later sections cd back to this folder before they start working )
weAreHere=$( pwd )
#------------------------------------------
# Loading subroutines in ..
echo "Loading subroutines in .."
# The helper scripts are read from the bashHelpers/ folder next to GEObuilder.sh itself.
# All paths are quoted, so an installation folder with spaces in its name still works.
PipeTopPath="$( which "$0" | sed 's/\/GEObuilder.sh$//' )"
BashHelpersPath="${PipeTopPath}/bashHelpers"
# AUTOGENERATING PARAMETER FILES (the main enabler of the NGseqBasic auto-GEO)
. "${BashHelpersPath}/autogenerate.sh"
# READING THE AUTOGENERATED / MANUALLY GIVEN PARAMETER FILES IN, also integrity tester subroutine for the GEO_processedFilePaths.txt
. "${BashHelpersPath}/parameterFileReaders.sh"
# TEST THE FASTQ PARAMETER FILES FOR INCONSISTENCIES (pyramid VS004 17Feb2017 copied subroutines - only testing, no generating or parsing)
. "${BashHelpersPath}/fastqChecksFromPyramid.sh"
# LOADING FASTQS AND COMBINING LANES (NGseqBasic style - basic subroutines)
. "${BashHelpersPath}/inputFastqs.sh"
# PRINTING HELP AND VERSION MESSAGES
. "${BashHelpersPath}/usageAndVersion.sh"
# PRINTING TO LOG AND ERROR FILES
. "${BashHelpersPath}/logFilePrinter.sh"
# TEST THE EXISTENCE OF INPUT FILES
. "${BashHelpersPath}/fileTesters.sh"
#------------------------------------------
echo
echo "PipeTopPath ${PipeTopPath}"
echo "BashHelpersPath ${BashHelpersPath}"
echo
#------------------------------------------
# Command line flags ..

# Parses the command line flags given as arguments, and sets the matching global switches :
#   --unpackedProcessedFiles  PACKED_PROCESSED_FILES=0
#   --onlyParamFiles          ONLYGENERATEPARAMS=1
#   --noTarring               TARRING=0
#   --window                  WINDOW=1
#   --pyramid                 PYRAMIDPIPE=1
#   -h / --help               prints usage and ends the run (exit 0), like the single-argument help request
# An unknown flag prints usage and ends the run with exit 1, instead of running on regardless.
# Whatever is left after the flags is stored to the array GEObuilderLeftoverArguments.
parseGEObuilderOptions(){
    local parsedOptions
    parsedOptions=$( getopt -o h --long help,unpackedProcessedFiles,onlyParamFiles,window,noTarring,pyramid -- "$@" )
    if [ $? -ne 0 ]
    then
        usage
        exit 1
    fi
    # getopt prints a shell-quoted, normalised argument list - eval is the way to read it back in
    eval set -- "${parsedOptions}"
    while [ $# -gt 0 ] ; do
        case "$1" in
            -h|--help) usage ; exit 0 ;;
            --unpackedProcessedFiles) PACKED_PROCESSED_FILES=0 ; shift ;;
            --onlyParamFiles) ONLYGENERATEPARAMS=1 ; shift ;;
            --noTarring) TARRING=0 ; shift ;;
            --window) WINDOW=1 ; shift ;;
            --pyramid) PYRAMIDPIPE=1 ; shift ;;
            --) shift ; break ;;
            # Safety net - without this an unexpected token would spin the loop forever
            *) break ;;
        esac
    done
    GEObuilderLeftoverArguments=( "$@" )
}

parseGEObuilderOptions "$@"
# The positional parameters of the script become the non-flag arguments (as they did before)
set -- "${GEObuilderLeftoverArguments[@]}"
#--------Generating-the-parameter-files-for-the-subscripts------------------------------------------------------

# True (0) when parameter file $1 exists in the run folder and is not empty.
parameterFileGiven(){
    [ -s "./$1" ]
}

# Prints the message $1 and the pointer to the help text to stderr, then ends the run with exit 1.
abortWithHelpPointer(){
    {
        echo
        echo "$1"
        echo
        echo "Usage instructions available with :"
        echo "GEObuilder.sh --help "
        echo
    } >&2
    exit 1
}

# Rule 1 : at least one parameter file has to be there.
if ! parameterFileGiven GEO_piperun.txt && ! parameterFileGiven GEO_fastqPaths.txt && ! parameterFileGiven GEO_processedFilePaths.txt && ! parameterFileGiven GEO_fastqFileType.txt ; then
    abortWithHelpPointer "No parameter files given ( GEO_piperun.txt GEO_fastqPaths.txt GEO_processedFilePaths.txt GEO_fastqFileType.txt ) - GEO generation aborted"
fi

# Rule 2 : GEO_piperun.txt (autogeneration) and the hand-written parameter files exclude each other.
if parameterFileGiven GEO_piperun.txt ; then
    if parameterFileGiven GEO_fastqPaths.txt || parameterFileGiven GEO_processedFilePaths.txt || parameterFileGiven GEO_fastqFileType.txt ; then
        abortWithHelpPointer "Give either ONLY (a) GEO_piperun.txt ,OR SOME/ALL OF THE (b) GEO_fastqPaths.txt GEO_fastqFileType.txt GEO_processedFilePaths.txt - GEO generation aborted"
    fi
    echo
    echo "GEO_piperun.txt file found - will AUTO-GENERATE the needed parameter files from pipeline output !"
    echo
else
    echo
    echo "No parameter file GEO_piperun.txt provided - turning parameter file autogeneration off !"
    echo
    AUTOGENERATE=0
fi

# Rule 3 : hand-written FASTQ paths are of no use without the FASTQ file type description.
if parameterFileGiven GEO_fastqPaths.txt && ! parameterFileGiven GEO_fastqFileType.txt ; then
    abortWithHelpPointer "Custom-parameter file GEO_fastqPaths.txt needs the parameter file GEO_fastqFileType.txt ! - GEO generation aborted"
fi
#---------------------------------------------------------
# Here parsing the parameter files - if they are not purely tab-limited, but partially space-limited, or multiple-tab limited, this fixes it.

# Lists every GEO*.txt file of the current folder, and rewrites each of them in place so that
# every run of whitespace becomes a single tab.
# The glob covers GEO_fastqFileType.txt as well, so that file needs no separate treatment
# ( the earlier special case tested for "GEO_fastqFileType.tx" and thus never ran ).
# When no GEO*.txt file exists, nothing is printed and nothing is given to sed.
normalizeParameterFileWhitespace(){
    local parameterFile
    for parameterFile in ./GEO*.txt
    do
        # An unmatched glob stays as the literal pattern - skip it
        [ -e "${parameterFile}" ] || continue
        echo "${parameterFile}"
        sed -i 's/\s\s*/\t/g' "${parameterFile}"
    done
}

echo
echo "PARAMETER FILES GIVEN IN RUN FOLDER :"
echo
normalizeParameterFileWhitespace
#---------------------------------------------------------
# Telling the user which switch is shown for this run :
# PACKED_PROCESSED_FILES when autogeneration is off, WINDOW otherwise.
printf '\n%s\n\n' "Run with parameters :"
if [ "${AUTOGENERATE}" -eq 0 ] ; then
    printf '%s\n' "PACKED_PROCESSED_FILES ${PACKED_PROCESSED_FILES} (TRUE=1, FALSE=0) - zero if processed data files are NOT in binary format (i.e. not bw, bb, bam etc) "
else
    printf '%s\n' "WINDOW ${WINDOW} (TRUE=1, FALSE=0) - zero (0) if we want to fetch the filtered_pileup.bw, one (1) if we want the filtered_window.bw instead."
fi
printf '\n'
#---------------------------------------------------------
# Here autogenerating ..
# ( only when GEO_piperun.txt was given - AUTOGENERATE has been set to 0 otherwise )
if [ "${AUTOGENERATE}" -ne 0 ] ; then
    # Back to the run folder - and no going further, if we cannot get there.
    cd "${weAreHere}" || exit 1
    autogenerate
    # mkdir -p : makes both folder levels when needed, and does not complain if
    # the folder of this time stamp is already there (a re-run within the same minute).
    mkdir -p "USED_parameterFiles/Run_at_${timestamp}"
    cp GEO_piperun.txt "USED_parameterFiles/Run_at_${timestamp}/."
    # Early exit if user only wants the param files
    if [ "${ONLYGENERATEPARAMS}" -eq 1 ]; then
        # GEO_piperun.txt is renamed out of the way - presumably so that the generated
        # parameter files can be used as such in the next run ( TODO confirm )
        mv -f GEO_piperun.txt "USED_GEO_piperun_${timestamp}.txt"
        echo
        echo "Only parameter file generation requested, not proceeding further !"
        echo
        echo "All done !"
        echo
        exit 0
    fi
fi
#---------------------------------------------------------
# Now our parameter files should look like this :
# If our GEO_piperun.txt was like this :
# run1 /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/1_runPipe (does not exist really)
# run2 /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe (does exist really)
# Our parameter files will look like this :
# GEO_fastqPaths.txt
# run1_A11 /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A011_S1_L001_R1_001.fastq /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A011_S1_L001_R2_001.fastq
# run1_A12 /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A012_S2_L001_R1_001.fastq /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A012_S2_L001_R2_001.fastq
# run1_A13 /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A013_S3_L001_R1_001.fastq /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A013_S3_L001_R2_001.fastq
# run1_A14 /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A014_S4_L001_R1_001.fastq /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A014_S4_L001_R2_001.fastq
# run2_A15 /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A015_S5_L001_R1_001.fastq /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A015_S5_L001_R2_001.fastq
# run2_A16 /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A016_S6_L001_R1_001.fastq /t1-data1/WTSA_Dev/jhughes/OffTarget_Sachith/A016_S6_L001_R2_001.fastq
# GEO_fastqFileType.txt
# SINGLE_END 0
# GZIP 1
# LANES 2
# GEO_processedFilePaths.txt (if the non-windowed files were used)
# run1_A11_hg38_filtered_pileup /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A11/hg38/BigWigs/filtered_pileup.bw
# run1_A12_hg38_filtered_pileup /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A12/hg38/BigWigs/filtered_pileup.bw
# run1_A13_hg38_filtered_pileup /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A13/hg38/BigWigs/filtered_pileup.bw
# run1_A14_hg38_filtered_pileup /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A14/hg38/BigWigs/filtered_pileup.bw
# run2_A15_hg38_filtered_pileup /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A15/hg38/BigWigs/filtered_pileup.bw
# run2_A16_hg38_filtered_pileup /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A16/hg38/BigWigs/filtered_pileup.bw
# Note the clever edit of the end of the path to the sample name :
# A11/hg38/BigWigs/filtered_pileup.bw --> run1_A11_hg38_filtered_pileup
# GEO_processedFilePaths.txt (if the windowed files were used)
# run1_A11_hg38_filtered_window /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A11/hg38/BigWigs/filtered_window.bw
# run1_A12_hg38_filtered_window /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A12/hg38/BigWigs/filtered_window.bw
# run1_A13_hg38_filtered_window /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A13/hg38/BigWigs/filtered_window.bw
# run1_A14_hg38_filtered_window /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A14/hg38/BigWigs/filtered_window.bw
# run2_A15_hg38_filtered_window /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A15/hg38/BigWigs/filtered_window.bw
# run2_A16_hg38_filtered_window /t1-data1/WTSA_Dev/telenius/runsAndAnalysis/Sacith_offTarget/offTargetCapture/jimBowtie2_081116/2_runPipe/A16/hg38/BigWigs/filtered_window.bw
# Note the clever edit of the end of the path to the sample name :
# A11/hg38/BigWigs/filtered_window.bw --> run1_A11_hg38_filtered_window
# ##########################################################################################
# Parameter file existence tests ..
# ##########################################################################################

# All three parameter files have to be there, and be non-empty.
# Every missing one gets a line to PARAMETERFILE_EXISTENCE.err, and sets parameterFilesFound to 0.
parameterFilesFound=1
rm -f PARAMETERFILE_EXISTENCE.err
for requiredParameterFile in GEO_fastqPaths.txt GEO_processedFilePaths.txt GEO_fastqFileType.txt
do
    if [ ! -s "./${requiredParameterFile}" ] ; then
        parameterFilesFound=0
        echo "${requiredParameterFile} file missing or empty file !" >> PARAMETERFILE_EXISTENCE.err
    fi
done

# Writes the complaint about parameter $1 to PARAMETER_FILE.err, and marks the parameters as not fine.
flagBadFastqFileTypeParameter(){
    echo "$1 parameter missing or wrongly given in GEO_fastqFileType.txt!" >> PARAMETER_FILE.err
    parametersFound=0
}

# Prints the 2nd tab-separated column of the GEO_fastqFileType.txt line(s) starting with parameter name $1.
fastqFileTypeValue(){
    grep "^$1\s" GEO_fastqFileType.txt | cut -f 2
}

# Checks the contents of GEO_fastqFileType.txt of the current folder ( skipped if the file is missing or empty ) :
#  - each of LANES GZIP SINGLE_END has to be given on exactly one line
#  - LANES has to be a whole number, 1 or more
#  - GZIP and SINGLE_END have to be literally 0 or 1
# The values are compared as text. They are not evaluated as arithmetic expressions, so a value
# like "yes" or "TRUE" is reported as wrong, instead of being silently taken as 0.
# Sets parametersFound (1 fine, 0 not fine), and writes the complaints to PARAMETER_FILE.err
checkFastqFileTypeParameters(){
    local parameterName parameterValue
    parametersFound=1
    rm -f PARAMETER_FILE.err
    [ -s "./GEO_fastqFileType.txt" ] || return 0

    for parameterName in LANES GZIP SINGLE_END
    do
        if [ "$( grep -c "^${parameterName}\s" GEO_fastqFileType.txt )" -ne 1 ]; then
            flagBadFastqFileTypeParameter "${parameterName}"
        fi
    done
    # The values are looked at only if all three names were found exactly once
    [ "${parametersFound}" -eq 1 ] || return 0

    parameterValue=$( fastqFileTypeValue LANES )
    if ! [[ "${parameterValue}" =~ ^[0-9]+$ ]] || [ "${parameterValue}" -lt 1 ] ; then
        flagBadFastqFileTypeParameter LANES
    fi

    for parameterName in GZIP SINGLE_END
    do
        parameterValue=$( fastqFileTypeValue "${parameterName}" )
        if [ "${parameterValue}" != "0" ] && [ "${parameterValue}" != "1" ]; then
            flagBadFastqFileTypeParameter "${parameterName}"
        fi
    done
    return 0
}

checkFastqFileTypeParameters
#--------Crashing-if-needed---------------------------------------------------------------------

# Copies to stderr whichever of the two parameter error files exist in the current folder
# and are not empty : first PARAMETERFILE_EXISTENCE.err , then PARAMETER_FILE.err
# ( each file is tested AND printed by its own name - PARAMETER_FILE.err used to be tested for,
#   but PARAMETERFILE_EXISTENCE.err was printed in its place )
printParameterErrorFiles(){
    local errorFile
    for errorFile in PARAMETERFILE_EXISTENCE.err PARAMETER_FILE.err
    do
        if [ -s "./${errorFile}" ] ; then
            cat "${errorFile}" >&2
        fi
    done
}

# parameterFilesFound and parametersFound come from the parameter file tests above.
if [ "${parameterFilesFound}" -eq 0 ] || [ "${parametersFound}" -eq 0 ]; then
    printThis="Run crashed - some parameter files were missing (were not given correctly, or autogeneration of them failed). Check the error log, and files PARAMETERFILE_EXISTENCE.err and PARAMETER_FILE.err! "
    printToLogFile
    printThis="You have to give ALL THREE : GEO_fastqPaths.txt GEO_processedFilePaths.txt GEO_fastqFileType.txt"
    printToLogFile
    printThis="or write GEO_piperun.txt in a correct manner, so that the above three can be made from the pipeline output."
    printToLogFile
    printParameterErrorFiles
    exit 1
fi
# ##########################################################################################
# FIRST PART - PARAMETER FILE INTEGRITY TESTS (using pyramid-copied subroutines)
# ##########################################################################################
# Testing that parameter files make sense .
# If not, after tests crashing the run.
#--------THE-TEST-PARAMETER-FILE-LOOP-over-all-FASTQ-files------------------------------------------------------
# fastqDataOK : 0 when GEO_fastqPaths.txt is missing or empty, 1 when it is found
# ( the crash test after the checks reads this flag ).
fastqDataOK=0
if [ -s "./GEO_fastqPaths.txt" ] ; then
    fastqDataOK=1
    printThis="Found parameter file GEO_fastqPaths.txt - will check that the FASTQ parameters are fine .."
    printNewChapterToLogFile
    # Back to the run folder - and no going further, if we cannot get there.
    cd "${weAreHere}" || exit 1
    # Test that we have uniq lines, uniq files, uniq lanes, etc, here ..
    # ( using PYRAMID VS004 copied 17Feb2017 subroutines )
    # The checker reads PIPE_fastqPaths.txt , which has a rep column as the 2nd column
    # ( the original note names the file as ../PIPE_fastqPaths.txt - TODO confirm where the checker looks for it ).
    rm -f PIPE_fastqPaths.txt
    # Generate the rep column by repeating the name column.
    # Done in a pipe : no scratch file of a fixed name is written to ( or deleted from ) the run folder.
    cut -f 1 GEO_fastqPaths.txt | paste - GEO_fastqPaths.txt > PIPE_fastqPaths.txt
    checkFastqFiles
    rm -f PIPE_fastqPaths.txt
fi
# The above generates FASTQ_LOAD.err - checking for the existence of it is enough to see if it went wrong !
# Also - parameter value fastqDataOK=0 would tell the same.
#--------THE-LOOP-over-all-PROCESSED-files------------------------------------------------------
# processedDataOK : 0 when GEO_processedFilePaths.txt is missing or empty, 1 when it is found
# ( the flag is at 1 before the tester subroutine is called ).
if [ -s "./GEO_processedFilePaths.txt" ] ; then
    processedDataOK=1
    printThis="Found parameter file GEO_processedFilePaths.txt - will check that the PROCESSED DATA parameters are fine .."
    printNewChapterToLogFile
    # Back to the run folder before testing
    cd ${weAreHere}
    processedParameterFileTester
else
    processedDataOK=0
fi
# The above generates PROCESSEDfile_LOAD.err - checking for the existence of it is enough to see if it went wrong !
# Also - parameter value processedDataOK=0 would tell the same.
#--------Crashing-if-needed---------------------------------------------------------------------
# Either flag at zero ends the run here - after showing the error files the tests left behind.
if [ "${fastqDataOK}" -eq 0 ] || [ "${processedDataOK}" -eq 0 ]; then
    printThis="Run crashed - parameter files give wrong. Check the error log file, and output files FASTQ_LOAD.err and/or PROCESSEDfile_LOAD.err "
    printToLogFile
    for loadErrorFile in FASTQ_LOAD.err PROCESSEDfile_LOAD.err
    do
        # Only the non-empty ones are worth printing
        [ -s "./${loadErrorFile}" ] && cat "${loadErrorFile}" >&2
    done
    exit 1
fi
# ##########################################################################################
# SECOND PART - RUNNING THE STUFF - NOW AS WE KNOW PARAMETER FILES ARE FINE ..
# ##########################################################################################
#--------THE-LOOP-over-all-FASTQ-files------------------------------------------------------
if [ -s "./GEO_fastqPaths.txt" ] ; then
    printThis="Found parameter file GEO_fastqPaths.txt - will proceed with FASTQ file storing !"
    printNewChapterToLogFile
    cd ${weAreHere}

    # Start from empty lists ..
    nameList=()
    fileList1=()
    fileList2=()
    # .. and let the reader subroutine fill them from GEO_fastqPaths.txt . It sets :
    # LISTS : nameList fileList1 fileList2
    # PARAMS : ${SINGLE_END} ${LANES} ${GZIP}
    fastqParameterFileReader

    runtype="FASTQ"
    printRunStartArrays

    # Everything is stored to the subfolder FILES
    [ -d "FILES" ] || mkdir FILES
    cd FILES

    # One round per sample - the counter i is global, and stays visible to the subroutines called below.
    i=0
    while [ "${i}" -lt "${#nameList[@]}" ]
    do
        printThis="Starting GEO FASTQ storing for sample : ${nameList[$i]}"
        printNewChapterToLogFile
        pwd
        pwd >&2
        #Fetch FASTQ :
        if [ "$LANES" -eq 1 ] ; then
            # If we have single lane sequencing.
            GEOfetchFastq
            inspectFastq
        else
            # If we have MULTIPLE lanes from sequencing.
            GEOfetchFastqMultilane
            inspectFastqMultilane
        fi
        i=$(( i + 1 ))
    done

    # Drop the name list - to not to meddle with the one used later.
    unset nameList
    cd ..
fi
#--------THE-LOOP-over-all-PROCESSED-files------------------------------------------------------

# Prints the file type of the file whose path is $1 : everything after the first dot of the
# file NAME ( the folder part of the path is dropped first ). For example
#   /some/folder/filtered_pileup.bw  -->  bw
#   /some/folder/peaks.bed.gz        -->  bed.gz
# Dots ( or % signs ) in the folder names, or a ./relative path, do not disturb the result.
# A file name without any dot is printed as such.
processedFileTypeFromPath(){
    local fileName="${1##*/}"
    printf '%s\n' "${fileName#*.}"
}

if [ -s "./GEO_processedFilePaths.txt" ] ; then
    printThis="Found parameter file GEO_processedFilePaths.txt - will proceed with PROCESSED DATA file storing !"
    printNewChapterToLogFile
    # Back to the run folder - and no going further, if we cannot get there.
    cd "${weAreHere}" || exit 1
    nameList=()
    fileList1=()
    processedParameterFileReader
    # The above reads GEO_processedFilePaths.txt
    # And sets these :
    # LISTS : nameList fileList1
    runtype="PROCESSED_FILE"
    printRunStartArrays
    if [ ! -d "FILES" ] ; then
        mkdir FILES
    fi
    cd FILES
    # One round per processed file - the counter i and fileType are global,
    # and stay visible to the subroutines called below.
    for (( i=0; i<=$(( ${#nameList[@]} -1 )); i++ ))
    do
        printThis="Starting GEO processed file storage for sample : ${nameList[$i]}"
        printNewChapterToLogFile
        pwd
        pwd >&2
        #Fetch processed files :
        fileType=""
        fileType=$( processedFileTypeFromPath "${fileList1[$i]}" )
        GEOfetchProcessed
        inspectProcessed
    done
    cd ..
    # Drop the name list - to not to meddle with the one used later.
    unset nameList
fi
# ----------------------------------------
# Finally, take md5sums from them all !
cd ${weAreHere}
if [ -d "FILES" ] ; then
    printThis="Generating md5sums for the files .."
    printNewChapterToLogFile
    # No old md5sum lists are to be left over - neither in the run folder, nor among the stored files
    rm -f md5sums_of_files.txt FILES/md5sums_of_files.txt
    cd FILES
    # The list is written outside of FILES ( so that it does not list itself ), and moved in only afterwards
    md5sum * > ../md5sums_of_files.txt
    cd ..
    mv -f md5sums_of_files.txt FILES/.
    printThis="Done with the md5sums."
    printToLogFile
fi
# ----------------------------------------
# Finally, packing them to a tar-ball !
cd ${weAreHere}
if [ "${TARRING}" -eq 1 ] && [ -d "FILES" ] ; then

    # --- Packing ---
    printThis="Packing all to a tar-ball .."
    printNewChapterToLogFile
    rm -f FILES.tar
    tar -cvf FILES.tar FILES || {
        echo "If the above error reads : 'tar: FILES: file changed as we read it'" >&2
        echo "This is due to a HARMLESS error-logging bug in the TAR version of our system (GNU tar 1.23)" >&2
        echo "The actual integrity of the TAR file is checked via md5summing all the constituent files, below." >&2
    }
    md5sum FILES.tar > FILES_tar_md5sum.txt
    printThis="Done with the tar-ball generation."
    printToLogFile

    # --- Integrity check : unpack a copy of the tar-ball to a scratch folder, and md5sum what comes out ---
    printThis="Check that tar-ball is intact .."
    printNewChapterToLogFile
    rm -rf testTarball
    mkdir testTarball
    cp FILES.tar testTarball/.
    cd testTarball
    tar -xvf FILES.tar
    md5sum FILES/* > newmd5sums.txt
    # The md5sum list itself is not part of the stored list - so it is left out of the new one as well
    grep -v md5sums_of_files.txt newmd5sums.txt | sort > newmd5sums_sorted.txt
    sort ../FILES/md5sums_of_files.txt > oldmd5sums_sorted.txt

    printf '\n%s\n\n' "Before tarring the md5sums of files were :"
    cat oldmd5sums_sorted.txt
    printf '\n\n%s\n\n' "After tarring the md5sums of files are :"
    cat newmd5sums_sorted.txt
    printf '\n'

    # Only the checksum columns are compared ( the file name columns differ by the FILES/ prefix )
    sed 's/\s.*//' oldmd5sums_sorted.txt > col1_oldmd5sums_sorted.txt
    sed 's/\s.*//' newmd5sums_sorted.txt > col1_newmd5sums_sorted.txt
    # Count of the lines diff prints - zero when the two columns are the same
    isAnythingDifferent=$( diff col1_oldmd5sums_sorted.txt col1_newmd5sums_sorted.txt | grep -c "" )
    if [ "${isAnythingDifferent}" -ne 0 ]; then
        printThis="Tar-ball (FILES.tar) got corrupted during packing !"
        printToLogFile
        printThis="DO NOT USE THIS TAR-BALL ! - regenerate it with command : tar -cvf FILES.tar FILES "
        printToLogFile
    fi

    cd ..
    rm -rf testTarball
    printThis="Done with the tar-ball integrity tests."
    printToLogFile
fi
# ----------------------------------------
# Storing the parameter files :
# a copy of each parameter file of this run goes to USED_parameterFiles/Run_at_(time stamp)
cd ${weAreHere}
[ -d USED_parameterFiles ] || mkdir USED_parameterFiles
[ -d USED_parameterFiles/Run_at_${timestamp} ] || mkdir USED_parameterFiles/Run_at_${timestamp}
for usedParameterFile in GEO_fastqPaths.txt GEO_processedFilePaths.txt GEO_fastqFileType.txt
do
    # Only the files which exist get copied
    if [ -e "${usedParameterFile}" ]; then
        cp "${usedParameterFile}" USED_parameterFiles/Run_at_${timestamp}/.
    fi
done
# ----------------------------------------
# All done !
# Time of finishing is printed, and the run ends with exit status 0.
timepoint=$( date )
printf '\n%s\n\n' "run finished : ${timepoint}"
exit 0