-
Notifications
You must be signed in to change notification settings - Fork 1
/
viya_perf_tool.sh
executable file
·2197 lines (2078 loc) · 81.4 KB
/
viya_perf_tool.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/bash
#
# Copyright © 2021, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
#
# ============================
# Viya Perf Tool
# Name: viya_perf_tool.sh
# Author: Jim Kuell, SAS <support@sas.com>
# Description: Test network and/or storage IO performance of Viya 3.5 hosts running RHEL or CentOS 7.x.
# Required Files: viya_perf_tool.conf
# ============================
#
# USAGE
# ./viya_perf_tool.sh (parameter)
# -y (optional) Auto accept config file values and immediately start running the tool.
# -h, --help Show usage info.
# -v, --version Show version info.
#
# ====================================================================
# INITIAL BASH CHECKS
# ====================================================================
# Refuse to continue unless the interpreter is bash 4 or newer.
# BASH_VERSINFO is empty when the script was started by a shell other than bash
# (e.g. 'sh viya_perf_tool.sh'), so only plain test syntax is used in the first branch.
# Bash 4+ is needed later on, e.g. for the associative arrays created with 'declare -A'.
# Both failure paths print a short message and exit with status 125.
if [ -z "${BASH_VERSINFO}" ] || [ -z "${BASH_VERSINFO[0]}" ]; then
echo
echo "ERROR: This script must be run with bash (v4+). Try running:"
echo " 'bash $0'"
echo
exit 125
# Running under bash, but the major version is older than 4.
elif ((${BASH_VERSINFO[0]}<4)); then
echo
echo "ERROR: Unsupported bash version detected. Bash v4+ required."
echo
exit 125
fi
# A pipeline now fails when any of its stages fails, not only the last one.
set -o pipefail
# ====================================================================
# VARIABLES
# ====================================================================
# --- Identity of this run and location of the script ---
PID=$$
GLOBAL_START_TIME="$(date)"
GLOBAL_SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
GLOBAL_SCRIPT_NAME="$(basename "$(readlink -f "$0")")"
readonly PID GLOBAL_START_TIME GLOBAL_SCRIPT_DIR GLOBAL_SCRIPT_NAME

# --- Release and config file metadata ---
readonly GLOBAL_SCRIPT_VERSION="4.0.1"
readonly GLOBAL_SCRIPT_BUILD_ID="20220201401"
readonly GLOBAL_CONFIG_NAME="viya_perf_tool.conf"
readonly GLOBAL_CONFIG_VERSION="1.0.0"
readonly GLOBAL_CONFIG_FULL="${GLOBAL_SCRIPT_DIR}/${GLOBAL_CONFIG_NAME}"

# --- Host names with blanks, tabs, LF and CR stripped ---
GLOBAL_SH_FHOST="$(hostname -f | tr -d '\040\011\012\015')"
GLOBAL_SH_SHOST="$(hostname -s | tr -d '\040\011\012\015')"
readonly GLOBAL_SH_FHOST GLOBAL_SH_SHOST

# --- Values that are kept when already present in the environment ---
: "${GLOBAL_EPOCH_TIME:=$(date +%s)}"
: "${GLOBAL_SH_WORKDIR:=/tmp/viya_perf_tool_${GLOBAL_EPOCH_TIME}_tmpdir}"
# GLOBAL_LOG_FILE is deliberately left writable (it is not part of the readonly set).
: "${GLOBAL_LOG_FILE:=${GLOBAL_SCRIPT_DIR}/viya_perf_tool_${GLOBAL_EPOCH_TIME}.log}"
# Test mode is switched on by a '.test' marker file next to the script.
if [[ -z "${GLOBAL_TEST_MODE}" ]]; then
  GLOBAL_TEST_MODE=0
  if [[ -f "${GLOBAL_SCRIPT_DIR}/.test" ]]; then
    echo "************TEST MODE*************"
    GLOBAL_TEST_MODE=1
  fi
fi
readonly GLOBAL_EPOCH_TIME GLOBAL_SH_WORKDIR GLOBAL_TEST_MODE

# --- Word splitting: lists in this script are split on ':' from here on ---
OLDIFS="${IFS}"
IFS=':'
readonly OLDIFS IFS

# --- Mutable run state ---
auto_cont=0
full_trap=0
test_count=0
skip_iotest=0
exit_flag=0
clean_work=0
skip_msg=0
curr_test=""
declare -A short_hosts test_errors skipped_tests
# ====================================================================
# FUNCTIONS
# ====================================================================
#####
# Write the tool name, script name, version, build id and copyright notice to stdout,
# then terminate the script with status 0.
# Parameters:
#   None
#####
show_version() {
  printf '%s\n' \
    "" \
    "Viya Perf Tool" \
    "${GLOBAL_SCRIPT_NAME}" \
    "Version: ${GLOBAL_SCRIPT_VERSION}" \
    "Build: ${GLOBAL_SCRIPT_BUILD_ID}" \
    "Copyright (c) 2020 SAS Institute Inc." \
    "Unpublished - All Rights Reserved." \
    ""
  exit 0
}
#####
# Write the command line usage summary to stdout, then terminate the script with status 0.
# Parameters:
#   None
#####
show_usage() {
  printf '%s\n' \
    "" \
    "<<USAGE>>" \
    " ${GLOBAL_SCRIPT_NAME} (parameter)" \
    "" \
    " Optional parameters:" \
    " -y Auto accept config file values and immediately start running the tool." \
    " -h, --help Show usage info." \
    " -v, --version Show version info." \
    "" \
    " Function: Test network and/or storage IO performance of Viya 3.5 hosts." \
    " All additional options are read from the config file: [${GLOBAL_CONFIG_FULL}]." \
    " Results and system info will be output to log file [e.g. viya_perf_tool_1616433582.log] unless OUTPUT_TO_FILE=N." \
    ""
  exit 0
}
#####
# Wrapper for installing trap handlers - registers the handler once per signal and
# bakes the signal name into the trap command, so the handler receives it as $1.
# Parameters:
#   $1     - Function to call when a trap is triggered
#   $2...  - Signals (or EXIT) to install the handler for
#####
trap_with_arg() {
  local func_name="$1"
  shift
  # Keep the loop variable local so a caller's/global 'signal' is not overwritten.
  local signal
  for signal in "$@"; do
    # Expanded now on purpose: each trap command carries its own signal name.
    trap "${func_name} ${signal}" "${signal}"
  done
}
#####
# Trap function for main host - kill and cleanup all child processes and print current test status to the log.
# The handler disarms its own traps first, sends SIGTERM to process group 0 (kill ... 0),
# optionally dumps the per-host output files of a running parallel IO test, cleans up
# data/work directories and always ends the script with exit status 42.
# Parameters:
# $1 - Signal that was caught and stored by trap_with_arg()
#####
main_trap() {
# Remove the EXIT trap and ignore further interrupts so this handler is not re-entered
# (the 'kill -s SIGTERM 0' below would otherwise hit this shell as well).
trap - EXIT
trap "" SIGINT SIGTERM SIGHUP
local signal="$1"
# print_test_errors() only prints its summary while skip_msg is 0.
skip_msg=1
# When logging to a file, echo_out() writes these lines to the log only, so repeat the notice on the console.
if [[ "${OUTPUT_TO_FILE}" == "y" ]]; then
echo ""
echo "Interrupt signal [${signal}] caught. Beginning clean up..."
echo "To kill this script immediately, issue the command 'kill -9 ${PID}'. NOTE: doing this may result in stray files and processes."
echo ""
fi
echo_out ""
echo_out "*** Interrupt signal [${signal}] caught. Beginning clean up... ***"
echo_out "*** To kill this script immediately, issue the command 'kill -9 ${PID}'. NOTE: doing this may result in stray files and processes. ***"
echo_out ""
# Terminate every process in this process group, then wait for background children to finish.
kill -s SIGTERM 0
wait
# full_trap is initialised to 0 in the variables section; NOTE(review): it is presumably set to 1
# elsewhere once tests have started - confirm against the rest of the file.
if [[ "${full_trap}" -eq 1 ]]; then
if [[ "${env_type}" == "mpp" ]]; then
# print all test output that's been gathered so far to the log
if [[ -z "${GLOBAL_REMEXEC_HOST}" && ! -z "${test_type}" && "${PARALLEL_IO_TESTS}" == "y" ]]; then
echo_out "-----------------------------"
echo_out "Begin printing current test output"
echo_out "-----------------------------"
# forces echo_out to print to console and log
unset curr_test
# HOSTS is split on ':' here because IFS is set to ':' for the whole script.
for host in ${HOSTS}; do
echo_out "---------------------------"
echo_out "Begin current test output for [${host}]"
echo_out "---------------------------"
if [[ "${OUTPUT_TO_FILE}" == "y" ]]; then
cat "${GLOBAL_SH_WORKDIR}/${test_type}.parallel.${host}.${GLOBAL_EPOCH_TIME}.out" >> "${GLOBAL_LOG_FILE}"
else
cat "${GLOBAL_SH_WORKDIR}/${test_type}.parallel.${host}.${GLOBAL_EPOCH_TIME}.out"
fi
echo_out "---------------------------"
echo_out "End current test output for [${host}]"
echo_out "---------------------------"
done
echo_out "-----------------------------"
echo_out "End printing current test output"
echo_out "-----------------------------"
fi
# wait for remote processes to finish cleaning up and exit
run_ssh "while pgrep -u \$(whoami) -fx \"/bin/bash ./${GLOBAL_SCRIPT_NAME} -${GLOBAL_EPOCH_TIME}\" >/dev/null; do sleep 1; done"
# for smp tests, test_type is only set when iotests are run
elif [[ ! -z "${test_type}" ]]; then
clean_up_data
fi
# clean_up_rem_work() does nothing unless clean_work is 1.
clean_work=1
clean_up_rem_work
fi
echo_out "Interrupt signal [${signal}] trap and cleanup complete. Exiting..."
exit 42
}
#####
# Trap function for remote hosts - kill and cleanup all child processes.
# Removes the IO test data directory via clean_up_data(), removes the local work directory
# unless this host is also the host that started the remote execution, and exits with status 42.
# Parameters:
# $1 - Signal that was caught and stored by trap_with_arg()
#####
remote_trap() {
# Remove the EXIT trap and ignore further interrupts so this handler is not re-entered
# (the 'kill -s SIGTERM 0' below would otherwise hit this shell as well).
trap - EXIT
trap "" SIGINT SIGTERM SIGHUP
local signal="$1"
echo_out "Remote interrupt signal [${signal}] caught. Beginning clean up..."
# Terminate every process in this process group, then wait for background children to finish.
kill -s SIGTERM 0
wait
clean_up_data
# Skip work dir removal when this host's short name equals GLOBAL_REMEXEC_HOST.
if [[ "${GLOBAL_SH_SHOST}" != "${GLOBAL_REMEXEC_HOST}" ]]; then
clean_up_work
fi
exit 42
}
#####
# Message and Error Handling.
# Maps a numeric message code to a log/console message and, depending on the code,
# to follow-up actions: immediate exit (codes 1-5), counting config errors
# (codes 8, 45, 46), cleanup calls, or flagging the run as failed (exit_flag=1).
# When exit_flag is 1 at the end of the function, the current test is closed in the
# log, stored test errors are printed and the script exits with the message code.
# Unknown codes are reported as an ambiguous error and exit with status 255
# (an exit status must fit into 0-255; a value of 256 would wrap around to 0 = success).
# Globals written: wrapper_rc, exit_flag, clean_work, config_error_catch, test_errors, curr_test
# Parameters:
#   $1 - Code of message to be handled
#   $2 - Message value 1
#   $3 - Message value 2
#####
message_handle() {
  # wrapper_rc stays global: print_test_errors() reads it for its own exit status.
  wrapper_rc="$1"
  local message_val_1="$2"
  local message_val_2="$3"
  local host host_path
  if [[ "${wrapper_rc}" -eq 1 ]]; then
    echo
    echo_out "ERROR - Unable to find config file [${GLOBAL_CONFIG_FULL}]." "1"
    echo
    exit "${wrapper_rc}"
  elif [[ "${wrapper_rc}" -eq 2 ]]; then
    echo
    echo_out "ERROR - Unable to parse config file [${GLOBAL_CONFIG_FULL}]. Possible syntax issue in the file." "1"
    echo
    exit "${wrapper_rc}"
  elif [[ "${wrapper_rc}" -eq 3 ]]; then
    echo
    if [[ ! -z "${CONFIG_VERSION}" ]]; then
      echo_out "ERROR - Invalid config file version detected [${CONFIG_VERSION}]. Expecting [${GLOBAL_CONFIG_VERSION}]. Update config file to the latest version and rerun." "1"
    else
      echo_out "ERROR - Unable to find config file version. Update config file to the latest version and rerun." "1"
    fi
    echo
    exit "${wrapper_rc}"
  elif [[ "${wrapper_rc}" -eq 4 ]]; then
    echo
    echo_out "ERROR - Unable to access /etc/os-release. Supported operating systems: RHEL 7 and CentOS 7." "1"
    echo
    exit "${wrapper_rc}"
  elif [[ "${wrapper_rc}" -eq 5 ]]; then
    echo
    echo_out "ERROR - Operating system not supported. Supported operating systems: RHEL 7 and CentOS 7." "1"
    echo
    exit "${wrapper_rc}"
  elif [[ "${wrapper_rc}" -eq 6 ]]; then
    echo_out "ERROR - Unable to passwordless SSH to [${message_val_1}]." "1"
  elif [[ "${wrapper_rc}" -eq 7 ]]; then
    exit_flag=1
    echo_out "ERROR - ${message_val_1} failed for ${message_val_2} host(s). See list of failed hosts above and correct!" "1"
    echo
    clean_up_data
    clean_up_rem_work
  elif [[ "${wrapper_rc}" -eq 8 ]]; then
    (( config_error_catch+=1 ))
    echo_out "ERROR - Duplicate host found in HOSTS list: [${message_val_1}]." "1"
  elif [[ "${wrapper_rc}" -eq 12 ]]; then
    echo_out "ERROR - Unable to create directory [${message_val_1}] on [${message_val_2}]. Check permissions and try again!" "1"
    if [[ "${skip_iotest}" -eq 0 ]]; then
      exit_flag=1
      clean_up_data
    fi
  elif [[ "${wrapper_rc}" -eq 14 ]]; then
    echo_out "ERROR - Failed to start iperf listener on [${message_val_1}] as part of ${curr_test}." "1"
  elif [[ "${wrapper_rc}" -eq 15 ]]; then
    echo_out "ERROR - Failed to start iperf sender on [${message_val_1}] connecting to [${message_val_2}] as part of ${curr_test}." "1"
  elif [[ "${wrapper_rc}" -eq 19 ]]; then
    echo_out "ERROR - Command [${message_val_1}] not found on host [${message_val_2}]." "1"
  elif [[ "${wrapper_rc}" -eq 20 ]]; then
    exit_flag=1
    clean_work=1
    echo_out "ERROR - Command check failed for hosts above. Install missing commands or update Bash PATH and rerun!" "1"
    # host_paths is the associative array declared by the caller (check_cmds).
    for host in "${!host_paths[@]}"; do
      host_path="$(echo "${host_paths[${host}]}" | tr -d '\040\011\012\015')"
      echo_out "INFO - Host [${host}]: PATH=[${host_path}]."
    done
    echo
    clean_up_data
    clean_up_rem_work
  elif [[ "${wrapper_rc}" -eq 21 ]]; then
    echo_out "ERROR - Cannot pass environment variables to remote host [${message_val_1}]. Check SSH server environment options for ${message_val_1}." "1"
  elif [[ "${wrapper_rc}" -eq 22 ]]; then
    echo_out "ERROR - Cannot execute [${message_val_1}] on [${message_val_2}]. Check SSH between [${GLOBAL_SH_SHOST}] and [${message_val_2}]." "1"
  elif [[ "${wrapper_rc}" -eq 23 ]]; then
    echo_out "ERROR - Ambiguous value for CLEANUP in config file. Directories will need to be cleaned up manually." "1"
  elif [[ "${wrapper_rc}" -eq 24 ]]; then
    clean_work=1
    echo_out "ERROR - Unable to SCP copy [${message_val_1}] to [${message_val_2}]." "1"
  elif [[ "${wrapper_rc}" -eq 25 ]]; then
    echo_out "ERROR - Unable to gather system info for host [${message_val_1}]." "1"
  elif [[ "${wrapper_rc}" -eq 26 ]]; then
    echo_out "ERROR - Directory [${message_val_1}] does not exist on [${message_val_2}]." "1"
  elif [[ "${wrapper_rc}" -eq 27 ]]; then
    exit_flag=1
    echo_out "ERROR - Unable to execute dd. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 28 ]]; then
    exit_flag=1
    echo_out "ERROR - Aborting current test due to errors. ${message_val_1} will be skipped for this host." "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 29 ]]; then
    exit_flag=1
    echo_out "ERROR - No physical cores found in /proc/cpuinfo. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 30 ]]; then
    exit_flag=1
    echo_out "ERROR - Unable to access [${message_val_1}]. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 31 ]]; then
    echo_out "WARNING - Cannot find units for XFS preallocation size. Calculating max possible size." "0"
  elif [[ "${wrapper_rc}" -eq 32 ]]; then
    echo_out "WARNING - Unable to calculate XFS preallocation buffer. Skipping calculation." "0"
  elif [[ "${wrapper_rc}" -eq 33 ]]; then
    exit_flag=1
    echo_out "ERROR - Unable to find file system type for [${message_val_1}]. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 34 ]]; then
    exit_flag=1
    echo_out "ERROR - Unable to find mount point for [${message_val_1}]. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 35 ]]; then
    echo_out "WARNING - More than two sockets detected. CPU performance may be affected by NUMA." "0"
  elif [[ "${wrapper_rc}" -eq 36 ]]; then
    exit_flag=1
    echo_out "ERROR - Unable to calculate iteration count. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 37 ]]; then
    echo_out "WARNING - Insufficient free space in [${message_val_1}] for FULL test. Smaller file sizes will be used." "0"
  elif [[ "${wrapper_rc}" -eq 38 ]]; then
    exit_flag=1
    echo_out "ERROR - [Available space - 10% total size buffer] is less than 1 KB in [${message_val_1}]. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 39 ]]; then
    exit_flag=1
    echo_out "ERROR - Unable to create readhold file [${message_val_1}]. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 40 ]]; then
    exit_flag=1
    echo_out "ERROR - Block count not calculated correctly. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 41 ]]; then
    exit_flag=1
    echo_out "ERROR - Block size not calculated correctly. Aborting test!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 42 ]]; then
    exit_flag=1
    echo_out "ERROR - Unable to verify size of output files. File size missing for iteration ${message_val_1} or ${message_val_2}. Aborting test!" "1"
    failed_write_ls
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 43 ]]; then
    exit_flag=1
    echo_out "ERROR - Target filesystem [${message_val_1}] does not have an adequate amount of free disk space to create test files. Aborting test!" "1"
    failed_write_ls
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 44 ]]; then
    echo_out "WARNING - Unable to remove [${message_val_1}]. This may need to be cleaned up manually!" "1"
  elif [[ "${wrapper_rc}" -eq 45 ]]; then
    (( config_error_catch+=1 ))
    echo_out "ERROR - [${message_val_1}=${message_val_2}] is invalid." "1"
  elif [[ "${wrapper_rc}" -eq 46 ]]; then
    (( config_error_catch+=1 ))
    echo_out "ERROR - [${message_val_1}] is empty in config file. [${message_val_1}] requires a valid value." "1"
  elif [[ "${wrapper_rc}" -eq 47 ]]; then
    exit_flag=1
    echo_out "ERROR - Error(s) encountered parsing config file [${GLOBAL_CONFIG_FULL}]. See list of errors above and correct!" "1"
    echo
  elif [[ "${wrapper_rc}" -eq 48 ]]; then
    exit_flag=1
    echo_out "ERROR - Flushing test files from cache failed. Unable to ${message_val_1} in [${message_val_2}]." "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 49 ]]; then
    exit_flag=1
    echo_out "ERROR - Write test failed for [${message_val_1}] in iteration [${message_val_2}]. Check file system!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 50 ]]; then
    exit_flag=1
    echo_out "ERROR - Read test failed for [${message_val_1}] in iteration [${message_val_2}]. Check file system!" "1"
    clean_up_data
  elif [[ "${wrapper_rc}" -eq 51 ]]; then
    echo_out "ERROR - Required bash shell unavailable on host [${message_val_1}]. Bash v4+ required." "1"
  elif [[ "${wrapper_rc}" -eq 52 ]]; then
    echo_out "ERROR - Unsupported bash version detected on host [${message_val_1}]. Bash v4+ required." "1"
  elif [[ "${wrapper_rc}" -eq 53 ]]; then
    echo_out "ERROR - Unable to validate bash shell on host [${message_val_1}]. Bash v4+ required." "1"
  elif [[ "${wrapper_rc}" -eq 54 ]]; then
    echo_out "ERROR - Unable to ssh to host [${message_val_1}] to ${message_val_2}. Manual cleanup may be needed." "1"
    clean_up_iotests
  elif [[ "${wrapper_rc}" -eq 55 ]]; then
    echo_out "ERROR - Unable to find ${GLOBAL_SCRIPT_NAME} process on [${message_val_1}]. This is likely due to an error encountered during remote testing." "1"
    clean_up_iotests
  elif [[ "${wrapper_rc}" -eq 56 ]]; then
    echo_out "ERROR - ${message_val_2} host [${message_val_1}]." "1"
    clean_up_iotests
  else
    exit_flag=1
    # 255 is the largest valid exit status; 256 would be truncated to 0 and signal success.
    wrapper_rc=255
    echo
    echo_out "ERROR - Ambiguous error received. Contact SAS Support" "1"
    echo
    clean_up_data
  fi
  if [[ "${exit_flag}" -eq 1 ]]; then
    # On a remote host that is not the initiating host, remove the local work dir as well.
    if [[ ! -z "${GLOBAL_REMEXEC_HOST}" && "${GLOBAL_SH_SHOST}" != "${GLOBAL_REMEXEC_HOST}" ]]; then
      clean_up_work
    fi
    if [[ ! -z "${curr_test}" ]]; then
      echo_out "-------------------------"
      echo_out "End ${curr_test} for [${GLOBAL_SH_SHOST}]"
      echo_out "-------------------------"
      if [[ "${env_type}" = "smp" ]]; then
        test_errors["${GLOBAL_SH_SHOST}"]="${curr_test}"
        curr_test="DATA IO Test"
      fi
    fi
    # print_test_errors exits on its own when errors are stored; otherwise exit here.
    print_test_errors
    trap - EXIT
    exit "${wrapper_rc}"
  fi
}
#####
# Run a command and fail on a non-zero exit code or on any output to stderr
# - initially added as workaround for a bug where iperf exits with 0 but still outputs to stderr.
# The command's stdout is passed through to the caller's stdout; its stderr lines are re-emitted on stderr.
# On failure the function calls 'exit' (with the command's own exit code, or with 125 when the command
# returned 0 but wrote to stderr). The callers visible in this file invoke it inside $(...) or <(...),
# so only that subshell is terminated.
# Parameters:
# $@ - Command to run, followed by its arguments
#####
fail_if_stderr() {
local rc err
# File descriptor plumbing used below:
#   fd 3 - copy of this function's stdout (set up by '3>&1' after the closing brace)
#   fd 4 - the command substitution's capture pipe (set up by '4>&1'); it only carries the exit code
# Inside the inner subshell the command's stderr is sent into the pipe to grep, its stdout is sent to fd 3,
# and afterwards the command's exit code is written to fd 4 so that it ends up in 'rc'.
# grep '^' matches every line: it forwards the stderr lines to stderr and returns 0 only if it saw at least one line.
rc=$({
("$@" 2>&1 >&3 3>&- 4>&-; echo "$?" >&4) |
grep '^' >&2 3>&- 4>&-
} 4>&1)
# Exit status of the pipeline above (grep's status: 0 means something was written to stderr).
err="$?"
# Propagate a real failure of the command first ...
[ "${rc}" -eq 0 ] || exit "${rc}"
# ... then turn "exit code 0 but stderr output" into exit status 125.
[ "${err}" -ne 0 ] || exit 125
} 3>&1
#####
# Route a message to the console and/or the log file.
# Unless the action is "2", the message is prefixed with "[short host][UTC timestamp]: ".
# Routing:
#   - OUTPUT_TO_FILE is not "y", or this is a remote execution (GLOBAL_REMEXEC_HOST set): console only
#   - otherwise: appended to GLOBAL_LOG_FILE, and additionally printed on the console
#     when the action is "1" and no test is currently running (curr_test empty)
# Parameters:
#   $1 - Message to echo out
#   $2 - Action to take with the message ("1" = also show on console, "2" = no header)
#####
echo_out() {
  local text="$1"
  local action="$2"
  local line="${text}"
  if [[ "${action}" != "2" ]]; then
    line="[${GLOBAL_SH_SHOST}][$(date -u +%FT%T.%3NZ)]: ${text}"
  fi
  # Everything except local file logging goes straight to stdout.
  if [[ "${OUTPUT_TO_FILE}" != "y" || -n "${GLOBAL_REMEXEC_HOST}" ]]; then
    echo "${line}"
    return
  fi
  echo "${line}" >> "${GLOBAL_LOG_FILE}"
  if [[ "${action}" == "1" && -z "${curr_test}" ]]; then
    echo "${line}"
  fi
}
#####
# Clean up IO tests on remote systems if an error is detected.
# Only acts when stop_tests is 1: marks the run as failed (exit_flag=1), collects the status of
# ssh sessions that have already ended, terminates the remaining processes of this process group,
# prints the per-host output files of the parallel test, waits for the remote script instances
# to disappear and finally removes the IO test data directory.
# Parameters:
# None
#####
clean_up_iotests() {
if [[ "${stop_tests}" -eq 1 ]]; then
exit_flag=1
echo_out "INFO - Aborting all tests!" "1"
# check exit codes of nodes that have already finished
# ('kill -0' only probes whether the stored PID is still alive; hosts already recorded as skipped or failed are ignored)
for ssh_host in "${!ssh_status[@]}"; do
if [[ -z "${skipped_tests[${ssh_host}]}" && -z "${test_errors[${ssh_host}]}" ]] && ! kill -0 "${ssh_status[${ssh_host}]}" >/dev/null 2>&1; then
pid_wait "${ssh_status[${ssh_host}]}"
fi
done
# Disarm the traps while SIGTERM is sent to the whole process group, so this shell survives its own signal.
trap - EXIT
trap "" SIGINT SIGTERM SIGHUP
kill -s SIGTERM 0
wait
# Re-arm the regular interrupt handling afterwards.
trap_with_arg 'main_trap' SIGINT SIGTERM SIGHUP EXIT
echo_out "-----------------------------"
echo_out "Begin printing current test output"
echo_out "-----------------------------"
# HOSTS is split on ':' here because IFS is set to ':' for the whole script.
for host in ${HOSTS}; do
echo_out "---------------------------"
echo_out "Begin current test output for [${host}]"
echo_out "---------------------------"
if [[ "${OUTPUT_TO_FILE}" = "y" ]]; then
cat "${GLOBAL_SH_WORKDIR}/${test_type}.parallel.${host}.${GLOBAL_EPOCH_TIME}.out" >> "${GLOBAL_LOG_FILE}"
else
cat "${GLOBAL_SH_WORKDIR}/${test_type}.parallel.${host}.${GLOBAL_EPOCH_TIME}.out"
fi
echo_out "---------------------------"
echo_out "End current test output for [${host}]"
echo_out "---------------------------"
done
echo_out "-----------------------------"
echo_out "End printing current test output"
echo_out "-----------------------------"
# Poll (once per second) until no instance of this script started with this run's epoch id is left.
# NOTE(review): run_ssh is defined outside this section - presumably it runs the command on the remote hosts; confirm.
run_ssh "while pgrep -u \$(whoami) -fx \"/bin/bash ./${GLOBAL_SCRIPT_NAME} -${GLOBAL_EPOCH_TIME}\" >/dev/null; do sleep 1; done"
clean_up_data
fi
}
#####
# Remove the working directory (GLOBAL_SH_WORKDIR) on the local host, depending on CLEANUP:
#   y     - delete the directory; warn via message 44 if it still exists afterwards,
#           otherwise report the removal (only when not running as a remote execution)
#   n     - leave the directory in place and say so
#   other - report an ambiguous CLEANUP value via message 23
# Parameters:
#   None
#####
clean_up_work() {
  case "${CLEANUP}" in
    y)
      if [[ -d "${GLOBAL_SH_WORKDIR}" ]]; then
        rm -rf "${GLOBAL_SH_WORKDIR}"
      fi
      # Re-check instead of trusting rm's exit status.
      if [[ -d "${GLOBAL_SH_WORKDIR}" ]]; then
        message_handle 44 "${GLOBAL_SH_WORKDIR}"
      elif [[ -z "${GLOBAL_REMEXEC_HOST}" ]]; then
        echo_out "The working directory [${GLOBAL_SH_WORKDIR}] has been removed from host [${GLOBAL_SH_SHOST}]."
      fi
      ;;
    n)
      echo_out "CLEANUP=N. The working directory [${GLOBAL_SH_WORKDIR}] was not removed from host [${GLOBAL_SH_SHOST}]."
      ;;
    *)
      message_handle 23
      ;;
  esac
}
#####
# Remove the working directory on all hosts.
# Does nothing unless clean_work is 1. With CLEANUP=y the local directory is removed via
# clean_up_work(); in an mpp environment with IO tests enabled, each host in rem_host_list
# is additionally asked over ssh to remove its copy. Any other CLEANUP value only logs that
# the remote directories were kept.
# Parameters:
#   None
#####
clean_up_rem_work() {
  [[ "${clean_work}" -eq 1 ]] || return 0
  if [[ "${CLEANUP}" != "y" ]]; then
    echo_out "CLEANUP=N. The working directory [${GLOBAL_SH_WORKDIR}] was not removed from remote hosts."
    return
  fi
  clean_up_work
  [[ "${env_type}" == "mpp" ]] || return 0
  [[ "${IO_TESTS_CDC}" == "y" || "${IO_TESTS_DATA}" == "y" ]] || return 0

  local ssh_host clean_flag ssh_ret_code
  # Remote snippet prints a status flag:
  #   0 -> dir is gone (it either never existed or was removed just now)
  #   2 -> dir exists and cannot be cleaned up
  local remote_cmd="bash -c 'export GLOBAL_SH_WORKDIR=${GLOBAL_SH_WORKDIR}; if [[ -d \"\${GLOBAL_SH_WORKDIR}\" ]]; then rm -rf \"\${GLOBAL_SH_WORKDIR}\"; if [[ -d \"\${GLOBAL_SH_WORKDIR}\" ]]; then echo 2; exit; fi; fi; echo 0'"
  # rem_host_list is split on ':' (script-wide IFS).
  for ssh_host in ${rem_host_list}; do
    clean_flag=0
    clean_flag="$(fail_if_stderr ssh -q -n -o StrictHostKeyChecking=no "${ssh_host}" "${remote_cmd}")"
    ssh_ret_code="$?"
    # Strip blanks, tabs, LF and CR from the reply.
    clean_flag="$(tr -d '\040\011\012\015' <<< "${clean_flag}")"
    if [[ "${ssh_ret_code}" -gt 0 ]]; then
      echo_out "WARNING - Unable to ssh to host [${ssh_host}] to verify working directory [${GLOBAL_SH_WORKDIR}] was removed. Manual cleanup may be needed!"
    elif [[ "${clean_flag}" -eq 0 ]]; then
      echo_out "The working directory [${GLOBAL_SH_WORKDIR}] has been removed from host [${ssh_host}]."
    else
      echo_out "WARNING - Unable to remove the working directory [${GLOBAL_SH_WORKDIR}] from host [${ssh_host}]. This may need to be cleaned up manually!"
    fi
  done
}
#####
# Remove the IO test data directory (target_dir), depending on CLEANUP:
#   y     - delete the directory and report the result; warn when a non-empty
#           target_dir does not exist
#   n     - keep the directory and say so (only when it exists)
#   other - report an ambiguous CLEANUP value via message 23
# Parameters:
#   None
#####
clean_up_data() {
  case "${CLEANUP}" in
    y)
      if [[ -d "${target_dir}" ]]; then
        if rm -rf "${target_dir}"; then
          echo_out "The target directory [${target_dir}] has been removed."
        else
          message_handle 44 "${target_dir}"
        fi
      elif [[ -n "${target_dir}" ]]; then
        echo_out "WARNING - The target directory [${target_dir}] was not removed. Directory not found."
      fi
      ;;
    n)
      if [[ -n "${target_dir}" && -d "${target_dir}" ]]; then
        echo_out "CLEANUP=N. The target directory [${target_dir}] was not removed."
      fi
      ;;
    *)
      message_handle 23
      ;;
  esac
}
#####
# Print any test errors that have been stored and terminate the script.
# Returns without doing anything when no errors are stored in test_errors or when
# skip_msg is non-zero. Otherwise it closes the current test section in the log,
# cleans up the work directories, prints skipped tests followed by the error summary
# (console and, when enabled, log file), sends SIGTERM to the process group and exits
# with wrapper_rc (1 when wrapper_rc is empty).
# Parameters:
#   None
#####
print_test_errors() {
  if [[ "${#test_errors[@]}" -gt 0 && "${skip_msg}" -eq 0 ]]; then
    if [[ ! -z "${curr_test}" ]]; then
      echo_out "-----------------------------"
      echo_out "End ${curr_test}s"
      echo_out "-----------------------------"
    fi
    clean_work=1
    clean_up_rem_work
    print_skipped_tests
    if [[ "${OUTPUT_TO_FILE}" == "n" ]]; then
      GLOBAL_LOG_FILE=""
    fi
    # With file logging disabled, let tee append to /dev/null instead of to an empty
    # file name, which would make tee print an error for every line.
    local log_target="${GLOBAL_LOG_FILE:-/dev/null}"
    if [[ "${#skipped_tests[@]}" -eq 0 ]]; then
      echo "" | tee -a "${log_target}"
    fi
    echo "Exiting due to errors during the following tests. Check the output log for more details."
    echo_out "Exiting due to errors during the following tests:" "2"
    echo "********* ERRORS *********" | tee -a "${log_target}"
    local host
    for host in "${!test_errors[@]}"; do
      echo " ${test_errors[${host}]} - ${host}" | tee -a "${log_target}"
    done
    echo "**************************" | tee -a "${log_target}"
    echo ""
    # Disarm the traps so the SIGTERM sent to the process group does not re-trigger cleanup in this shell.
    trap - EXIT
    trap "" SIGINT SIGTERM SIGHUP
    kill -s SIGTERM 0
    wait
    [[ -z "${wrapper_rc}" ]] && wrapper_rc=1
    exit "${wrapper_rc}"
  fi
}
#####
# Print any tests that were skipped.
# Does nothing when skipped_tests is empty. Otherwise the summary is written to the
# console and, when file logging is enabled, appended to GLOBAL_LOG_FILE.
# With OUTPUT_TO_FILE=n, GLOBAL_LOG_FILE is cleared as before.
# Parameters:
#   None
#####
print_skipped_tests() {
  if [[ "${#skipped_tests[@]}" -gt 0 ]]; then
    if [[ "${OUTPUT_TO_FILE}" == "n" ]]; then
      GLOBAL_LOG_FILE=""
    fi
    # With file logging disabled, let tee append to /dev/null instead of to an empty
    # file name, which would make tee print an error for every line.
    local log_target="${GLOBAL_LOG_FILE:-/dev/null}"
    local host
    echo "" | tee -a "${log_target}"
    echo "The following tests were skipped due to errors. Check the output log for more details."
    echo_out "The following tests were skipped due to errors:" "2"
    echo "***** SKIPPED TESTS ******" | tee -a "${log_target}"
    echo "**************************" | tee -a "${log_target}"
    for host in "${!skipped_tests[@]}"; do
      echo " ${skipped_tests[${host}]} - ${host}" | tee -a "${log_target}"
    done
    echo "**************************" | tee -a "${log_target}"
    echo "" | tee -a "${log_target}"
  fi
}
#####
# Run calculation and output as integer with thousands separator (if LC_NUMERIC="en_US.UTF-8").
# The expression may be passed as a single argument or split over several arguments;
# the arguments are joined with blanks before being handed to awk.
# (IFS is readonly in this script, so "$*" is not used for joining.)
# Parameters:
#   $@ - Calculation to be run (awk expression)
#####
print_calc_int() {
  local calc
  printf -v calc '%s ' "$@"
  awk "BEGIN{printf \"%'d\", ${calc}}"
}
#####
# Run calculation and output as a fp with 2 decimals and a thousands separator (if LC_NUMERIC="en_US.UTF-8").
# The expression may be passed as a single argument or split over several arguments;
# the arguments are joined with blanks before being handed to awk.
# (IFS is readonly in this script, so "$*" is not used for joining.)
# Parameters:
#   $@ - Calculation to be run (awk expression)
#####
print_calc_2d() {
  local calc
  printf -v calc '%s ' "$@"
  awk "BEGIN{printf \"%'.2f\", ${calc}}"
}
#####
# Interpret the given value as a base-10 integer and print it (leading zeros are dropped).
# A value that is not a valid base-10 number makes the arithmetic expansion fail,
# in which case nothing is printed.
# Parameters:
#   $1 - Var to check
#####
check_int() {
  local -i parsed
  parsed="$((10#${1}))"
  printf '%s\n' "${parsed}"
}
#####
# Validate that a network port lies in the range 1-65535.
# The value is normalised through check_int(); anything that is not a number counts as 0.
# An invalid port is reported through message 45 for NETWORK_LISTEN_PORT.
# Parameters:
#   $1 - Network port
#####
port_ok() {
  local candidate="$1"
  local -i numeric_port
  numeric_port="$(check_int "${candidate}" 2>/dev/null)"
  if (( numeric_port < 1 || numeric_port > 65535 )); then
    message_handle 45 "NETWORK_LISTEN_PORT" "${NETWORK_LISTEN_PORT}"
  fi
}
#####
# Check OS and version.
# Reads REDHAT_SUPPORT_PRODUCT and REDHAT_SUPPORT_PRODUCT_VERSION from /etc/os-release
# and accepts only major version 7 of CentOS or Red Hat Enterprise Linux.
# Reports message 4 when /etc/os-release is not readable and message 5 when the OS is
# not supported.
# Parameters:
#   None
#####
check_os() {
  if [[ -r "/etc/os-release" ]]; then
    local os_version os_name
    # awk strings are 1-indexed: substr($1,1,1) is the first character (the major version).
    # A start index of 0 yields an empty string on POSIX-conforming awk implementations.
    os_version="$(grep -oP '(?<=^REDHAT_SUPPORT_PRODUCT_VERSION=).+' /etc/os-release | head -1 | tr -d '"' | awk '{ print substr($1,1,1)}')"
    os_name="$(grep -oP '(?<=^REDHAT_SUPPORT_PRODUCT=).+' /etc/os-release | head -1 | tr -d '"' | tr '[:upper:]' '[:lower:]')"
    if [[ "${os_version}" -ne 7 || ! ( "${os_name}" =~ "centos" || "${os_name}" =~ "red hat enterprise linux" ) ]]; then
      message_handle 5
    fi
  else
    message_handle 4
  fi
}
#####
# Verify that every command the tool relies on is available.
# In an mpp environment each host in HOSTS is probed over ssh: its PATH is recorded,
# the IO test commands are checked when IO tests are enabled, and iperf version 2 is
# required when network tests are enabled. Otherwise the command list is checked on
# the local host only. Every missing command is reported through message 19; if
# anything is missing, message 20 is raised at the end (it reads host_paths).
# Parameters:
#   None
#####
check_cmds() {
  echo_out "Checking required commands on all hosts"
  local cmd_error_catch=0
  declare -A host_paths
  if [[ "${env_type}" != "mpp" ]]; then
    # Local-only check.
    cmds=(awk bc cat cp cut date dd df dirname egrep grep hostname mkdir mount readlink rm sed sort sync tail tar tee touch uname uniq wc /usr/bin/time)
    local cmd
    for cmd in "${cmds[@]}"; do
      if ! cmd_type="$(type -p "${cmd}")" || [ -z "${cmd_type}" ]; then
        message_handle 19 "${cmd}" "${GLOBAL_SH_SHOST}"
        host_paths["${GLOBAL_SH_SHOST}"]="${PATH}"
        cmd_error_catch=1
      fi
    done
  else
    local ssh_host
    check_errs=()
    # HOSTS is split on ':' (script-wide IFS).
    for ssh_host in ${HOSTS}; do
      host_paths["${ssh_host}"]="$(fail_if_stderr ssh -q -n -o StrictHostKeyChecking=no "${ssh_host}" "bash -c 'echo \"\${PATH}\"'")"
      if [[ "${IO_TESTS_DATA}" == "y" || "${IO_TESTS_CDC}" == "y" ]]; then
        # The remote snippet prints one line per missing command.
        mapfile -t check_errs < <(fail_if_stderr ssh -q -n -o StrictHostKeyChecking=no "${ssh_host}" "bash -c 'cmds=(awk bc cat cp cut date dd df dirname egrep grep hostname mkdir mount readlink rm sed sort sync tail tar tee touch uname uniq wc /usr/bin/time); for cmd in \"\${cmds[@]}\"; do if ! cmd_type=\"\$(type -p \${cmd})\" || [ -z \"\${cmd_type}\" ]; then echo \"\${cmd}\"; fi; done'")
        if [[ -n "${check_errs[0]}" ]]; then
          local check_err
          for check_err in "${check_errs[@]}"; do
            # Drop carriage returns from the reply.
            check_err="${check_err//$'\r'/}"
            message_handle 19 "${check_err}" "${ssh_host}"
          done
          cmd_error_catch=1
        fi
        unset check_errs
      fi
      if [[ "${NETWORK_TESTS}" == "y" ]]; then
        local iperf_pattern='iperf version 2'
        local iperf_stat
        iperf_stat="$(fail_if_stderr ssh -q -n -o StrictHostKeyChecking=no "${ssh_host}" "bash -c 'iperf -version'" 2>&1)"
        if [[ "${iperf_stat}" != *"${iperf_pattern}"* ]]; then
          message_handle 19 "${iperf_pattern}" "${ssh_host}"
          cmd_error_catch=1
        fi
      fi
    done
  fi
  if (( cmd_error_catch > 0 )); then
    message_handle 20
  fi
}
#####
# Return 0 when the first argument is exactly equal to one of the remaining arguments.
# Parameters:
#   $1    - Value to look for
#   $2... - Candidates
#####
_check_ssh_contains() {
  local needle="$1"
  shift
  local item
  for item in "$@"; do
    if [[ "${item}" == "${needle}" ]]; then
      return 0
    fi
  done
  return 1
}
#####
# Validate passwordless ssh and check for duplicate hosts in host list.
# Every entry of HOSTS (split on ':' via the script-wide IFS) is contacted over ssh. The remote
# side echoes back the exported GLOBAL_REMEXEC_HOST and its own short host name, which proves
# that ssh works and that environment variables can be passed.
# Host names are compared as whole names: a host such as "node1" is neither treated as a
# duplicate of "node10" nor cut out of it when the local host is removed from the remote list.
# A duplicate short host name is reported once (message 8); later repeats are ignored.
# Globals written:
#   short_hosts        - map: HOSTS entry -> short host name
#   short_host_list    - printable "[entry: short name]" list
#   dup_hosts          - short host names already reported as duplicates
#   neat_rem_host_list - HOSTS as given, or the blank separated remote hosts once the local host was found
#   rem_host_list      - ':' separated list of all hosts except the local one
# Parameters:
#   None
#####
check_ssh() {
  echo_out "Validating host list and passwordless SSH for user [${USER}] on all hosts"
  local ssh_error_catch=0
  local exec_cmd ssh_host ssh_output_string ssh_ret_code ssh_check host_check
  local local_host_seen=0
  local -a remote_hosts=()
  exec_cmd="export GLOBAL_REMEXEC_HOST=${GLOBAL_SH_SHOST}; echo \"\${GLOBAL_REMEXEC_HOST},\$(hostname -s)\""
  neat_rem_host_list="${HOSTS}"
  short_host_list=""
  dup_hosts=()
  ssh_output=()
  for ssh_host in ${HOSTS}; do
    ssh_output_string="$(fail_if_stderr ssh -q -n -o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10 "${ssh_host}" "${exec_cmd}" 2>&1)"
    ssh_ret_code="$?"
    # Reply is "<initiating host>,<remote short host>"; turn ',' into ':' so the unquoted
    # expansion splits it into two array elements (IFS is ':').
    ssh_output=( $( echo "${ssh_output_string}" | tr -s ',' ':' | tr -d '\040\011\012\015' ) )
    if [[ "${ssh_ret_code}" -gt 0 ]]; then
      message_handle 6 "${ssh_host}"
      (( ssh_error_catch+=1 ))
      remote_hosts+=("${ssh_host}")
    else
      ssh_check="${ssh_output[0]}"
      host_check="${ssh_output[1]}"
      if [[ "${ssh_check}" != "${GLOBAL_SH_SHOST}" ]]; then
        message_handle 21 "${ssh_host}"
        (( ssh_error_catch+=1 ))
        remote_hosts+=("${ssh_host}")
      elif _check_ssh_contains "${host_check}" "${short_hosts[@]}"; then
        # Exact-name duplicate; report each duplicated short host name only once.
        if ! _check_ssh_contains "${host_check}" "${dup_hosts[@]}"; then
          dup_hosts+=("${host_check}")
          message_handle 8 "${ssh_host}"
          (( ssh_error_catch+=1 ))
        fi
        remote_hosts+=("${ssh_host}")
      else
        short_hosts["${ssh_host}"]="${host_check}"
        if [[ -z "${short_host_list}" ]]; then
          short_host_list+="[${ssh_host}: ${host_check}]"
        else
          short_host_list+=" [${ssh_host}: ${host_check}]"
        fi
        if [[ "${host_check}" == "${GLOBAL_SH_SHOST}" ]]; then
          # This entry is the local host: keep it out of the remote host list.
          local_host_seen=1
        else
          remote_hosts+=("${ssh_host}")
        fi
      fi
    fi
    unset ssh_output
    unset ssh_check
    unset host_check
  done
  if [[ "${ssh_error_catch}" -gt 0 ]]; then
    message_handle 7 "Host list/passwordless SSH validation" "${ssh_error_catch}"
  fi
  echo_out "Host list and passwordless SSH validation completed successfully!"
  # Join manually: IFS is readonly in this script, so "${array[*]}" cannot be re-targeted.
  local remote_host joined_blank="" joined_colon=""
  for remote_host in "${remote_hosts[@]}"; do
    joined_blank+="${joined_blank:+ }${remote_host}"
    joined_colon+="${joined_colon:+:}${remote_host}"
  done
  if [[ "${local_host_seen}" -eq 1 ]]; then
    neat_rem_host_list="${joined_blank}"
  fi
  rem_host_list="${joined_colon}"
}
#####
# Check bash version on remote hosts.
# Runs a small bash snippet on every entry of HOSTS over ssh. The snippet
# prints 1 when BASH_VERSINFO is empty, 2 when the major version is below 4
# and 3 otherwise; only an exact "3" is accepted as success.
# Parameters:
#   None
# Globals read:
#   HOSTS
#####
check_rmt_shell() {
    echo_out "Checking for bash shell v4+ on all hosts"
    local ssh_error_catch=0
    local ssh_host shell_flag ssh_ret_code
    # NOTE(review): splitting HOSTS here depends on the script-wide IFS
    # (HOSTS looks colon-delimited) - confirm against where IFS is set.
    for ssh_host in ${HOSTS}; do
        shell_flag="$(fail_if_stderr ssh -q -n -o StrictHostKeyChecking=no "${ssh_host}" "bash -c 'if [ -z \"\${BASH_VERSINFO}\" ] || [ -z \"\${BASH_VERSINFO[0]}\" ]; then echo 1; elif ((\${BASH_VERSINFO[0]}<4)); then echo 2; else echo 3; fi'")"
        ssh_ret_code="$?"
        # Strip spaces, tabs, newlines and carriage returns from the reply.
        shell_flag="$(echo "${shell_flag}" | tr -d '\040\011\012\015')"
        if [[ "${ssh_ret_code}" -gt 0 ]]; then
            message_handle 22 "bash shell check" "${ssh_host}"
            (( ssh_error_catch+=1 ))
        # Compare as a string: a numeric test would evaluate the remote output
        # as an arithmetic expression, and unexpected text (e.g. a login banner)
        # would raise a syntax error that lets the host pass unnoticed.
        elif [[ "${shell_flag}" != "3" ]]; then
            (( ssh_error_catch+=1 ))
            case "${shell_flag}" in
                1) message_handle 51 "${ssh_host}" ;;
                2) message_handle 52 "${ssh_host}" ;;
                *) message_handle 53 "${ssh_host}" ;;
            esac
        fi
    done
    if [[ "${ssh_error_catch}" -gt 0 ]]; then
        # NOTE(review): presumably message_handle 7 aborts the run - confirm;
        # otherwise the success message below is printed despite the errors.
        message_handle 7 "Remote bash shell check" "${ssh_error_catch}"
    fi
    echo_out "Bash shell check completed successfully!"
}
#####
# Parse config file and populate variables needed to execute tests based on user customization.
# Parameters:
# None
#####
parse_config() {
if [[ ! -r "${GLOBAL_CONFIG_FULL}" ]]; then
message_handle 1
fi
if [[ -z "${GLOBAL_REMEXEC_HOST}" ]]; then
echo "Using config file [${GLOBAL_CONFIG_FULL}]"
fi
# set default config values
declare -A config_vars
config_vars=(
[HOSTS]=""
[NETWORK_TESTS]="Y"
[NETWORK_LISTEN_PORT]="24975"
[PARALLEL_IO_TESTS]="N"
[IO_TESTS_CDC]="Y"
[CDC_DIR]=""
[SHARED_CDC_DIR]="N"
[IO_TESTS_DATA]="Y"
[DATA_DIR]=""
[SHARED_DATA_DIR]="N"
[CLEANUP]="Y"
[OUTPUT_TO_FILE]="Y"
[CONFIG_VERSION]=""
)
local line key value entry_regex
local config_regex_quotes="^[[:blank:]]*([[:alpha:]_][[:alnum:]_]*)[[:blank:]]*=[[:blank:]]*('[^']+'|\"[^\"]+\")[[:blank:]]*(#.*)*$"
local config_regex_loose="^[[:blank:]]*([[:alpha:]_][[:alnum:]_]*)[[:blank:]]*=[[:blank:]]*([^#]*[^#[:blank:]])*"
local config_output=()
local skip_network_tests=0
config_error_catch=0
# parse config file and overwrite default values
while read -r line; do
[[ ! -z "${line}" ]] || continue
[[ "${line}" =~ ${config_regex_quotes} ]] || [[ "${line}" =~ ${config_regex_loose} ]] || continue
key="${BASH_REMATCH[1]}"
[[ -z "${config_vars[${key}]+set}" ]] && continue
if [[ "${line}" =~ ${config_regex_quotes} ]]; then
value="${BASH_REMATCH[2]#[\'\"]}"
value="${value%[\'\"]}"
elif [[ "${line}" =~ ${config_regex_loose} ]]; then
value="${BASH_REMATCH[2]}"
fi
config_vars["${key}"]="${value}"
done < "${GLOBAL_CONFIG_FULL}"
if [[ "$?" -gt 0 ]]; then
message_handle 2
fi
for key in "${!config_vars[@]}"; do
value="${config_vars[${key}]}"
declare -g "${key}"="${value}"
done
HOSTS="$(echo "${HOSTS}" | tr -s ',; ' ':' | tr '[:upper:]' '[:lower:]')"
HOSTS="${HOSTS#:}"
NETWORK_TESTS="$(echo "${NETWORK_TESTS}" | tr '[:upper:]' '[:lower:]')"
IO_TESTS_DATA="$(echo "${IO_TESTS_DATA}" | tr '[:upper:]' '[:lower:]')"
SHARED_DATA_DIR="$(echo "${SHARED_DATA_DIR}" | tr '[:upper:]' '[:lower:]')"
IO_TESTS_CDC="$(echo "${IO_TESTS_CDC}" | tr '[:upper:]' '[:lower:]')"
SHARED_CDC_DIR="$(echo "${SHARED_CDC_DIR}" | tr '[:upper:]' '[:lower:]')"
PARALLEL_IO_TESTS="$(echo "${PARALLEL_IO_TESTS}" | tr '[:upper:]' '[:lower:]')"
OUTPUT_TO_FILE="$(echo "${OUTPUT_TO_FILE}" | tr '[:upper:]' '[:lower:]')"
CLEANUP="$(echo "${CLEANUP}" | tr '[:upper:]' '[:lower:]')"
if [[ -z "${GLOBAL_REMEXEC_HOST}" ]]; then
# start creating config output array - ignored if errs are found
config_output+=("[${GLOBAL_CONFIG_FULL}] validation successful")
echo_out "Using config file [${GLOBAL_CONFIG_FULL}]"
echo_out "Validating config file [${GLOBAL_CONFIG_FULL}]"
# validate config file version
if [[ -z "${CONFIG_VERSION}" || ! "${CONFIG_VERSION}" == "${GLOBAL_CONFIG_VERSION}" ]]; then
message_handle 3
fi
# make sure ips and hostnames are valid
if [[ ! -z "${HOSTS}" ]]; then
local host_error_catch=0
local ip_regex="^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$";
local hostname_regex="^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])$";
local host
for host in ${HOSTS}; do
if [[ ! ( "${host}" =~ ${ip_regex} || "${host}" =~ ${hostname_regex} ) ]]; then
(( host_error_catch+=1 ))
fi
done
if [[ "${host_error_catch}" -gt 0 ]]; then
message_handle 45 "HOSTS" "${HOSTS}"
else
host_list="$(echo "${HOSTS}" | tr -s ':' ' ' | xargs)"
fi
else
message_handle 46 "${HOSTS}"
fi
# count number of hosts and start initial check for duplicates - addl checks in check_ssh()
num_hosts="$(echo "${HOSTS//:/ }" | wc -w)"
if [[ "${num_hosts}" -eq 0 ]]; then
message_handle 46 "HOSTS"
elif [[ "${num_hosts}" -gt 1 ]]; then
env_type="mpp"
dup_hosts=()
for host in ${HOSTS}; do
if [[ "$(echo "${HOSTS}" | grep -o "${host}" | wc -l)" -gt 1 && ! "${dup_hosts[@]}" =~ "${host}" ]]; then
dup_hosts+=("${host}")
message_handle 8 "${host}"
fi
done
else
env_type="smp"
HOSTS="${GLOBAL_SH_SHOST}"
host_list="${GLOBAL_SH_SHOST}"
fi
config_output+=("Hosts included in tests: ${host_list}")