diff --git a/.github/scripts/run_build.sh b/.github/scripts/run_build.sh index 74b6ac863..70f67c694 100755 --- a/.github/scripts/run_build.sh +++ b/.github/scripts/run_build.sh @@ -40,14 +40,52 @@ else adios=() fi +## HDF5 +if [ "${HDF5}" == "true" ]; then + echo + echo "enabling HDF5" + echo + hdf=(--with-hdf5 HDF5_INC="/usr/include/hdf5/openmpi/" HDF5_LIBS="-L/usr/lib/x86_64-linux-gnu/hdf5/openmpi") +else + hdf=() +fi + +## HIP +if [ "${HIP}" == "true" ]; then + echo + echo "enabling HIP" + echo + hip=(--with-hip HIPCC=g++ HIP_FLAGS="-O2 -g -std=c++17" HIP_PLATFORM=cpu HIP_INC=./external_libs/ROCm-HIP-CPU/include HIP_LIBS="-ltbb -lpthread -lstdc++") +else + hip=() +fi + +## special testflags +if [ "${TESTFLAGS}" == "check-mcmodel-medium" ]; then + # note: this is a work-around as using the 'env:' parameter in the workflow 'CI.yml' with TESTFLAGS: FLAGS_CHECK=".." + # won't work as the FLAGS_CHECK string will then get split up and ./configure .. complains about unknown parameters. + # here, we re-define TESTFLAGS with a single quote around FLAGS_CHECK=".." to avoid the splitting. + # use FLAGS_CHECK + flags=(FLAGS_CHECK="-O3 -mcmodel=medium -std=f2008 -Wall -Wno-do-subscript -Wno-conversion -Wno-maybe-uninitialized") + TESTFLAGS="" # reset +else + flags=() +fi + # configuration echo echo "configuration:" echo +# split TESTFLAGS into individual items +set -- ${TESTFLAGS} + ./configure \ -${adios[@]} \ -FC=gfortran MPIFC=mpif90 CC=gcc ${TESTFLAGS} +"${adios[@]}" \ +"${hdf[@]}" \ +"${hip[@]}" \ +"${flags[@]}" \ +FC=gfortran MPIFC=mpif90 CC=gcc "$@" # checks if [[ $? -ne 0 ]]; then echo "configuration failed:"; cat config.log; echo ""; echo "exiting..."; exit 1; fi @@ -62,8 +100,14 @@ sed -i "s:IMAIN .*:IMAIN = ISTANDARD_OUTPUT:" setup/constants.h # compilation echo -echo "compilation:" -make clean; make -j2 all +echo "clean" +echo +make clean + +echo +echo "compilation" +echo +make -j4 all # checks if [[ $? 
-ne 0 ]]; then exit 1; fi diff --git a/.github/scripts/run_install.sh b/.github/scripts/run_install.sh index db44a347b..769e9e148 100755 --- a/.github/scripts/run_install.sh +++ b/.github/scripts/run_install.sh @@ -15,10 +15,10 @@ if [[ $? -ne 0 ]]; then exit 1; fi # fortran/openMPI compiler sudo apt-get install -yq --no-install-recommends gfortran g++ openmpi-bin libopenmpi-dev -# parallel hdf5 -if [[ "${TEST}" == *"with-hdf5"* ]]; then +## parallel HDF5 +if [ "${HDF5}" == "true" ]; then echo - echo "additional installation: ${TEST}" + echo "HDF5 additional installation:" echo sudo apt-get install -yq --no-install-recommends libhdf5-mpi-dev ## checks installation paths @@ -34,6 +34,14 @@ if [[ "${TEST}" == *"with-hdf5"* ]]; then #echo fi +## HIP +if [ "${HIP}" == "true" ]; then + echo + echo "HIP additionals installation:" + echo + sudo apt-get install -yq --no-install-recommends libtbb-dev +fi + # checks exit code if [[ $? -ne 0 ]]; then exit 1; fi echo diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh index b8595d93d..7e68c06a9 100755 --- a/.github/scripts/run_tests.sh +++ b/.github/scripts/run_tests.sh @@ -22,12 +22,67 @@ echo # bash function for checking seismogram output with reference solutions my_test(){ - echo "testing seismograms:" + echo "######################################################################################################################" + echo "testing seismograms" ln -s $WORKDIR/utils/scripts/compare_seismogram_correlations.py ./compare_seismogram_correlations.py REF_SEIS/ OUTPUT_FILES/ if [[ $? -ne 0 ]]; then exit 1; fi ./compare_seismogram_correlations.py REF_SEIS/ OUTPUT_FILES/ | grep min/max | cut -d \| -f 3 | awk '{print "correlation:",$1; if ($1 < 0.999 ){print $1,"failed"; exit 1;}else{ print $1,"good"; exit 0;}}' if [[ $? 
-ne 0 ]]; then exit 1; fi + echo "######################################################################################################################" +} + +my_kernel_test(){ + # kernel value test - checks rho/kappa/mu kernel value outputs + echo "######################################################################################################################" + echo "testing kernel values" + file_ref=REF_KERNEL/output_solver.txt + file_out=output.log # captures the OUTPUT_FILES/output_solver.txt when running solver since IMAIN was set to standard out + if [ ! -e $file_ref ]; then echo "Please check if file $file_ref exists..."; ls -alR ./; exit 1; fi + if [ ! -e $file_out ]; then echo "Please check if file $file_out exists..."; ls -alR ./; exit 1; fi + # gets reference expected kernel values from REF_KERNEL/ folder + RHO=`grep -E 'maximum value of rho[[:space:]]+kernel' $file_ref | cut -d = -f 2 | tr -d ' '` + KAPPA=`grep -E 'maximum value of kappa[[:space:]]+kernel' $file_ref | cut -d = -f 2 | tr -d ' '` + MU=`grep -E 'maximum value of mu[[:space:]]+kernel' $file_ref | cut -d = -f 2 | tr -d ' '` + # need at least rho & kappa (for acoustic kernels) + if [ "$RHO" == "" ] || [ "$KAPPA" == "" ]; then + echo " missing reference kernel values: RHO=$RHO KAPPA=$KAPPA MU=$MU" + echo + exit 1 + else + echo " reference kernel values: RHO=$RHO KAPPA=$KAPPA MU=$MU" + fi + # compares with test output - using a relative tolerance of 0.001 (1 promille) with respect to expected value + # final test result + PASSED=0 + # checks rho kernel value + if [ "$RHO" != "" ]; then + VAL=`grep -E 'maximum value of rho[[:space:]]+kernel' $file_out | cut -d = -f 2 | tr -d ' '` + echo "kernel rho : $VAL" + echo "" | awk '{diff=ex-val;diff_abs=(diff >= 0)? 
diff:-diff;diff_rel=diff_abs/ex;print " value: expected = "ex" gotten = "val" - difference absolute = "diff_abs" relative = "diff_rel; if (diff_rel>0.001){print " failed"; exit 1;}else{print " good"; exit 0;} }' ex=$RHO val=$VAL + if [[ $? -ne 0 ]]; then PASSED=1; fi + fi + # checks kappa kernel value + if [ "$KAPPA" != "" ]; then + VAL=`grep -E 'maximum value of kappa[[:space:]]+kernel' $file_out | cut -d = -f 2 | tr -d ' '` + echo "kernel kappa : $VAL" + echo "" | awk '{diff=ex-val;diff_abs=(diff >= 0)? diff:-diff;diff_rel=diff_abs/ex;print " value: expected = "ex" gotten = "val" - difference absolute = "diff_abs" relative = "diff_rel; if (diff_rel>0.001){print " failed"; exit 1;}else{print " good"; exit 0;} }' ex=$KAPPA val=$VAL + if [[ $? -ne 0 ]]; then PASSED=1; fi + fi + # checks mu kernel value (if available for elastic kernel) + if [ "$MU" != "" ]; then + VAL=`grep -E 'maximum value of mu[[:space:]]+kernel' $file_out | cut -d = -f 2 | tr -d ' '` + echo "kernel mu : $VAL" + echo "" | awk '{diff=ex-val;diff_abs=(diff >= 0)? diff:-diff;diff_rel=diff_abs/ex;print " value: expected = "ex" gotten = "val" - difference absolute = "diff_abs" relative = "diff_rel; if (diff_rel>0.001){print " failed"; exit 1;}else{print " good"; exit 0;} }' ex=$MU val=$VAL + if [[ $? 
-ne 0 ]]; then PASSED=1; fi + fi + # overall pass + if [[ $PASSED -ne 0 ]]; then + echo "testing kernel values: failed"; exit 1; + else + echo "testing kernel values: all good" + fi + echo "######################################################################################################################" +} # test example @@ -37,7 +92,7 @@ cd $dir # limit time steps for testing sed -i "s:^NSTEP .*:NSTEP = 200:" DATA/Par_file # shortens output interval to avoid timeouts -sed -i "s:^NTSTEP_BETWEEN_OUTPUT_INFO .*:NTSTEP_BETWEEN_OUTPUT_INFO = 50:" DATA/Par_file +sed -i "s:^NTSTEP_BETWEEN_OUTPUT_INFO .*:NTSTEP_BETWEEN_OUTPUT_INFO = 100:" DATA/Par_file # limit time steps for specific examples # simple mesh example @@ -105,11 +160,13 @@ if [ "$TESTDIR" == "EXAMPLES/applications/meshfem3D_examples/sep_bathymetry/" ]; sed -i "s:^NSTEP .*:NSTEP = 1000:" DATA/Par_file fi -# hdf5 i/o example -if [[ "${TEST}" == *"with-hdf5"* ]]; then +## HDF5 - i/o example +if [ "${HDF5}" == "true" ]; then echo - echo "test run: ${TEST}" + echo "test run w/ HDF5" echo + # turns on HDF5 + echo "turning on HDF5" sed -i "s:^HDF5_ENABLED .*:HDF5_ENABLED = .true.:" DATA/Par_file sed -i "s:^HDF5_FOR_MOVIES .*:HDF5_FOR_MOVIES = .true.:" DATA/Par_file sed -i "s:^HDF5_IO_NODES .*:HDF5_IO_NODES = 1:" DATA/Par_file @@ -117,15 +174,31 @@ if [[ "${TEST}" == *"with-hdf5"* ]]; then cp -v run_this_example_HDF5_IO_server.sh run_this_example.sh fi -# adios +## adios if [ "${ADIOS2}" == "true" ]; then # turns on ADIOS + echo "turning on ADIOS" sed -i "s:^ADIOS_ENABLED .*:ADIOS_ENABLED = .true.:" DATA/Par_file fi -# default script -./run_this_example.sh +## GPU +if [ "${GPU}" == "true" ]; then + # turns on GPU + echo "turning on GPU" + sed -i "s:^GPU_MODE .*:GPU_MODE = .true.:" DATA/Par_file +fi + +# save Par_file state +cp -v DATA/Par_file DATA/Par_file.bak +# use kernel script +if [ "${RUN_KERNEL}" == "true" ]; then + # use kernel script; the trailing test makes $? reflect the script's status instead of tee's + ./run_this_example_kernel.sh | tee output.log; [ "${PIPESTATUS[0]}" -eq 0 ] +else + # default 
script + ./run_this_example.sh +fi # checks exit code if [[ $? -ne 0 ]]; then exit 1; fi @@ -136,15 +209,52 @@ echo `date` echo # seismogram comparison -if [ "${DEBUG}" == "true" ]; then +if [ "${DEBUG}" == "true" ] || [ "${RUN_KERNEL}" == "true" ]; then # no comparisons - continue + : # do nothing else my_test fi +# checks exit code +if [[ $? -ne 0 ]]; then exit 1; fi + +# kernel test +if [ "${RUN_KERNEL}" == "true" ]; then + # check kernel values + my_kernel_test + # checks exit code + if [[ $? -ne 0 ]]; then exit 1; fi + # clean up + rm -rf OUTPUT_FILES/ SEM/ output.log + + # re-run kernel test w/ UNDO_ATT + echo + echo "*****************************************" + echo "run kernel w/ UNDO_ATTENUATION_AND_OR_PML" + echo "*****************************************" + echo + + # turns on UNDO_ATTENUATION_AND_OR_PML + echo "turning on UNDO_ATTENUATION_AND_OR_PML" + sed -i "s:^UNDO_ATTENUATION_AND_OR_PML .*:UNDO_ATTENUATION_AND_OR_PML = .true.:" DATA/Par_file + + # use kernel script + ./run_this_example_kernel.sh | tee output.log + # checks exit code of the script itself (a plain $? would only report tee's status) + if [[ ${PIPESTATUS[0]} -ne 0 ]]; then exit 1; fi + # kernel test + my_kernel_test + # checks exit code + if [[ $? 
-ne 0 ]]; then exit 1; fi +fi + +# restore original Par_file +cp -v DATA/Par_file.bak DATA/Par_file # cleanup -rm -rf OUTPUT_FILES/ DATABASES_MPI/ +rm -rf OUTPUT_FILES/ +if [ -e DATABASES_MPI ]; then rm -rf DATABASES_MPI/; fi +if [ -e SEM ]; then rm -rf SEM/; fi echo echo "all good" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5e2cdbf16..e1732125e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -160,7 +160,7 @@ jobs: run: ./configure - name: make - run: make -j2 all + run: make -j4 all linuxCheck-Intel: @@ -299,7 +299,7 @@ jobs: make tests linuxTest_0: - name: Test run example 0 - make tests + name: Test 0 - make tests runs-on: ubuntu-latest needs: [linuxCheck] @@ -318,7 +318,7 @@ jobs: linuxTest_1: - name: Test run example 1 - meshfem3D simple model + name: Test 1 - meshfem3D simple model runs-on: ubuntu-latest needs: [linuxCheck] @@ -331,7 +331,7 @@ jobs: - name: Run build env: - TESTFLAGS: --with-mpi # --enable-vectorization + TESTFLAGS: --with-mpi run: ./.github/scripts/run_build.sh shell: bash @@ -342,7 +342,7 @@ jobs: shell: bash linuxTest_2: - name: Test run example 2 - fault tpv5 + name: Test 2 - fault tpv5 runs-on: ubuntu-latest needs: [linuxCheck] @@ -366,7 +366,7 @@ jobs: shell: bash linuxTest_3: - name: Test run example 3 - layered halfspace + name: Test 3 - layered halfspace runs-on: ubuntu-latest needs: [linuxCheck] @@ -391,7 +391,7 @@ jobs: shell: bash linuxTest_4: - name: Test run example 4 - small adjoint + name: Test 4 - small adjoint runs-on: ubuntu-latest needs: [linuxCheck] @@ -414,8 +414,114 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_5: - name: Test run example 5 - socal1D + linuxTest_5a: + name: Test 5a - kernel homogeneous acoustic + runs-on: ubuntu-latest + needs: [linuxCheck] + + steps: + - uses: actions/checkout@v4 + + - name: Install packages + run: ./.github/scripts/run_install.sh + shell: bash + + - name: Run build + run: ./.github/scripts/run_build.sh + shell: 
bash + + - name: Run test kernel + env: + TESTDIR: EXAMPLES/applications/homogeneous_acoustic/ + RUN_KERNEL: true + run: ./.github/scripts/run_tests.sh + shell: bash + + linuxTest_5b: + name: Test 5b - kernel homogeneous acoustic GPU HIP + runs-on: ubuntu-latest + needs: [linuxCheck] + + steps: + - uses: actions/checkout@v4 + with: + submodules: true # needs submodule content in folder external_libs/ROCm-HIP-CPU/ + + - name: Install packages + env: + HIP: true + run: ./.github/scripts/run_install.sh + shell: bash + + - name: Run build + env: + HIP: true + run: ./.github/scripts/run_build.sh + shell: bash + + - name: Run test kernel w/ GPU + env: + TESTDIR: EXAMPLES/applications/homogeneous_acoustic/ + RUN_KERNEL: true + GPU: true + run: ./.github/scripts/run_tests.sh + shell: bash + + linuxTest_6a: + name: Test 6a - kernel homogeneous halfspace + runs-on: ubuntu-latest + needs: [linuxCheck] + + steps: + - uses: actions/checkout@v4 + + - name: Install packages + run: ./.github/scripts/run_install.sh + shell: bash + + - name: Run build + run: ./.github/scripts/run_build.sh + shell: bash + + - name: Run test kernel + env: + TESTDIR: EXAMPLES/applications/homogeneous_halfspace/ + RUN_KERNEL: true + run: ./.github/scripts/run_tests.sh + shell: bash + + linuxTest_6b: + name: Test 6b - kernel homogeneous halfspace GPU HIP + runs-on: ubuntu-latest + needs: [linuxCheck] + + steps: + - uses: actions/checkout@v4 + with: + submodules: true # needs submodule content in folder external_libs/ROCm-HIP-CPU/ + + - name: Install packages + env: + HIP: true + run: ./.github/scripts/run_install.sh + shell: bash + + - name: Run build + env: + HIP: true + run: ./.github/scripts/run_build.sh + shell: bash + + - name: Run test kernel w/ GPU + env: + TESTDIR: EXAMPLES/applications/homogeneous_halfspace/ + RUN_KERNEL: true + GPU: true + run: ./.github/scripts/run_tests.sh + shell: bash + + linuxTest_7: + name: Test 7 - socal1D runs-on: ubuntu-latest needs: [linuxCheck] @@ -439,8 +545,8 @@ 
jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_6: - name: Test run example 6 - socal1D 1d_socal + linuxTest_8: + name: Test 8 - socal1D 1d_socal runs-on: ubuntu-latest needs: [linuxCheck] @@ -464,8 +570,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_7: - name: Test run example 7 - socal1D 1d_prem + linuxTest_9: + name: Test 9 - socal1D 1d_prem runs-on: ubuntu-latest needs: [linuxCheck] @@ -489,9 +595,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - - linuxTest_8: - name: Test run example 8 - socal1D 1d_cascadia + linuxTest_10: + name: Test 10 - socal1D 1d_cascadia runs-on: ubuntu-latest needs: [linuxCheck] @@ -515,8 +620,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_9: - name: Test run example 9 - coupling FK + linuxTest_11: + name: Test 11 - coupling FK runs-on: ubuntu-latest needs: [linuxCheck] @@ -539,8 +644,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_10: - name: Test run example 10 - homogeneous halfspace + linuxTest_12: + name: Test 12 - homogeneous halfspace runs-on: ubuntu-latest needs: [linuxCheck] @@ -553,7 +658,7 @@ jobs: - name: Run build env: - TESTFLAGS: --with-mpi + TESTFLAGS: check-mcmodel-medium run: ./.github/scripts/run_build.sh shell: bash @@ -563,8 +668,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_11: - name: Test run example 11 - poroelastic + linuxTest_13: + name: Test 13 - poroelastic runs-on: ubuntu-latest needs: [linuxCheck] @@ -587,8 +692,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_12: - name: Test run example 12 - PML elastic + linuxTest_14: + name: Test 14 - PML elastic runs-on: ubuntu-latest needs: [linuxCheck] @@ -611,8 +716,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_13: - name: Test run example 13 - PML acoustic + linuxTest_15: + name: Test 15 - PML acoustic runs-on: ubuntu-latest needs: [linuxCheck] @@ -635,8 +740,8 @@ jobs: run: 
./.github/scripts/run_tests.sh shell: bash - linuxTest_14: - name: Test run example 14 - waterlayered halfspace + linuxTest_16: + name: Test 16 - waterlayered halfspace runs-on: ubuntu-latest needs: [linuxCheck] @@ -659,8 +764,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_15: - name: Test run example 15 - tomographic model + linuxTest_17: + name: Test 17 - tomographic model runs-on: ubuntu-latest needs: [linuxCheck] @@ -683,8 +788,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_16: - name: Test run example 16 - cavity + linuxTest_18: + name: Test 18 - cavity runs-on: ubuntu-latest needs: [linuxCheck] @@ -707,8 +812,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_17: - name: Test run example 17 - sep bathymetry + linuxTest_19: + name: Test 19 - sep bathymetry runs-on: ubuntu-latest needs: [linuxCheck] @@ -731,8 +836,8 @@ jobs: run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_18: - name: Test run example 18 - socal1D hdf5 i/o + linuxTest_20: + name: Test 20 - socal1D HDF5 I/O runs-on: ubuntu-latest needs: [linuxCheck] @@ -741,26 +846,27 @@ jobs: - name: Install packages env: - TEST: with-hdf5 + HDF5: true run: ./.github/scripts/run_install.sh shell: bash - name: Run build env: - TESTFLAGS: --with-mpi --with-hdf5 HDF5_INC=/usr/include/hdf5/openmpi/ HDF5_LIBS=-L/usr/lib/x86_64-linux-gnu/hdf5/openmpi + TESTFLAGS: --with-mpi + HDF5: true run: ./.github/scripts/run_build.sh shell: bash - name: Run test env: - TEST: with-hdf5 TESTDIR: EXAMPLES/applications/meshfem3D_examples/socal1D/ TESTID: 0 + HDF5: true run: ./.github/scripts/run_tests.sh shell: bash - linuxTest_19: - name: Test run example 19 - meshfem3D simple model w/ ADIOS2 + linuxTest_21: + name: Test 21 - meshfem3D simple model w/ ADIOS2 runs-on: ubuntu-latest needs: [linuxCheck] diff --git a/.gitmodules b/.gitmodules index f8dfa9095..deb2a1ee8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule 
"src/inverse_problem_for_source/pyCMT3D"] path = src/inverse_problem_for_source/pyCMT3D url = https://github.com/wjlei1990/pycmt3d +[submodule "external_libs/ROCm-HIP-CPU"] + path = external_libs/ROCm-HIP-CPU + url = https://github.com/ROCm/HIP-CPU.git diff --git a/EXAMPLES/applications/homogeneous_acoustic/DATA/Par_file b/EXAMPLES/applications/homogeneous_acoustic/DATA/Par_file index 2d9b37d3a..5f0a9fdbc 100644 --- a/EXAMPLES/applications/homogeneous_acoustic/DATA/Par_file +++ b/EXAMPLES/applications/homogeneous_acoustic/DATA/Par_file @@ -138,7 +138,7 @@ BOTTOM_FREE_SURFACE = .false. # but requires disk space for temporary storage, and uses a significant amount of memory used as buffers for temporary storage. # When that option is on the second parameter indicates how often the code dumps restart files to disk (if in doubt, use something between 100 and 1000). UNDO_ATTENUATION_AND_OR_PML = .false. -NT_DUMP_ATTENUATION = 500 +NT_DUMP_ATTENUATION = 100 #----------------------------------------------------------- # diff --git a/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/DB.X20.MXP.semp b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/DB.X20.MXP.semp new file mode 100644 index 000000000..6d5e35984 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/DB.X20.MXP.semp @@ -0,0 +1,300 @@ + 27.9664001 544.564331 + 27.8528004 -1584.77832 + 27.7392006 -5375.14160 + 27.6256008 -6260.00586 + 27.5119991 -5755.92969 + 27.3983994 -5217.72119 + 27.2847996 -5255.58154 + 27.1711998 -2809.80103 + 27.0576000 105.357338 + 26.9440002 1485.71472 + 26.8304005 1841.27283 + 26.7168007 1612.35913 + 26.6032009 2239.82764 + 26.4895992 2076.71411 + 26.3759995 1936.22205 + 26.2623997 2303.37207 + 26.1487999 3055.18896 + 26.0352001 2778.17969 + 25.9216003 2978.24780 + 25.8080006 3198.47485 + 25.6944008 2216.27148 + 25.5807991 26.7113819 + 25.4671993 -990.188171 + 25.3535995 -3020.76221 + 25.2399998 -5311.44531 + 25.1264000 -5445.22070 + 25.0128002 -4123.07471 + 
24.8992004 -3494.73999 + 24.7856007 -3306.53955 + 24.6720009 -494.829742 + 24.5583992 1464.19690 + 24.4447994 2321.16357 + 24.3311996 1731.31226 + 24.2175999 2265.86035 + 24.1040001 2063.26221 + 23.9904003 740.865234 + 23.8768005 -774.699829 + 23.7632008 -101.673637 + 23.6495991 1183.26941 + 23.5359993 1342.85132 + 23.4223995 1530.43066 + 23.3087997 1119.14465 + 23.1952000 -1230.99353 + 23.0816002 -6171.64551 + 22.9680004 -12273.3516 + 22.8544006 -21132.4492 + 22.7408009 -31555.8711 + 22.6271992 -44689.6758 + 22.5135994 -60735.5078 + 22.3999996 -80647.2656 + 22.2863998 -106443.469 + 22.1728001 -139071.125 + 22.0592003 -180883.922 + 21.9456005 -232015.375 + 21.8320007 -292923.469 + 21.7183990 -360350.500 + 21.6047993 -432450.281 + 21.4911995 -505124.969 + 21.3775997 -568320.375 + 21.2639999 -615638.125 + 21.1504002 -635376.812 + 21.0368004 -617644.000 + 20.9232006 -548923.188 + 20.8096008 -420996.031 + 20.6959991 -225746.734 + 20.5823994 40509.9648 + 20.4687996 373987.188 + 20.3551998 765580.562 + 20.2416000 1190601.00 + 20.1280003 1622974.75 + 20.0144005 2026172.38 + 19.9008007 2357824.75 + 19.7872009 2574349.50 + 19.6735992 2642144.25 + 19.5599995 2535925.50 + 19.4463997 2240861.50 + 19.3327999 1766511.50 + 19.2192001 1139457.12 + 19.1056004 404580.875 + 18.9920006 -380841.062 + 18.8784008 -1151587.50 + 18.7647991 -1841239.88 + 18.6511993 -2393282.50 + 18.5375996 -2764036.75 + 18.4239998 -2932518.50 + 18.3104000 -2893954.75 + 18.1968002 -2667275.00 + 18.0832005 -2289551.00 + 17.9696007 -1805607.25 + 17.8560009 -1268753.50 + 17.7423992 -729574.500 + 17.6287994 -232765.625 + 17.5151997 190661.422 + 17.4015999 517393.719 + 17.2880001 741936.688 + 17.1744003 868827.062 + 17.0608006 908390.500 + 16.9472008 880048.875 + 16.8335991 802852.375 + 16.7199993 697491.250 + 16.6063995 579577.750 + 16.4927998 462539.688 + 16.3792000 354653.031 + 16.2656002 262361.594 + 16.1520004 188769.453 + 16.0384007 129681.711 + 15.9247999 86139.2656 + 15.8112001 55035.7344 + 15.6976004 
35354.5781 + 15.5839996 21123.7676 + 15.4703999 11350.1660 + 15.3568001 6007.69775 + 15.2432003 3916.11475 + 15.1295996 2536.44238 + 15.0159998 39.0870628 + 14.9024000 706.456177 + 14.7888002 275.077942 + 14.6752005 -177.372177 + 14.5615997 -574.741455 + 14.4480000 273.897614 + 14.3344002 148.195999 + 14.2208004 26.6695576 + 14.1071997 96.0233154 + 13.9935999 -411.122437 + 13.8800001 672.651306 + 13.7664003 -552.439514 + 13.6527996 -490.040222 + 13.5391998 176.453094 + 13.4256001 1103.60010 + 13.3120003 -538.625610 + 13.1983995 -348.423157 + 13.0847998 284.437286 + 12.9712000 -249.851257 + 12.8576002 42.9155464 + 12.7440004 -174.715073 + 12.6303997 682.210449 + 12.5167999 -384.185730 + 12.4032001 -113.904007 + 12.2896004 -215.445999 + 12.1759996 840.190979 + 12.0623999 -629.883972 + 11.9488001 -289.133881 + 11.8352003 596.051270 + 11.7215996 -20.8606281 + 11.6079998 -730.626648 + 11.4944000 190.822723 + 11.3808002 966.601624 + 11.2672005 -88.0196838 + 11.1535997 -427.040833 + 11.0400000 -681.129272 + 10.9264002 474.171722 + 10.8128004 -50.8858643 + 10.6991997 -79.9151230 + 10.5855999 334.896088 + 10.4720001 687.630554 + 10.3584003 -929.366333 + 10.2447996 -229.713486 + 10.1311998 530.861511 + 10.0176001 -319.441559 + 9.90400028 -352.331818 + 9.79039955 613.851013 + 9.67679977 631.685120 + 9.56320000 -444.729340 + 9.44960022 -829.620911 + 9.33600044 45.9171791 + 9.22239971 1145.88806 + 9.10879993 -328.019318 + 8.99520016 -1475.16736 + 8.88160038 1013.47455 + 8.76799965 1406.02515 + 8.65439987 -1171.39771 + 8.54080009 -1070.24377 + 8.42720032 981.314087 + 8.31359959 627.278564 + 8.19999981 -805.840149 + 8.08640003 -251.505005 + 7.97279978 281.497559 + 7.85920000 704.267517 + 7.74560022 -440.305328 + 7.63199997 -484.658325 + 7.51840019 167.954758 + 7.40479994 497.673981 + 7.29120016 -152.318954 + 7.17759991 -259.604980 + 7.06400013 -33.6230774 + 6.95039988 -151.219727 + 6.83680010 401.256134 + 6.72319984 80.9131851 + 6.60960007 18.2357502 + 6.49599981 -301.181091 + 
6.38240004 -87.3278961 + 6.26879978 195.631836 + 6.15520000 337.532715 + 6.04160023 -503.351929 + 5.92799997 -521.325684 + 5.81440020 789.367859 + 5.70079994 576.935913 + 5.58720016 -512.447327 + 5.47359991 -780.509521 + 5.36000013 292.994385 + 5.24639988 546.671753 + 5.13280010 159.262848 + 5.01919985 -584.490173 + 4.90560007 -308.834595 + 4.79199982 797.433167 + 4.67840004 477.932465 + 4.56479979 -1073.53381 + 4.45120001 -828.504944 + 4.33760023 1377.45874 + 4.22399998 783.816589 + 4.11040020 -1303.13086 + 3.99679995 -701.084656 + 3.88319993 1276.65894 + 3.76959991 622.466064 + 3.65599990 -1116.23511 + 3.54239988 -631.125610 + 3.42880011 717.494507 + 3.31520009 655.352783 + 3.20160007 -537.795227 + 3.08800006 -306.982452 + 2.97440004 457.808319 + 2.86080003 -3.93428397 + 2.74720001 -505.635590 + 2.63360000 134.206482 + 2.51999998 371.731140 + 2.40639997 82.5949936 + 2.29279995 31.0862484 + 2.17919993 -333.130585 + 2.06559992 -471.697906 + 1.95200002 105.730438 + 1.83840001 675.870667 + 1.72479999 174.548706 + 1.61119998 -267.963623 + 1.49759996 -506.872253 + 1.38399994 286.935699 + 1.27040005 344.802826 + 1.15680003 -490.902222 + 1.04320002 -361.650330 + 0.929600000 686.343689 + 0.815999985 764.176147 + 0.702400029 -849.063660 + 0.588800013 -842.574280 + 0.475199997 420.904663 + 0.361600012 1028.96753 + 0.247999996 -354.844910 + 0.134399995 -680.545410 + 2.08000001E-02 419.332672 + -9.27999988E-02 118.055344 + -0.206400007 -387.699432 + -0.319999993 138.956772 + -0.433600008 290.025208 + -0.547200024 -231.296585 + -0.660799980 -25.1498260 + -0.774399996 180.027618 + -0.888000011 -121.424614 + -1.00160003 82.1862106 + -1.11520004 -185.966873 + -1.22880006 -167.952957 + -1.34239995 616.901489 + -1.45599997 -3.41306829 + -1.56959999 -707.017395 + -1.68320000 -186.165131 + -1.79680002 782.002808 + -1.91040003 98.1923218 + -2.02399993 -370.994720 + -2.13759995 -96.2520752 + -2.25119996 435.202698 + -2.36479998 191.746216 + -2.47839999 -923.419434 + -2.59200001 
-436.424164 + -2.70560002 817.467773 + -2.81920004 742.894653 + -2.93280005 -396.105103 + -3.04640007 -451.140839 + -3.16000009 26.9241982 + -3.27360010 248.718246 + -3.38720012 -35.8864861 + -3.50079989 -278.069336 + -3.61439991 -36.0458794 + -3.72799993 431.223846 + -3.84159994 -84.5946655 + -3.95519996 -280.859406 + -4.06879997 58.7902107 + -4.18240023 366.241119 + -4.29600000 81.8473282 + -4.40959978 -375.473328 + -4.52320004 -434.747742 + -4.63679981 99.2971573 + -4.75040007 718.811890 + -4.86399984 -28.3373585 + -4.97760010 -515.782776 + -5.09119987 23.7115059 + -5.20480013 705.601624 + -5.31839991 -470.127106 + -5.43200016 -773.700623 + -5.54559994 468.248779 + -5.65920019 1062.23730 + -5.77279997 -550.779053 + -5.88640022 -935.806763 + -6.00000000 442.318146 diff --git a/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output.log b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output.log new file mode 100644 index 000000000..8a781b737 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output.log @@ -0,0 +1,169 @@ +running example: Thu Nov 14 08:44:19 UTC 2024 + +setting up example... + + +decomposing mesh... + + + ********************** + Serial mesh decomposer + ********************** + + reading mesh files in: ./MESH-default + + using NGNOD = 8 + linear elements + + total number of nodes: + nnodes = 23273 + total number of spectral elements: + nspec = 20736 + materials: + num_mat = 1 + defined = 1 undefined = 0 + no poroelastic material file found + defined materials : 1 + undefined materials: 1 (interfaces/tomography models/..) 
+ absorbing boundaries: + nspec2D_xmin = 576 + nspec2D_xmax = 576 + nspec2D_ymin = 576 + nspec2D_ymax = 576 + nspec2D_bottom = 1296 + nspec2D_top = 1296 + no absorbing_cpml_file file found + no moho_surface_file file found + + Par_file_faults not found: assuming that there are no faults + + node valence: + min = 1 max = 8 + neighbors: + nsize = 8 + valence: sup_neighbor = 38 max_neighbor = 26 + + local time stepping: turned OFF + + partitioning: + number of partitions requested = 4 + + array size xadj : 20737 memory: 7.91053772E-02 MB + array size adjncy: 539136 memory: 2.05664062 MB + sup_neighbor : 26 + + mesh2dual: max_neighbor = 26 + + + partitioning type: 1 + running SCOTCH partitioning + + SCOTCH partitioning + finished partitioning + + written file: ./OUTPUT_FILES/DATABASES_MPI/part_array.vtk + + element distribution: + partition 0 has 5136 elements + partition 1 has 5216 elements + partition 2 has 5184 elements + partition 3 has 5200 elements + elements per partition: min/max = 5136 5216 + elements per partition: imbalance = 1.53374231 % + (0% being totally balanced, 100% being unbalanced) + + load distribution: + element loads: min/max = 10 10 + + partition 0 has 51360 load units + partition 1 has 52160 load units + partition 2 has 51840 load units + partition 3 has 52000 load units + load per partition: min/max = 51360 52160 + load per partition: imbalance = 1.53374231 % + (0% being totally balanced, 100% being unbalanced) + + partitions: + num = 4 + ninterfaces = 6 + + partition 0 has number of MPI interfaces: 2 maximum size 2346 + partition 1 has number of MPI interfaces: 3 maximum size 2438 + partition 2 has number of MPI interfaces: 2 maximum size 2438 + partition 3 has number of MPI interfaces: 3 maximum size 2346 + + Databases files in directory: ./OUTPUT_FILES/DATABASES_MPI + + + finished successfully + + +running database generation on 4 processors... 
+ +######################################################### +forward simulation +######################################################### +(running forward simulation with saving forward wavefield) + +Changed simulation_type to 1 and save_forward = .true. in Par_file + +running solver on 4 processors... + +######################################################### +adjoint sources +######################################################### +setting up adjoint sources + +'OUTPUT_FILES/DB.X20.MXP.semp' -> 'SEM/DB.X20.MXX.semp' +'OUTPUT_FILES/DB.X20.MXP.semp' -> 'SEM/DB.X20.MXY.semp' +'OUTPUT_FILES/DB.X20.MXP.semp' -> 'SEM/DB.X20.MXZ.semp' + +compiling xcreate_adjsrc_traveltime: + using fortran compiler = gfortran + using C compiler = gcc + +gfortran -o xcreate_adjsrc_traveltime create_adjsrc_traveltime.o rw_ascfile_c.o +'xcreate_adjsrc_traveltime' -> '/home/myuser/EXAMPLES/applications/homogeneous_acoustic/SEM/xcreate_adjsrc_traveltime' + +running adjoint source creation + + xcreate_adjsrc_traveltime: + measurement window start/end = 9.0000000000000000 / 26.000000000000000 + component ifile = 1 lrot = F + + reading asc file DB.X20.MXX.semp ... + reading asc file DB.X20.MXY.semp ... + reading asc file DB.X20.MXZ.semp ... + + start time: -6.0000000000000000 + time step: 0.11359978000000037 + number of steps: 300 + + i = 1 norm = 51519175283406.109 + i = 2 norm = 51519175283406.109 + component set to zero + i = 3 norm = 51519175283406.109 + component set to zero + + write to asc file DB.X20.MXX.adj + write to asc file DB.X20.MXY.adj + write to asc file DB.X20.MXZ.adj + +'./STATIONS_ADJOINT' -> '../DATA/STATIONS_ADJOINT' + + +######################################################### +kernel simulation +######################################################### +(running kernel simulation: SIMULATION_TYPE == 3) + +Changed simulation_type to 3 in Par_file + +running solver (kernel run) on 4 processors... 
+ + +see results in directory : OUTPUT_FILES/ + kernel outputs in directory: ./OUTPUT_FILES/DATABASES_MPI + +done +Thu Nov 14 08:44:51 UTC 2024 diff --git a/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_generate_databases.txt b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_generate_databases.txt new file mode 100644 index 000000000..86e586916 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_generate_databases.txt @@ -0,0 +1,286 @@ + + ***************************************** + *** Specfem3D MPI database generation *** + ***************************************** + + Running Git package version of the code: 4.1.1 + which is Git unknown + dating unknown + + This is process 0 + There are 4 MPI processes + Processes are numbered from 0 to 3 + + There is a total of 4 slices + + NGLLX = 5 + NGLLY = 5 + NGLLZ = 5 + + Shape functions defined by NGNOD = 8 control nodes + Surface shape functions defined by NGNOD2D = 4 control nodes + Beware! Curvature (i.e. 
HEX27 elements) is not handled by our internal mesher + + velocity model: default + + + suppressing UTM projection + + no attenuation + + no anisotropy + + no oceans + + incorporating Stacey absorbing conditions + + using a CMTSOLUTION source + with a Gaussian source time function + + + ************************************ + reading partition files in the model + ************************************ + + external mesh points : 24701 + defined materials : 1 + undefined materials : 0 + total number of spectral elements: 20736 + absorbing boundaries: + xmin,xmax : 576 576 + ymin,ymax : 576 576 + bottom,top: 1296 1296 + + total number of C-PML elements in the global mesh: 0 + + number of MPI partition interfaces: 10 + + minimum memory used so far : 18.0572948 MB per process + minimum total memory requested : 82.8846893 MB per process + + create regions: + + ...allocating arrays + NGLLX = 5 + NGLLY = 5 + NGLLZ = 5 + NGNOD = 8 + NGNOD2D = 4 + + main process setup: + nspec = 5136 + + separating regular/irregular element shapes + nspec regular = 0 + nspec irregular = 5136 + + absorbing boundary faces: + num_abs_boundary_faces = 913 + + free surface faces: + num_free_surface_faces = 321 + + + File DATA/Par_file_faults not found: assuming that there are no faults + + + ...setting up jacobian + + ...indexing global points + creating ibool indexing : x min/max = 0.00000000 / 134000.000 + creating indirect addressing: nglob = 343525 + creating unique point locations + + ...preparing MPI interfaces + number of interfaces : 2 + creating MPI indexing : x min/max = 0.00000000 / 134000.000 + tolerance = 1.3400000000000000E-005 + + total MPI interface points: 42770 + total assembled MPI interface points: 42380 + + ...setting up absorbing boundaries + boundary xmin : 576 + boundary xmax : 576 + boundary ymin : 576 + boundary ymax : 576 + boundary bottom : 1296 + boundary top : 1296 + absorbing boundary: + total number of free faces = 1296 + total number of faces = 3600 + + ...setting 
up mesh surface + + ...determining velocity model + 10 % time remaining: 2.5533031967188309E-007 s + 20 % time remaining: 2.0405097946604651E-007 s + 30 % time remaining: 1.7223927824188441E-007 s + 40 % time remaining: 1.4324789827332815E-007 s + 50 % time remaining: 1.1696618033528635E-007 s + 60 % time remaining: 9.3191019908262080E-008 s + 70 % time remaining: 6.9691229548169477E-008 s + 80 % time remaining: 4.6026416729508884E-008 s + 90 % time remaining: 2.2891237377624420E-008 s + 100 % time remaining: 2.6291509789120710E-010 s + + ...detecting acoustic-elastic-poroelastic surfaces + total acoustic elements : 20736 + total elastic elements : 0 + total poroelastic elements: 0 + + acoustic - elastic coupling : total number of faces = 0 + acoustic - poroelastic coupling : total number of faces = 0 + elastic - poroelastic coupling : total number of faces = 0 + + + ...element inner/outer separation + for overlapping of communications with calculations: + percentage of edge elements 11.2149506 % + percentage of volume elements 88.7850494 % + + + ...element mesh coloring + use coloring = F + + ...external binary models + no external binary model used + + ...creating mass matrix + + ...setting up mesh adjacency + + mesh adjacency: + total number of elements in this slice = 5136 + + maximum number of neighbors allowed = 300 + minimum array memory required per slice = 5.89727783 (MB) + + maximum number of elements per shared node = 8 + node-to-element array memory required per slice = 10.4835510 (MB) + + 10 % - elapsed time: 8.53095856E-03 s + 20 % - elapsed time: 2.15513334E-02 s + 30 % - elapsed time: 3.54655422E-02 s + 40 % - elapsed time: 4.86302935E-02 s + 50 % - elapsed time: 6.13369159E-02 s + 60 % - elapsed time: 7.52137080E-02 s + 70 % - elapsed time: 8.85347053E-02 s + 80 % - elapsed time: 0.100981832 s + 90 % - elapsed time: 0.114550792 s + 100 % - elapsed time: 0.121636711 s + + maximum neighbors found per element = 26 + (maximum neighbor of neighbors) = 
98 + total number of neighbors = 512346 + + Elapsed time for detection of neighbors in seconds = 0.133644789 + + + ...saving mesh databases + using binary file format + database file (for rank 0): ./OUTPUT_FILES/DATABASES_MPI/proc000000_external_mesh.bin + + saving mesh files for AVS, OpenDX, Paraview + saving additional mesh files with surface/coupling points + + ...checking mesh resolution + Mesh resolution: + + ******** + minimum and maximum number of elements + and points in the CUBIT + SCOTCH mesh: + + NSPEC_global_min = 5136 + NSPEC_global_max = 5216 + NSPEC_global_max / NSPEC_global_min imbalance = 1.01557636 = 1.55763245 % + NSPEC_global_sum = 20736 + + NGLOB_global_min = 343525 + NGLOB_global_max = 349505 + NGLOB_global_max / NGLOB_global_min imbalance = 1.01740777 = 1.74077582 % + NGLOB_global_sum = 1387880 + + If you have elements of a single type (all acoustic, all elastic, all poroelastic, and without CPML) + in the whole mesh, then there should be no significant imbalance in the above numbers. + Otherwise, it is normal to have imbalance in elements and points because the domain decomposer + compensates for the different cost of different elements by partitioning them unevenly among processes. 
+ ******** + + + ******** + Model: P velocity min,max = 2800.00000 2800.00000 + Model: S velocity min,max = 0.00000000 0.00000000 + + Model: Poisson's ratio min,max = 0.500000000 0.500000000 + ******** + + ********************************************* + *** Verification of simulation parameters *** + ********************************************* + + *** Xmin and Xmax of the model = 0.00000000 134000.000 + *** Ymin and Ymax of the model = 0.00000000 134000.000 + *** Zmin and Zmax of the model = -60000.0000 0.00000000 + + *** Max GLL point distance = 1227.47656 + *** Min GLL point distance = 642.726562 + *** Max/min ratio = 1.90979588 + + *** Max element size = 3750.00000 + *** Min element size = 3722.21875 + *** Max/min ratio = 1.00746357 + + *** Minimum period resolved = 1.67410719 + *** Maximum suggested time step = 0.109999999 + + Elapsed time for checking mesh resolution in seconds = 1.2908709000001295E-002 + saving VTK files for Courant number and minimum period + + + mesh regions done + + min and max of elevation (i.e. 
height of the upper surface of the mesh) included in mesh in m is 0.0000000000000000 0.0000000000000000 + + + done mesh setup + + + Repartition of elements: + ----------------------- + + load distribution: + element loads: min/max = 51360 52160 + + partition 0 has 51360 load units + partition 1 has 52160 load units + partition 2 has 51840 load units + partition 3 has 52000 load units + + load per partition: min/max = 51360 52160 + load per partition: imbalance = 1.53374231 % + (0% being totally balanced, 100% being unbalanced) + + total number of elements in mesh slice 0: 5136 + total number of regular elements in mesh slice 0: 0 + total number of irregular elements in mesh slice 0: 5136 + total number of points in mesh slice 0: 343525 + + total number of elements in entire mesh: 20736 + approximate total number of points in entire mesh (with duplicates on MPI edges): 1387880 + approximate total number of DOFs in entire mesh (with duplicates on MPI edges): 4163640 + + total number of time steps in the solver will be: 300 + + using single precision for the calculations + + smallest and largest possible floating-point numbers are: 1.17549435E-38 3.40282347E+38 + + + Elapsed time for mesh generation and buffer creation in seconds = 15.3367910 + Elapsed time for mesh generation and buffer creation in hh:mm:ss = 0 h 00 m 15 s + + End of mesh generation + + done + diff --git a/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_solver.txt b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_solver.txt new file mode 100644 index 000000000..29dc2d745 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_solver.txt @@ -0,0 +1,399 @@ + ********************************************** + **** Specfem 3-D Solver - MPI version f90 **** + ********************************************** + + Running Git package version of the code: 4.1.1 + which is Git unknown + dating unknown + + + + + There are 4 MPI processes + Processes are numbered 
from 0 to 3 + + There is a total of 4 slices + + NDIM = 3 + + NGLLX = 5 + NGLLY = 5 + NGLLZ = 5 + + using single precision for the calculations + + smallest and largest possible floating-point numbers are: 1.17549435E-38 3.40282347E+38 + + velocity model: default + + Reading mesh databases... + reads binary mesh files: proc***_external_mesh.bin + from directory : ./OUTPUT_FILES/DATABASES_MPI + + simulation w/ acoustic domain: T + simulation w/ elastic domain: F + simulation w/ poroelastic domain: F + + slice 0 has: + number of elements acoustic : 5136 + number of elements elastic : 0 + number of elements poroelastic: 0 + done + + total acoustic elements : 20736 + total elastic elements : 0 + total poroelastic elements : 0 + + Mesh resolution: + + ******** + minimum and maximum number of elements + and points in the CUBIT + SCOTCH mesh: + + NSPEC_global_min = 5136 + NSPEC_global_max = 5216 + NSPEC_global_max / NSPEC_global_min imbalance = 1.01557636 = 1.55763245 % + NSPEC_global_sum = 20736 + + NGLOB_global_min = 343525 + NGLOB_global_max = 349505 + NGLOB_global_max / NGLOB_global_min imbalance = 1.01740777 = 1.74077582 % + NGLOB_global_sum = 1387880 + + If you have elements of a single type (all acoustic, all elastic, all poroelastic, and without CPML) + in the whole mesh, then there should be no significant imbalance in the above numbers. + Otherwise, it is normal to have imbalance in elements and points because the domain decomposer + compensates for the different cost of different elements by partitioning them unevenly among processes. 
+ ******** + + + ******** + Model: P velocity min,max = 2800.00000 2800.00000 + Model: S velocity min,max = 0.00000000 0.00000000 + + Model: Poisson's ratio min,max = 0.500000000 0.500000000 + ******** + + ********************************************* + *** Verification of simulation parameters *** + ********************************************* + + *** Xmin and Xmax of the model = 0.00000000 134000.000 + *** Ymin and Ymax of the model = 0.00000000 134000.000 + *** Zmin and Zmax of the model = -60000.0000 0.00000000 + + *** Max GLL point distance = 1227.47656 + *** Min GLL point distance = 642.726562 + *** Max/min ratio = 1.90979588 + + *** Max element size = 3750.00000 + *** Min element size = 3722.21875 + *** Max/min ratio = 1.00746357 + + *** Minimum period resolved = 1.67410719 + *** Maximum suggested time step = 0.109999999 + + *** for DT : 0.11360000000000001 + *** Max stability for wave velocities = 0.494891644 + + Elapsed time for checking mesh resolution in seconds = 1.4762667000000000E-002 + saving VTK files for Courant number and minimum period + + + ****************************************** + There is a total of 4 slices + ****************************************** + + + kd-tree: + total data points: 138672 + theoretical number of nodes: 277336 + tree memory size: 8.46362305 MB + actual number of nodes: 277343 + tree memory size: 8.46383667 MB + maximum depth : 18 + creation timing : 5.08669913E-02 (s) + + + sources: 1 + + ******************** + locating sources + ******************** + + reading source information from ./DATA/CMTSOLUTION file + + no UTM projection + + + source # 1 + source located in slice 1 + in element 2780 + in acoustic domain + + using moment tensor source: + xi coordinate of source in that element: -1.0000000000000000 + eta coordinate of source in that element: 1.0000000000000000 + gamma coordinate of source in that element: 1.0000000000000000 + + source time function: + using Gaussian source time function + half duration: 
2.0000000000000000 seconds + + time shift: 0.0000000000000000 seconds + + magnitude of the source: + scalar moment M0 = 9.4305355097152345E+027 dyne-cm + moment magnitude Mw = 7.9496910938684096 + + original (requested) position of the source: + + latitude: 67000.000000000000 + longitude: 67000.000000000000 + + x: 67000.000000000000 + y: 67000.000000000000 + depth: 30.000000000000000 km + topo elevation: 0.0000000000000000 + + position of the source that will be used: + + x: 67000.000000000000 + y: 67000.000000000000 + depth: 30.000000000000000 km + z: -30000.000000000000 + + error in location of the source: 0.00000000 m + + + + maximum error in location of the sources: 0.00000000 m + + + Elapsed time for detection of sources in seconds = 9.47991665E-03 + + End of source detection - done + + + printing the source-time function + + receivers: + + there are 1 stations in file ./DATA/STATIONS_ADJOINT + saving 1 stations inside the model in file ./DATA/STATIONS_ADJOINT_FILTERED + excluding 0 stations located outside the model + + Total number of receivers = 1 + + + ******************** + locating receivers + ******************** + + reading receiver information from ./DATA/STATIONS_ADJOINT_FILTERED file + + + station # 1 DB X20 + original latitude: 67000.0000 + original longitude: 22732.1406 + original x: 22732.1406 + original y: 67000.0000 + original depth: 50.0000000 m + horizontal distance: 44.2678604 + target x, y, z: 22732.1406 67000.0000 -50.0000000 + closest estimate found: 9.23705556E-14 m away + + receiver located in slice 2 + in element 4904 + in acoustic domain + at coordinates: + xi = -0.78571608325633080 + eta = -1.0000000000000000 + gamma = 0.97333333333333338 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 22732.139999999999 + y: 67000.000000000000 + depth: 49.999999999999908 m + z: -49.999999999999908 + + + + maximum error in location of all the receivers: 
9.23705556E-14 m + + Elapsed time for receiver detection in seconds = 1.16090421E-02 + + End of receiver detection - done + + found a total of 1 receivers in all the slices + + 3 adjoint component trace files found in all slices + + source arrays: + number of sources is 1 + size of source array = 1.43051147E-03 MB + = 1.39698386E-06 GB + + seismograms: + seismograms written by all processes + + Total number of simulation steps (NSTEP) = 300 + writing out seismograms at every NTSTEP_BETWEEN_OUTPUT_SEISMOS = 300 + number of subsampling steps for seismograms = 1 + Total number of samples for seismograms = 300 + + + maximum number of local receivers is 1 in slice 2 + size of maximum seismogram array = 3.43322754E-03 MB + = 3.35276127E-06 GB + + adjoint source arrays: + reading adjoint sources at every NTSTEP_BETWEEN_READ_ADJSRC = 300 + maximum number of local adjoint sources is 1 in slice 2 + size of maximum adjoint source array = 3.43322754E-03 MB + = 3.35276127E-06 GB + + + Total number of samples for seismograms = 300 + + + Simulation setup: + + incorporating acoustic simulation + no elastic simulation + no poroelastic simulation + + no attenuation + no anisotropy + no oceans + no gravity + no movie simulation + + + preparing mass matrices + preparing constants + preparing wavefields + preparing fault simulation + no dynamic faults + no kinematic faults + no fault simulation + preparing gravity + no gravity simulation + preparing Stacey absorbing boundaries + preparing adjoint fields + preparing optimized arrays + number of regular shaped elements : 0 + number of irregular shaped elements: 5136 + fused array done + bandwidth test (STREAM TRIAD): + memory accesses = 11.7939949 MB + timing min/max = 7.94707972E-04 s / 8.13667022E-04 s + timing avg = 8.08104291E-04 s + bandwidth = 14.2525826 GB/s + + + Elapsed time for preparing timerun in seconds = 9.0330750000000015E-002 + + ************ + time loop + ************ + scheme: Newmark + + time step: 0.113600001 s + 
number of time steps: 300 + total simulated time: 34.0800018 seconds + start time: -6.00000000 seconds + + All processes are synchronized before the time loop + + Starting time iteration loop... + + Time step # 5 + Time: -5.54559994 seconds + Elapsed time in seconds = 0.14973066599999996 + Elapsed time in hh:mm:ss = 0 h 00 m 00 s + Mean elapsed time per time step in seconds = 2.99461335E-02 + Max norm pressure P in all slices (Pa) = 0.00000000 + Max norm pressure P (backward) in all slices (Pa) = 91456480.0 + Time steps done = 5 out of 300 + Time steps remaining = 295 + Estimated remaining time in seconds = 8.83410931 + Estimated remaining time in hh:mm:ss = 0 h 00 m 08 s + Estimated total run time in seconds = 8.98383999 + Estimated total run time in hh:mm:ss = 0 h 00 m 08 s + We have done 1.66666663 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 08:44 + ************************************************************ + **** BEWARE: the above time estimates are not very reliable + **** because fewer than 100 iterations have been performed + ************************************************************ + + Time step # 100 + Time: 5.24639988 seconds + Elapsed time in seconds = 3.1170359180000000 + Elapsed time in hh:mm:ss = 0 h 00 m 03 s + Mean elapsed time per time step in seconds = 3.11703589E-02 + Max norm pressure P in all slices (Pa) = 1.98121111E-08 + Max norm pressure P (backward) in all slices (Pa) = 145711840. 
+ Time steps done = 100 out of 300 + Time steps remaining = 200 + Estimated remaining time in seconds = 6.23407173 + Estimated remaining time in hh:mm:ss = 0 h 00 m 06 s + Estimated total run time in seconds = 9.35110760 + Estimated total run time in hh:mm:ss = 0 h 00 m 09 s + We have done 33.3333321 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 08:44 + + Time step # 200 + Time: 16.6063995 seconds + Elapsed time in seconds = 6.2227248350000002 + Elapsed time in hh:mm:ss = 0 h 00 m 06 s + Mean elapsed time per time step in seconds = 3.11136246E-02 + Max norm pressure P in all slices (Pa) = 4.10516898E-10 + Max norm pressure P (backward) in all slices (Pa) = 309838176. + Time steps done = 200 out of 300 + Time steps remaining = 100 + Estimated remaining time in seconds = 3.11136246 + Estimated remaining time in hh:mm:ss = 0 h 00 m 03 s + Estimated total run time in seconds = 9.33408737 + Estimated total run time in hh:mm:ss = 0 h 00 m 09 s + We have done 66.6666641 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 08:44 + + Time step # 300 + Time: 27.9664001 seconds + Elapsed time in seconds = 9.3015807119999998 + Elapsed time in hh:mm:ss = 0 h 00 m 09 s + Mean elapsed time per time step in seconds = 3.10052689E-02 + Max norm pressure P in all slices (Pa) = 2.76162204E-10 + Max norm pressure P (backward) in all slices (Pa) = 1453328.75 + Time steps done = 300 out of 300 + Time steps remaining = 0 + Estimated remaining time in seconds = 0.00000000 + Estimated remaining time in hh:mm:ss = 0 h 00 m 00 s + Estimated total run time in seconds = 9.30158043 + Estimated total run time in hh:mm:ss = 0 h 00 m 09 s + We have done 100.000000 % of that + + Writing the seismograms + Total number of time steps written: 300 + Writing the seismograms in parallel took 8.46916717E-03 seconds + + Time loop finished. 
Timing info: + Total elapsed time in seconds = 9.3451562529999990 + Total elapsed time in hh:mm:ss = 0 h 00 m 09 s + + finalizing simulation + + Acoustic kernels: + maximum value of rho kernel = 1.81843929E-10 + maximum value of kappa kernel = 3.60816195E-12 + + maximum value of rho prime kernel = 1.78654244E-10 + maximum value of alpha kernel = 7.21632389E-12 + + + End of the simulation + diff --git a/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_solver.txt.forward b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_solver.txt.forward new file mode 100644 index 000000000..8cc41fde2 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_acoustic/REF_KERNEL/output_solver.txt.forward @@ -0,0 +1,463 @@ + ********************************************** + **** Specfem 3-D Solver - MPI version f90 **** + ********************************************** + + Running Git package version of the code: 4.1.1 + which is Git unknown + dating unknown + + + + + There are 4 MPI processes + Processes are numbered from 0 to 3 + + There is a total of 4 slices + + NDIM = 3 + + NGLLX = 5 + NGLLY = 5 + NGLLZ = 5 + + using single precision for the calculations + + smallest and largest possible floating-point numbers are: 1.17549435E-38 3.40282347E+38 + + velocity model: default + + Reading mesh databases... 
+ reads binary mesh files: proc***_external_mesh.bin + from directory : ./OUTPUT_FILES/DATABASES_MPI + + simulation w/ acoustic domain: T + simulation w/ elastic domain: F + simulation w/ poroelastic domain: F + + slice 0 has: + number of elements acoustic : 5136 + number of elements elastic : 0 + number of elements poroelastic: 0 + done + + total acoustic elements : 20736 + total elastic elements : 0 + total poroelastic elements : 0 + + Mesh resolution: + + ******** + minimum and maximum number of elements + and points in the CUBIT + SCOTCH mesh: + + NSPEC_global_min = 5136 + NSPEC_global_max = 5216 + NSPEC_global_max / NSPEC_global_min imbalance = 1.01557636 = 1.55763245 % + NSPEC_global_sum = 20736 + + NGLOB_global_min = 343525 + NGLOB_global_max = 349505 + NGLOB_global_max / NGLOB_global_min imbalance = 1.01740777 = 1.74077582 % + NGLOB_global_sum = 1387880 + + If you have elements of a single type (all acoustic, all elastic, all poroelastic, and without CPML) + in the whole mesh, then there should be no significant imbalance in the above numbers. + Otherwise, it is normal to have imbalance in elements and points because the domain decomposer + compensates for the different cost of different elements by partitioning them unevenly among processes. 
+ ******** + + + ******** + Model: P velocity min,max = 2800.00000 2800.00000 + Model: S velocity min,max = 0.00000000 0.00000000 + + Model: Poisson's ratio min,max = 0.500000000 0.500000000 + ******** + + ********************************************* + *** Verification of simulation parameters *** + ********************************************* + + *** Xmin and Xmax of the model = 0.00000000 134000.000 + *** Ymin and Ymax of the model = 0.00000000 134000.000 + *** Zmin and Zmax of the model = -60000.0000 0.00000000 + + *** Max GLL point distance = 1227.47656 + *** Min GLL point distance = 642.726562 + *** Max/min ratio = 1.90979588 + + *** Max element size = 3750.00000 + *** Min element size = 3722.21875 + *** Max/min ratio = 1.00746357 + + *** Minimum period resolved = 1.67410719 + *** Maximum suggested time step = 0.109999999 + + *** for DT : 0.11360000000000001 + *** Max stability for wave velocities = 0.494891644 + + Elapsed time for checking mesh resolution in seconds = 2.1857624999999999E-002 + saving VTK files for Courant number and minimum period + + + ****************************************** + There is a total of 4 slices + ****************************************** + + + kd-tree: + total data points: 138672 + theoretical number of nodes: 277336 + tree memory size: 8.46362305 MB + actual number of nodes: 277343 + tree memory size: 8.46383667 MB + maximum depth : 18 + creation timing : 4.36910093E-02 (s) + + + sources: 1 + + ******************** + locating sources + ******************** + + reading source information from ./DATA/CMTSOLUTION file + + no UTM projection + + + source # 1 + source located in slice 1 + in element 2780 + in acoustic domain + + using moment tensor source: + xi coordinate of source in that element: -1.0000000000000000 + eta coordinate of source in that element: 1.0000000000000000 + gamma coordinate of source in that element: 1.0000000000000000 + + source time function: + using Gaussian source time function + half duration: 
2.0000000000000000 seconds + + time shift: 0.0000000000000000 seconds + + magnitude of the source: + scalar moment M0 = 9.4305355097152345E+027 dyne-cm + moment magnitude Mw = 7.9496910938684096 + + original (requested) position of the source: + + latitude: 67000.000000000000 + longitude: 67000.000000000000 + + x: 67000.000000000000 + y: 67000.000000000000 + depth: 30.000000000000000 km + topo elevation: 0.0000000000000000 + + position of the source that will be used: + + x: 67000.000000000000 + y: 67000.000000000000 + depth: 30.000000000000000 km + z: -30000.000000000000 + + error in location of the source: 0.00000000 m + + + + maximum error in location of the sources: 0.00000000 m + + + Elapsed time for detection of sources in seconds = 1.28025003E-02 + + End of source detection - done + + + printing the source-time function + + receivers: + + there are 4 stations in file ./DATA/STATIONS + saving 4 stations inside the model in file ./DATA/STATIONS_FILTERED + excluding 0 stations located outside the model + + Total number of receivers = 4 + + + ******************** + locating receivers + ******************** + + reading receiver information from ./DATA/STATIONS_FILTERED file + + + station # 1 DB X20 + original latitude: 67000.0000 + original longitude: 22732.1406 + original x: 22732.1406 + original y: 67000.0000 + original depth: 50.0000000 m + horizontal distance: 44.2678604 + target x, y, z: 22732.1406 67000.0000 -50.0000000 + closest estimate found: 9.23705556E-14 m away + + receiver located in slice 2 + in element 4904 + in acoustic domain + at coordinates: + xi = -0.78571608325633080 + eta = -1.0000000000000000 + gamma = 0.97333333333333338 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 22732.139999999999 + y: 67000.000000000000 + depth: 49.999999999999908 m + z: -49.999999999999908 + + + + station # 2 DB X30 + original latitude: 67000.0000 + original 
longitude: 34696.4297 + original x: 34696.4297 + original y: 67000.0000 + original depth: 50.0000000 m + horizontal distance: 32.3035698 + target x, y, z: 34696.4297 67000.0000 -50.0000000 + closest estimate found: 8.52651283E-14 m away + + receiver located in slice 2 + in element 4893 + in acoustic domain + at coordinates: + xi = -0.35714216451234176 + eta = 1.0000000000000000 + gamma = 0.97333333333333338 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 34696.430000000000 + y: 67000.000000000000 + depth: 49.999999999999915 m + z: -49.999999999999915 + + + + station # 3 DB X40 + original latitude: 67000.0000 + original longitude: 46660.7109 + original x: 46660.7109 + original y: 67000.0000 + original depth: 50.0000000 m + horizontal distance: 20.3392906 + target x, y, z: 46660.7109 67000.0000 -50.0000000 + closest estimate found: 9.94759830E-14 m away + + receiver located in slice 2 + in element 4910 + in acoustic domain + at coordinates: + xi = 7.1425444096846064E-002 + eta = -1.0000000000000000 + gamma = 0.97333333333333338 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 46660.709999999999 + y: 67000.000000000000 + depth: 49.999999999999901 m + z: -49.999999999999901 + + + + station # 4 DB X50 + original latitude: 67000.0000 + original longitude: 58625.0000 + original x: 58625.0000 + original y: 67000.0000 + original depth: 50.0000000 m + horizontal distance: 8.37500000 + target x, y, z: 58625.0000 67000.0000 -50.0000000 + closest estimate found: 9.94759830E-14 m away + + receiver located in slice 1 + in element 5209 + in acoustic domain + at coordinates: + xi = 0.50000052472008893 + eta = 1.0000000000000000 + gamma = 0.97333333333333338 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 
1.00000000 + x: 58625.000000000000 + y: 67000.000000000000 + depth: 49.999999999999901 m + z: -49.999999999999901 + + + + maximum error in location of all the receivers: 9.94759830E-14 m + + Elapsed time for receiver detection in seconds = 1.21868337E-02 + + End of receiver detection - done + + found a total of 4 receivers in all the slices + + source arrays: + number of sources is 1 + size of source array = 1.43051147E-03 MB + = 1.39698386E-06 GB + + seismograms: + seismograms written by all processes + + Total number of simulation steps (NSTEP) = 300 + writing out seismograms at every NTSTEP_BETWEEN_OUTPUT_SEISMOS = 300 + number of subsampling steps for seismograms = 1 + Total number of samples for seismograms = 300 + + + maximum number of local receivers is 3 in slice 2 + size of maximum seismogram array = 1.02996826E-02 MB + = 1.00582838E-05 GB + + + Total number of samples for seismograms = 300 + + + Simulation setup: + + incorporating acoustic simulation + no elastic simulation + no poroelastic simulation + + no attenuation + no anisotropy + no oceans + no gravity + no movie simulation + + + preparing mass matrices + preparing constants + preparing wavefields + preparing fault simulation + no dynamic faults + no kinematic faults + no fault simulation + preparing gravity + no gravity simulation + preparing Stacey absorbing boundaries + preparing optimized arrays + number of regular shaped elements : 0 + number of irregular shaped elements: 5136 + fused array done + bandwidth test (STREAM TRIAD): + memory accesses = 11.7939949 MB + timing min/max = 7.92999985E-04 s / 9.98249976E-04 s + timing avg = 8.37925007E-04 s + bandwidth = 13.7453508 GB/s + + + Elapsed time for preparing timerun in seconds = 2.9284709000000020E-002 + + ************ + time loop + ************ + scheme: Newmark + + time step: 0.113600001 s + number of time steps: 300 + total simulated time: 34.0800018 seconds + start time: -6.00000000 seconds + + All processes are synchronized before the 
time loop + + Starting time iteration loop... + + Time step # 5 + Time: -5.54559994 seconds + Elapsed time in seconds = 3.8937082999999983E-002 + Elapsed time in hh:mm:ss = 0 h 00 m 00 s + Mean elapsed time per time step in seconds = 7.78741669E-03 + Max norm pressure P in all slices (Pa) = 590.761353 + Time steps done = 5 out of 300 + Time steps remaining = 295 + Estimated remaining time in seconds = 2.29728794 + Estimated remaining time in hh:mm:ss = 0 h 00 m 02 s + Estimated total run time in seconds = 2.33622503 + Estimated total run time in hh:mm:ss = 0 h 00 m 02 s + We have done 1.66666663 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 08:44 + ************************************************************ + **** BEWARE: the above time estimates are not very reliable + **** because fewer than 100 iterations have been performed + ************************************************************ + + Time step # 100 + Time: 5.24639988 seconds + Elapsed time in seconds = 0.95123208300000006 + Elapsed time in hh:mm:ss = 0 h 00 m 00 s + Mean elapsed time per time step in seconds = 9.51232109E-03 + Max norm pressure P in all slices (Pa) = 330715456. + Time steps done = 100 out of 300 + Time steps remaining = 200 + Estimated remaining time in seconds = 1.90246415 + Estimated remaining time in hh:mm:ss = 0 h 00 m 01 s + Estimated total run time in seconds = 2.85369635 + Estimated total run time in hh:mm:ss = 0 h 00 m 02 s + We have done 33.3333321 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 08:44 + + Time step # 200 + Time: 16.6063995 seconds + Elapsed time in seconds = 1.9258840420000001 + Elapsed time in hh:mm:ss = 0 h 00 m 01 s + Mean elapsed time per time step in seconds = 9.62942000E-03 + Max norm pressure P in all slices (Pa) = 149167216. 
+ Time steps done = 200 out of 300 + Time steps remaining = 100 + Estimated remaining time in seconds = 0.962942004 + Estimated remaining time in hh:mm:ss = 0 h 00 m 00 s + Estimated total run time in seconds = 2.88882613 + Estimated total run time in hh:mm:ss = 0 h 00 m 02 s + We have done 66.6666641 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 08:44 + + Time step # 300 + Time: 27.9664001 seconds + Elapsed time in seconds = 2.8623994589999997 + Elapsed time in hh:mm:ss = 0 h 00 m 02 s + Mean elapsed time per time step in seconds = 9.54133179E-03 + Max norm pressure P in all slices (Pa) = 90905736.0 + Time steps done = 300 out of 300 + Time steps remaining = 0 + Estimated remaining time in seconds = 0.00000000 + Estimated remaining time in hh:mm:ss = 0 h 00 m 00 s + Estimated total run time in seconds = 2.86239958 + Estimated total run time in hh:mm:ss = 0 h 00 m 02 s + We have done 100.000000 % of that + + Writing the seismograms + Total number of time steps written: 300 + Writing the seismograms in parallel took 9.30725038E-03 seconds + + Time loop finished. Timing info: + Total elapsed time in seconds = 2.8835288759999997 + Total elapsed time in hh:mm:ss = 0 h 00 m 02 s + + finalizing simulation + + + End of the simulation + diff --git a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/DATA/Par_file b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/DATA/Par_file index 17a57c82c..0e5cd7059 100644 --- a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/DATA/Par_file +++ b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/DATA/Par_file @@ -138,7 +138,7 @@ BOTTOM_FREE_SURFACE = .false. # but requires disk space for temporary storage, and uses a significant amount of memory used as buffers for temporary storage. 
# When that option is on the second parameter indicates how often the code dumps restart files to disk (if in doubt, use something between 100 and 1000). UNDO_ATTENUATION_AND_OR_PML = .false. -NT_DUMP_ATTENUATION = 500 +NT_DUMP_ATTENUATION = 200 #----------------------------------------------------------- # diff --git a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXX.semd b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXX.semd new file mode 100644 index 000000000..26f7591a9 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXX.semd @@ -0,0 +1,700 @@ + 24.9500008 7.87155294 + 24.8999996 7.83803225 + 24.8500004 7.80466557 + 24.7999992 7.77144623 + 24.7500000 7.73837566 + 24.7000008 7.70546865 + 24.6499996 7.67275286 + 24.6000004 7.64027452 + 24.5499992 7.60808706 + 24.5000000 7.57624531 + 24.4500008 7.54479170 + 24.3999996 7.51374865 + 24.3500004 7.48310995 + 24.2999992 7.45284224 + 24.2500000 7.42289495 + 24.2000008 7.39321089 + 24.1499996 7.36374378 + 24.1000004 7.33447027 + 24.0499992 7.30539465 + 24.0000000 7.27655077 + 23.9500008 7.24799442 + 23.8999996 7.21979284 + 23.8500004 7.19201088 + 23.7999992 7.16470385 + 23.7500000 7.13790464 + 23.7000008 7.11162472 + 23.6499996 7.08585167 + 23.6000004 7.06055403 + 23.5499992 7.03568602 + 23.5000000 7.01119566 + 23.4500008 6.98702955 + 23.3999996 6.96314621 + 23.3500004 6.93951988 + 23.2999992 6.91614676 + 23.2500000 6.89305067 + 23.2000008 6.87027884 + 23.1499996 6.84789753 + 23.1000004 6.82597589 + 23.0499992 6.80457497 + 23.0000000 6.78373241 + 22.9500008 6.76345253 + 22.8999996 6.74370670 + 22.8500004 6.72443295 + 22.7999992 6.70555305 + 22.7500000 6.68698692 + 22.7000008 6.66866779 + 22.6499996 6.65055561 + 22.6000004 6.63264036 + 22.5499992 6.61494112 + 22.5000000 6.59749699 + 22.4500008 6.58035231 + 22.3999996 6.56354618 + 22.3500004 
6.54709959 + 22.2999992 6.53101397 + 22.2500000 6.51526833 + 22.2000008 6.49982786 + 22.1499996 6.48464727 + 22.1000004 6.46967936 + 22.0499992 6.45488119 + 22.0000000 6.44021559 + 21.9500008 6.42565060 + 21.8999996 6.41115999 + 21.8500004 6.39671755 + 21.7999992 6.38229847 + 21.7500000 6.36787748 + 21.7000008 6.35342979 + 21.6499996 6.33893061 + 21.6000004 6.32435846 + 21.5499992 6.30969858 + 21.5000000 6.29493856 + 21.4500008 6.28006983 + 21.3999996 6.26508474 + 21.3500004 6.24997139 + 21.2999992 6.23470879 + 21.2500000 6.21926308 + 21.2000008 6.20358515 + 21.1499996 6.18760872 + 21.1000004 6.17125702 + 21.0499992 6.15444994 + 21.0000000 6.13711023 + 20.9500008 6.11918211 + 20.8999996 6.10063124 + 20.8500004 6.08145285 + 20.7999992 6.06167030 + 20.7500000 6.04132080 + 20.7000008 6.02044439 + 20.6499996 5.99906778 + 20.6000004 5.97718811 + 20.5499992 5.95476532 + 20.5000000 5.93172264 + 20.4500008 5.90795565 + 20.3999996 5.88334990 + 20.3500004 5.85779953 + 20.2999992 5.83122540 + 20.2500000 5.80358839 + 20.2000008 5.77489233 + 20.1499996 5.74517393 + 20.1000004 5.71449327 + 20.0499992 5.68291140 + 20.0000000 5.65047503 + 19.9500008 5.61720324 + 19.8999996 5.58308506 + 19.8500004 5.54807854 + 19.7999992 5.51212168 + 19.7500000 5.47514534 + 19.7000008 5.43708324 + 19.6499996 5.39788437 + 19.6000004 5.35751438 + 19.5499992 5.31596136 + 19.5000000 5.27323198 + 19.4500008 5.22934914 + 19.3999996 5.18434763 + 19.3500004 5.13827229 + 19.2999992 5.09117270 + 19.2500000 5.04310179 + 19.2000008 4.99410772 + 19.1499996 4.94423056 + 19.1000004 4.89349270 + 19.0499992 4.84189606 + 19.0000000 4.78942060 + 18.9500008 4.73602772 + 18.8999996 4.68167210 + 18.8500004 4.62631321 + 18.7999992 4.56993055 + 18.7500000 4.51253366 + 18.7000008 4.45417404 + 18.6499996 4.39493704 + 18.6000004 4.33493519 + 18.5499992 4.27428865 + 18.5000000 4.21310759 + 18.4500008 4.15147209 + 18.3999996 4.08942413 + 18.3500004 4.02696371 + 18.2999992 3.96406054 + 18.2500000 3.90066886 + 18.2000008 
3.83674884 + 18.1499996 3.77228618 + 18.1000004 3.70730519 + 18.0499992 3.64187026 + 18.0000000 3.57608056 + 17.9500008 3.51005268 + 17.8999996 3.44390321 + 17.8500004 3.37773037 + 17.7999992 3.31160235 + 17.7500000 3.24555421 + 17.7000008 3.17959332 + 17.6499996 3.11371207 + 17.6000004 3.04790378 + 17.5499992 2.98217583 + 17.5000000 2.91655922 + 17.4500008 2.85110903 + 17.3999996 2.78589749 + 17.3500004 2.72100425 + 17.2999992 2.65650225 + 17.2500000 2.59244895 + 17.2000008 2.52888155 + 17.1499996 2.46581936 + 17.1000004 2.40327168 + 17.0499992 2.34124756 + 17.0000000 2.27976489 + 16.9500008 2.21885705 + 16.8999996 2.15857172 + 16.8500004 2.09896660 + 16.7999992 2.04010010 + 16.7500000 1.98202264 + 16.7000008 1.92476833 + 16.6499996 1.86835492 + 16.6000004 1.81278503 + 16.5499992 1.75805497 + 16.5000000 1.70416284 + 16.4500008 1.65111661 + 16.3999996 1.59893787 + 16.3500004 1.54766059 + 16.2999992 1.49732518 + 16.2500000 1.44797003 + 16.2000008 1.39962316 + 16.1499996 1.35229671 + 16.1000004 1.30598629 + 16.0499992 1.26067507 + 16.0000000 1.21634173 + 15.9499998 1.17296958 + 15.8999996 1.13055384 + 15.8500004 1.08910453 + 15.8000002 1.04864419 + 15.7500000 1.00920153 + 15.6999998 0.970801532 + 15.6499996 0.933456779 + 15.6000004 0.897162020 + 15.5500002 0.861893654 + 15.5000000 0.827615440 + 15.4499998 0.794287562 + 15.3999996 0.761876941 + 15.3500004 0.730366707 + 15.3000002 0.699759245 + 15.2500000 0.670074642 + 15.1999998 0.641342342 + 15.1499996 0.613589883 + 15.1000004 0.586830676 + 15.0500002 0.561055720 + 15.0000000 0.536230624 + 14.9499998 0.512299836 + 14.8999996 0.489196181 + 14.8500004 0.466854513 + 14.8000002 0.445224017 + 14.7500000 0.424277484 + 14.6999998 0.404013872 + 14.6499996 0.384453952 + 14.6000004 0.365630597 + 14.5500002 0.347575754 + 14.5000000 0.330308408 + 14.4499998 0.313826442 + 14.3999996 0.298104078 + 14.3500004 0.283095866 + 14.3000002 0.268745154 + 14.2500000 0.254994720 + 14.1999998 0.241796419 + 14.1499996 0.229117319 + 14.1000004 
0.216941133 + 14.0500002 0.205264926 + 14.0000000 0.194092780 + 13.9499998 0.183428645 + 13.8999996 0.173270628 + 13.8500004 0.163608268 + 13.8000002 0.154423177 + 13.7500000 0.145692378 + 13.6999998 0.137392506 + 13.6499996 0.129503444 + 13.6000004 0.122009769 + 13.5500002 0.114899747 + 13.5000000 0.108162269 + 13.4499998 0.101783305 + 13.3999996 9.57432464E-02 + 13.3500004 9.00166258E-02 + 13.3000002 8.45745802E-02 + 13.2500000 7.93894082E-02 + 13.1999998 7.44400695E-02 + 13.1499996 6.97164387E-02 + 13.1000004 6.52208924E-02 + 13.0500002 6.09663464E-02 + 13.0000000 5.69707491E-02 + 12.9499998 5.32495826E-02 + 12.8999996 4.98083383E-02 + 12.8500004 4.66371961E-02 + 12.8000002 4.37096059E-02 + 12.7500000 4.09854539E-02 + 12.6999998 3.84181142E-02 + 12.6499996 3.59636694E-02 + 12.6000004 3.35897878E-02 + 12.5500002 3.12819146E-02 + 12.5000000 2.90449765E-02 + 12.4499998 2.69001592E-02 + 12.3999996 2.48775259E-02 + 12.3500004 2.30065137E-02 + 12.3000002 2.13068090E-02 + 12.2500000 1.97820552E-02 + 12.1999998 1.84180234E-02 + 12.1499996 1.71856247E-02 + 12.1000004 1.60478354E-02 + 12.0500002 1.49685312E-02 + 12.0000000 1.39207589E-02 + 11.9499998 1.28921969E-02 + 11.8999996 1.18864113E-02 + 11.8500004 1.09197171E-02 + 11.8000002 1.00147026E-02 + 11.7500000 9.19239409E-03 + 11.6999998 8.46531522E-03 + 11.6499996 7.83338118E-03 + 11.6000004 7.28366990E-03 + 11.5500002 6.79393066E-03 + 11.5000000 6.33864244E-03 + 11.4499998 5.89571893E-03 + 11.3999996 5.45185944E-03 + 11.3500004 5.00504905E-03 + 11.3000002 4.56361007E-03 + 11.2500000 4.14228160E-03 + 11.1999998 3.75667680E-03 + 11.1499996 3.41787236E-03 + 11.1000004 3.12880240E-03 + 11.0500002 2.88345991E-03 + 11.0000000 2.66902009E-03 + 10.9499998 2.47007888E-03 + 10.8999996 2.27356958E-03 + 10.8500004 2.07275432E-03 + 10.8000002 1.86903891E-03 + 10.7500000 1.67106523E-03 + 10.6999998 1.49142614E-03 + 10.6499996 1.34207599E-03 + 10.6000004 1.22991728E-03 + 10.5500002 1.15396141E-03 + 10.5000000 1.10496860E-03 + 
10.4499998 1.06769195E-03 + 10.3999996 1.02505495E-03 + 10.3500004 9.63004364E-04 + 10.3000002 8.74578836E-04 + 10.2500000 7.61974254E-04 + 10.1999998 6.35984470E-04 + 10.1499996 5.12967759E-04 + 10.1000004 4.10211651E-04 + 10.0500002 3.41022795E-04 + 10.0000000 3.10935109E-04 + 9.94999981 3.16096295E-04 + 9.89999962 3.44261614E-04 + 9.85000038 3.78084253E-04 + 9.80000019 3.99755576E-04 + 9.75000000 3.95693874E-04 + 9.69999981 3.59996688E-04 + 9.64999962 2.95750389E-04 + 9.60000038 2.13905456E-04 + 9.55000019 1.30106695E-04 + 9.50000000 6.04122324E-05 + 9.44999981 1.71023257E-05 + 9.39999962 5.70045268E-06 + 9.35000038 2.39421333E-05 + 9.30000019 6.28537236E-05 + 9.25000000 1.09514011E-04 + 9.19999981 1.50634107E-04 + 9.14999962 1.75922643E-04 + 9.10000038 1.80337302E-04 + 9.05000019 1.64704339E-04 + 9.00000000 1.34690359E-04 + 8.94999981 9.85808874E-05 + 8.89999962 6.46215631E-05 + 8.85000038 3.87349828E-05 + 8.80000019 2.32356488E-05 + 8.75000000 1.68065762E-05 + 8.69999981 1.55927464E-05 + 8.64999962 1.49385896E-05 + 8.60000038 1.11441814E-05 + 8.55000019 2.67157088E-06 + 8.50000000 -9.53548351E-06 + 8.44999981 -2.26351021E-05 + 8.39999962 -3.30922667E-05 + 8.35000038 -3.80401871E-05 + 8.30000019 -3.63169638E-05 + 8.25000000 -2.88403917E-05 + 8.19999981 -1.82301555E-05 + 8.14999962 -7.84737585E-06 + 8.10000038 -6.10320512E-07 + 8.05000019 1.99600208E-06 + 8.00000000 3.81944488E-07 + 7.94999981 -3.35734694E-06 + 7.90000010 -6.23969618E-06 + 7.84999990 -5.50263348E-06 + 7.80000019 3.54336493E-07 + 7.75000000 1.09370167E-05 + 7.69999981 2.39406654E-05 + 7.65000010 3.57591925E-05 + 7.59999990 4.25317121E-05 + 7.55000019 4.13063644E-05 + 7.50000000 3.09702082E-05 + 7.44999981 1.26697805E-05 + 7.40000010 -1.03979555E-05 + 7.34999990 -3.37588244E-05 + 7.30000019 -5.27732846E-05 + 7.25000000 -6.37723788E-05 + 7.19999981 -6.49079520E-05 + 7.15000010 -5.65005103E-05 + 7.09999990 -4.08185260E-05 + 7.05000019 -2.13849253E-05 + 7.00000000 -2.03299896E-06 + 6.94999981 
1.40107013E-05 + 6.90000010 2.47667776E-05 + 6.84999990 2.97167244E-05 + 6.80000019 2.95785703E-05 + 6.75000000 2.57671109E-05 + 6.69999981 1.97504651E-05 + 6.65000010 1.25325005E-05 + 6.59999990 4.43376894E-06 + 6.55000019 -4.76853620E-06 + 6.50000000 -1.54113877E-05 + 6.44999981 -2.74033046E-05 + 6.40000010 -3.97911135E-05 + 6.34999990 -5.06303877E-05 + 6.30000019 -5.72578647E-05 + 6.25000000 -5.69299191E-05 + 6.19999981 -4.76612586E-05 + 6.15000010 -2.90140051E-05 + 6.09999990 -2.57800730E-06 + 6.05000019 2.80457189E-05 + 6.00000000 5.78135259E-05 + 5.94999981 8.12575745E-05 + 5.90000010 9.37165751E-05 + 5.84999990 9.24671767E-05 + 5.80000019 7.74548098E-05 + 5.75000000 5.14164130E-05 + 5.69999981 1.93404576E-05 + 5.65000010 -1.26199593E-05 + 5.59999990 -3.85215862E-05 + 5.55000019 -5.39567445E-05 + 5.50000000 -5.70031443E-05 + 5.44999981 -4.85648488E-05 + 5.40000010 -3.20254694E-05 + 5.34999990 -1.23098735E-05 + 5.30000019 5.39937810E-06 + 5.25000000 1.69582709E-05 + 5.19999981 2.02383835E-05 + 5.15000010 1.55409834E-05 + 5.09999990 5.35726394E-06 + 5.05000019 -6.43905196E-06 + 5.00000000 -1.57274371E-05 + 4.94999981 -1.92792122E-05 + 4.90000010 -1.55800171E-05 + 4.84999990 -5.11519920E-06 + 4.80000019 9.93999311E-06 + 4.75000000 2.65039434E-05 + 4.69999981 4.15825743E-05 + 4.65000010 5.31505721E-05 + 4.59999990 6.06397989E-05 + 4.55000019 6.48881687E-05 + 4.50000000 6.75841075E-05 + 4.44999981 7.04132181E-05 + 4.40000010 7.42170378E-05 + 4.34999990 7.84725926E-05 + 4.30000019 8.12941362E-05 + 4.25000000 7.99795889E-05 + 4.19999981 7.19337477E-05 + 4.15000010 5.56639970E-05 + 4.09999990 3.15103389E-05 + 4.05000019 1.85275621E-06 + 4.00000000 -2.92912973E-05 + 3.95000005 -5.71668097E-05 + 3.90000010 -7.74057189E-05 + 3.84999990 -8.70370204E-05 + 3.79999995 -8.50932338E-05 + 3.75000000 -7.26685321E-05 + 3.70000005 -5.24692950E-05 + 3.65000010 -2.80556014E-05 + 3.59999990 -3.04693390E-06 + 3.54999995 1.94665809E-05 + 3.50000000 3.71852075E-05 + 3.45000005 
4.85651872E-05 + 3.40000010 5.26451331E-05 + 3.34999990 4.89310587E-05 + 3.29999995 3.74796218E-05 + 3.25000000 1.91861145E-05 + 3.20000005 -3.86543707E-06 + 3.15000010 -2.82295350E-05 + 3.09999990 -4.93358602E-05 + 3.04999995 -6.22452426E-05 + 3.00000000 -6.28339476E-05 + 2.95000005 -4.91086466E-05 + 2.90000010 -2.22261842E-05 + 2.84999990 1.31963789E-05 + 2.79999995 4.97283290E-05 + 2.75000000 7.86941819E-05 + 2.70000005 9.23223706E-05 + 2.65000010 8.59006686E-05 + 2.59999990 5.92992510E-05 + 2.54999995 1.73562257E-05 + 2.50000000 -3.10744726E-05 + 2.45000005 -7.52243359E-05 + 2.40000010 -1.05119791E-04 + 2.34999990 -1.14155511E-04 + 2.29999995 -1.00801713E-04 + 2.25000000 -6.89501030E-05 + 2.20000005 -2.67908727E-05 + 2.15000010 1.54618720E-05 + 2.09999990 4.83447766E-05 + 2.04999995 6.56266420E-05 + 2.00000000 6.57959827E-05 + 1.95000005 5.21533038E-05 + 1.89999998 3.15210527E-05 + 1.85000002 1.19893675E-05 + 1.79999995 3.91072462E-07 + 1.75000000 2.64493707E-07 + 1.70000005 1.09008261E-05 + 1.64999998 2.77356412E-05 + 1.60000002 4.39340765E-05 + 1.54999995 5.26603326E-05 + 1.50000000 4.93207081E-05 + 1.45000005 3.30854673E-05 + 1.39999998 7.22136792E-06 + 1.35000002 -2.18650766E-05 + 1.29999995 -4.65753183E-05 + 1.25000000 -6.04525740E-05 + 1.20000005 -6.01751344E-05 + 1.14999998 -4.65848498E-05 + 1.10000002 -2.44226067E-05 + 1.04999995 -8.43827820E-07 + 1.00000000 1.68300521E-05 + 0.949999988 2.34230483E-05 + 0.899999976 1.75493333E-05 + 0.850000024 2.06194773E-06 + 0.800000012 -1.68524275E-05 + 0.750000000 -3.16635560E-05 + 0.699999988 -3.59544320E-05 + 0.649999976 -2.65686267E-05 + 0.600000024 -4.75868865E-06 + 0.550000012 2.40452246E-05 + 0.500000000 5.17926164E-05 + 0.449999988 7.02190227E-05 + 0.400000006 7.33505731E-05 + 0.349999994 5.93211626E-05 + 0.300000012 3.09357711E-05 + 0.250000000 -5.18368006E-06 + 0.200000003 -4.05777573E-05 + 0.150000006 -6.74040639E-05 + 0.100000001 -8.05760355E-05 + 5.00000007E-02 -7.89192127E-05 + 0.00000000 
-6.50254806E-05 + -5.00000007E-02 -4.39370815E-05 + -0.100000001 -2.11926763E-05 + -0.150000006 -9.75028570E-07 + -0.200000003 1.49596272E-05 + -0.250000000 2.72003490E-05 + -0.300000012 3.75301606E-05 + -0.349999994 4.72184038E-05 + -0.400000006 5.55205988E-05 + -0.449999988 5.91147182E-05 + -0.500000000 5.28772362E-05 + -0.550000012 3.18883795E-05 + -0.600000024 -5.97103644E-06 + -0.649999976 -5.78110667E-05 + -0.699999988 -1.14905684E-04 + -0.750000000 -1.63882360E-04 + -0.800000012 -1.89921804E-04 + -0.850000024 -1.81230964E-04 + -0.899999976 -1.33480775E-04 + -0.949999988 -5.27022567E-05 + -1.00000000 4.45768419E-05 + -1.04999995 1.34439280E-04 + -1.10000002 1.91429834E-04 + -1.14999998 1.95711240E-04 + -1.20000005 1.39498894E-04 + -1.25000000 3.07396258E-05 + -1.29999995 -1.07372332E-04 + -1.35000002 -2.41325310E-04 + -1.39999998 -3.35634628E-04 + -1.45000005 -3.62459454E-04 + -1.50000000 -3.09799245E-04 + -1.54999995 -1.85798039E-04 + -1.60000002 -1.76628128E-05 + -1.64999998 1.54809910E-04 + -1.70000005 2.89528805E-04 + -1.75000000 3.53458367E-04 + -1.79999995 3.31825198E-04 + -1.85000002 2.32476305E-04 + -1.89999998 8.39994609E-05 + -1.95000005 -7.20109456E-05 + -2.00000000 -1.92443928E-04 + -2.04999995 -2.44677532E-04 + -2.09999990 -2.15650871E-04 + -2.15000010 -1.15363415E-04 + -2.20000005 2.62318572E-05 + -2.25000000 1.68038983E-04 + -2.29999995 2.69746233E-04 + -2.34999990 3.03359731E-04 + -2.40000010 2.60956818E-04 + -2.45000005 1.56416630E-04 + -2.50000000 2.07028861E-05 + -2.54999995 -1.07782871E-04 + -2.59999990 -1.94786466E-04 + -2.65000010 -2.20181784E-04 + -2.70000005 -1.83340715E-04 + -2.75000000 -1.02345366E-04 + -2.79999995 -7.42036673E-06 + -2.84999990 6.92415997E-05 + -2.90000010 1.03984508E-04 + -2.95000005 8.91751406E-05 + -3.00000000 3.53040232E-05 + -3.04999995 -3.29388786E-05 + -3.09999990 -8.53252059E-05 + -3.15000010 -9.66727530E-05 + -3.20000005 -5.59018881E-05 + -3.25000000 2.94333695E-05 + -3.29999995 1.34725677E-04 + -3.34999990 
2.25657772E-04 + -3.40000010 2.69107288E-04 + -3.45000005 2.44219351E-04 + -3.50000000 1.50106673E-04 + -3.54999995 7.59755494E-06 + -3.59999990 -1.45760816E-04 + -3.65000010 -2.66160292E-04 + -3.70000005 -3.16439458E-04 + -3.75000000 -2.77585379E-04 + -3.79999995 -1.55290109E-04 + -3.84999990 2.05774722E-05 + -3.90000010 2.03644537E-04 + -3.95000005 3.43773805E-04 + -4.00000000 4.01236030E-04 + -4.05000019 3.58188729E-04 + -4.09999990 2.24064774E-04 + -4.15000010 3.32265772E-05 + -4.19999981 -1.64500685E-04 + -4.25000000 -3.17707018E-04 + -4.30000019 -3.87673150E-04 + -4.34999990 -3.58910242E-04 + -4.40000010 -2.42939248E-04 + -4.44999981 -7.42898774E-05 + -4.50000000 9.99972108E-05 + -4.55000019 2.34173247E-04 + -4.59999990 2.96757644E-04 + -4.65000010 2.78866384E-04 + -4.69999981 1.95529981E-04 + -4.75000000 7.97987668E-05 + -4.80000019 -2.83722038E-05 + -4.84999990 -9.49065434E-05 + -4.90000010 -1.02059166E-04 + -4.94999981 -5.33325401E-05 + -5.00000000 2.85681454E-05 + -5.05000019 1.09958906E-04 + -5.09999990 1.57855524E-04 + -5.15000010 1.51310727E-04 + -5.19999981 8.87443894E-05 + -5.25000000 -1.11114423E-05 + -5.30000019 -1.14994553E-04 + -5.34999990 -1.85819910E-04 + -5.40000010 -1.94933091E-04 + -5.44999981 -1.32014102E-04 + -5.50000000 -9.44131170E-06 + -5.55000019 1.40367818E-04 + -5.59999990 2.73947022E-04 + -5.65000010 3.49443435E-04 + -5.69999981 3.39143036E-04 + -5.75000000 2.38071836E-04 + -5.80000019 6.60650330E-05 + -5.84999990 -1.37332230E-04 + -5.90000010 -3.23617249E-04 + -5.94999981 -4.48545004E-04 + -6.00000000 -4.83924057E-04 + -6.05000019 -4.24600032E-04 + -6.09999990 -2.88736686E-04 + -6.15000010 -1.11548834E-04 + -6.19999981 6.54075484E-05 + -6.25000000 2.06018667E-04 + -6.30000019 2.88897252E-04 + -6.34999990 3.11496056E-04 + -6.40000010 2.88082461E-04 + -6.44999981 2.42570619E-04 + -6.50000000 1.98812922E-04 + -6.55000019 1.71700784E-04 + -6.59999990 1.62084310E-04 + -6.65000010 1.57209608E-04 + -6.69999981 1.36506060E-04 + -6.75000000 
8.07344986E-05 + -6.80000019 -1.86871148E-05 + -6.84999990 -1.53533227E-04 + -6.90000010 -2.98219384E-04 + -6.94999981 -4.15514951E-04 + -7.00000000 -4.67134145E-04 + -7.05000019 -4.26121464E-04 + -7.09999990 -2.87151895E-04 + -7.15000010 -7.12627298E-05 + -7.19999981 1.76956120E-04 + -7.25000000 3.99481592E-04 + -7.30000019 5.39807254E-04 + -7.34999990 5.58637723E-04 + -7.40000010 4.45791491E-04 + -7.44999981 2.24657255E-04 + -7.50000000 -5.24484130E-05 + -7.55000019 -3.17113416E-04 + -7.59999990 -5.02559589E-04 + -7.65000010 -5.61473367E-04 + -7.69999981 -4.78982169E-04 + -7.75000000 -2.77075131E-04 + -7.80000019 -9.10030030E-06 + -7.84999990 2.54273997E-04 + -7.90000010 4.43853613E-04 + -7.94999981 5.10343700E-04 + -8.00000000 4.37369075E-04 + -8.05000019 2.45555828E-04 + -8.10000038 -1.33761532E-05 + -8.14999962 -2.70798511E-04 + -8.19999981 -4.59666335E-04 + -8.25000000 -5.32218837E-04 + -8.30000019 -4.72095737E-04 + -8.35000038 -2.97711609E-04 + -8.39999962 -5.62060668E-05 + -8.44999981 1.90095510E-04 + -8.50000000 3.80764104E-04 + -8.55000019 4.73083317E-04 + -8.60000038 4.52299020E-04 + -8.64999962 3.33702134E-04 + -8.69999981 1.56331793E-04 + -8.75000000 -2.95358859E-05 + -8.80000019 -1.77253998E-04 + -8.85000038 -2.56629457E-04 + -8.89999962 -2.61059206E-04 + -8.94999981 -2.07128745E-04 + -9.00000000 -1.27127496E-04 + -9.05000019 -5.70618940E-05 + -9.10000038 -2.40668978E-05 + -9.14999962 -3.72403956E-05 + -9.19999981 -8.48152995E-05 + -9.25000000 -1.38574105E-04 + -9.30000019 -1.64105120E-04 + -9.35000038 -1.33603477E-04 + -9.39999962 -3.70253911E-05 + -9.44999981 1.12189417E-04 + -9.50000000 2.78997322E-04 + -9.55000019 4.16046649E-04 + -9.60000038 4.77820693E-04 + -9.64999962 4.35388123E-04 + -9.69999981 2.87116360E-04 + -9.75000000 6.17361075E-05 + -9.80000019 -1.87650556E-04 + -9.85000038 -3.97289725E-04 + -9.89999962 -5.10457379E-04 + -9.94999981 -4.93965519E-04 + -10.0000000 -3.48510599E-04 diff --git 
a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXY.semd b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXY.semd new file mode 100644 index 000000000..f9ce75e20 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXY.semd @@ -0,0 +1,700 @@ + 24.9500008 -15.1203032 + 24.8999996 -15.0389519 + 24.8500004 -14.9577646 + 24.7999992 -14.8766785 + 24.7500000 -14.7956266 + 24.7000008 -14.7145605 + 24.6499996 -14.6334686 + 24.6000004 -14.5523758 + 24.5499992 -14.4713297 + 24.5000000 -14.3903856 + 24.4500008 -14.3095798 + 24.3999996 -14.2289133 + 24.3500004 -14.1483469 + 24.2999992 -14.0678120 + 24.2500000 -13.9872341 + 24.2000008 -13.9065533 + 24.1499996 -13.8257513 + 24.1000004 -13.7448463 + 24.0499992 -13.6638899 + 24.0000000 -13.5829449 + 23.9500008 -13.5020590 + 23.8999996 -13.4212437 + 23.8500004 -13.3404636 + 23.7999992 -13.2596540 + 23.7500000 -13.1787357 + 23.7000008 -13.0976410 + 23.6499996 -13.0163412 + 23.6000004 -12.9348516 + 23.5499992 -12.8532257 + 23.5000000 -12.7715330 + 23.4500008 -12.6898279 + 23.3999996 -12.6081324 + 23.3500004 -12.5264196 + 23.2999992 -12.4446192 + 23.2500000 -12.3626375 + 23.2000008 -12.2803860 + 23.1499996 -12.1978073 + 23.1000004 -12.1148872 + 23.0499992 -12.0316553 + 23.0000000 -11.9481630 + 22.9500008 -11.8644686 + 22.8999996 -11.7806082 + 22.8500004 -11.6965818 + 22.7999992 -11.6123533 + 22.7500000 -11.5278692 + 22.7000008 -11.4430685 + 22.6499996 -11.3579102 + 22.6000004 -11.2723780 + 22.5499992 -11.1864815 + 22.5000000 -11.1002464 + 22.4500008 -11.0136976 + 22.3999996 -10.9268484 + 22.3500004 -10.8396912 + 22.2999992 -10.7522001 + 22.2500000 -10.6643448 + 22.2000008 -10.5761003 + 22.1499996 -10.4874544 + 22.1000004 -10.3984137 + 22.0499992 -10.3089895 + 22.0000000 -10.2191925 + 21.9500008 -10.1290178 + 21.8999996 -10.0384436 + 21.8500004 -9.94743347 + 
21.7999992 -9.85594940 + 21.7500000 -9.76396561 + 21.7000008 -9.67148209 + 21.6499996 -9.57852840 + 21.6000004 -9.48515034 + 21.5499992 -9.39140129 + 21.5000000 -9.29731655 + 21.4500008 -9.20290184 + 21.3999996 -9.10813427 + 21.3500004 -9.01296616 + 21.2999992 -8.91734695 + 21.2500000 -8.82124615 + 21.2000008 -8.72466183 + 21.1499996 -8.62763119 + 21.1000004 -8.53021526 + 21.0499992 -8.43248177 + 21.0000000 -8.33448124 + 20.9500008 -8.23623085 + 20.8999996 -8.13770962 + 20.8500004 -8.03887081 + 20.7999992 -7.93966007 + 20.7500000 -7.84004736 + 20.7000008 -7.74003315 + 20.6499996 -7.63966227 + 20.6000004 -7.53901196 + 20.5499992 -7.43817043 + 20.5000000 -7.33721542 + 20.4500008 -7.23619080 + 20.3999996 -7.13510275 + 20.3500004 -7.03392506 + 20.2999992 -6.93261814 + 20.2500000 -6.83115149 + 20.2000008 -6.72952175 + 20.1499996 -6.62775946 + 20.1000004 -6.52592516 + 20.0499992 -6.42409706 + 20.0000000 -6.32234573 + 19.9500008 -6.22072458 + 19.8999996 -6.11926031 + 19.8500004 -6.01795530 + 19.7999992 -5.91680288 + 19.7500000 -5.81580162 + 19.7000008 -5.71496534 + 19.6499996 -5.61433268 + 19.6000004 -5.51395893 + 19.5499992 -5.41390562 + 19.5000000 -5.31422758 + 19.4500008 -5.21496248 + 19.3999996 -5.11612844 + 19.3500004 -5.01773071 + 19.2999992 -4.91977072 + 19.2500000 -4.82226133 + 19.2000008 -4.72523260 + 19.1499996 -4.62873363 + 19.1000004 -4.53282785 + 19.0499992 -4.43757820 + 19.0000000 -4.34303951 + 18.9500008 -4.24924612 + 18.8999996 -4.15621519 + 18.8500004 -4.06394911 + 18.7999992 -3.97244859 + 18.7500000 -3.88172054 + 18.7000008 -3.79178739 + 18.6499996 -3.70268893 + 18.6000004 -3.61447597 + 18.5499992 -3.52720261 + 18.5000000 -3.44091654 + 18.4500008 -3.35565138 + 18.3999996 -3.27142572 + 18.3500004 -3.18824577 + 18.2999992 -3.10611153 + 18.2500000 -3.02502370 + 18.2000008 -2.94498944 + 18.1499996 -2.86602473 + 18.1000004 -2.78815246 + 18.0499992 -2.71140003 + 18.0000000 -2.63579512 + 17.9500008 -2.56136394 + 17.8999996 -2.48812938 + 17.8500004 -2.41611171 + 
17.7999992 -2.34532857 + 17.7500000 -2.27579546 + 17.7000008 -2.20752549 + 17.6499996 -2.14052629 + 17.6000004 -2.07479930 + 17.5499992 -2.01033974 + 17.5000000 -1.94713795 + 17.4500008 -1.88518393 + 17.3999996 -1.82447100 + 17.3500004 -1.76499891 + 17.2999992 -1.70677531 + 17.2500000 -1.64981329 + 17.2000008 -1.59412682 + 17.1499996 -1.53972507 + 17.1000004 -1.48660779 + 17.0499992 -1.43476427 + 17.0000000 -1.38417530 + 16.9500008 -1.33481836 + 16.8999996 -1.28667486 + 16.8500004 -1.23973489 + 16.7999992 -1.19399965 + 16.7500000 -1.14947879 + 16.7000008 -1.10618436 + 16.6499996 -1.06412327 + 16.6000004 -1.02329051 + 16.5499992 -0.983664513 + 16.5000000 -0.945209742 + 16.4500008 -0.907881379 + 16.3999996 -0.871634305 + 16.3500004 -0.836431742 + 16.2999992 -0.802251160 + 16.2500000 -0.769085944 + 16.2000008 -0.736942172 + 16.1499996 -0.705831707 + 16.1000004 -0.675763726 + 16.0499992 -0.646737456 + 16.0000000 -0.618737936 + 15.9499998 -0.591735959 + 15.8999996 -0.565692127 + 15.8500004 -0.540563345 + 15.8000002 -0.516309381 + 15.7500000 -0.492898047 + 15.6999998 -0.470307738 + 15.6499996 -0.448527038 + 15.6000004 -0.427550852 + 15.5500002 -0.407376170 + 15.5000000 -0.387997091 + 15.4499998 -0.369401425 + 15.3999996 -0.351569235 + 15.3500004 -0.334473342 + 15.3000002 -0.318081319 + 15.2500000 -0.302358836 + 15.1999998 -0.287272811 + 15.1499996 -0.272794276 + 15.1000004 -0.258900195 + 15.0500002 -0.245573968 + 15.0000000 -0.232804820 + 14.9499998 -0.220585972 + 14.8999996 -0.208912298 + 14.8500004 -0.197777674 + 14.8000002 -0.187172592 + 14.7500000 -0.177082539 + 14.6999998 -0.167487562 + 14.6499996 -0.158362910 + 14.6000004 -0.149680734 + 14.5500002 -0.141412437 + 14.5000000 -0.133531272 + 14.4499998 -0.126014218 + 14.3999996 -0.118843324 + 14.3500004 -0.112005755 + 14.3000002 -0.105492897 + 14.2500000 -9.92987975E-02 + 14.1999998 -9.34183598E-02 + 14.1499996 -8.78456905E-02 + 14.1000004 -8.25729519E-02 + 14.0500002 -7.75899068E-02 + 14.0000000 -7.28840604E-02 + 
13.9499998 -6.84412643E-02 + 13.8999996 -6.42465577E-02 + 13.8500004 -6.02851063E-02 + 13.8000002 -5.65429181E-02 + 13.7500000 -5.30074388E-02 + 13.6999998 -4.96677831E-02 + 13.6499996 -4.65147644E-02 + 13.6000004 -4.35405858E-02 + 13.5500002 -4.07383107E-02 + 13.5000000 -3.81011926E-02 + 13.4499998 -3.56220044E-02 + 13.3999996 -3.32924984E-02 + 13.3500004 -3.11032142E-02 + 13.3000002 -2.90437322E-02 + 13.2500000 -2.71033775E-02 + 13.1999998 -2.52722241E-02 + 13.1499996 -2.35421360E-02 + 13.1000004 -2.19075736E-02 + 13.0500002 -2.03658249E-02 + 13.0000000 -1.89165566E-02 + 12.9499998 -1.75607502E-02 + 12.8999996 -1.62992291E-02 + 12.8500004 -1.51312165E-02 + 12.8000002 -1.40532786E-02 + 12.7500000 -1.30590163E-02 + 12.6999998 -1.21395485E-02 + 12.6499996 -1.12847164E-02 + 12.6000004 -1.04846321E-02 + 12.5500002 -9.73118097E-03 + 12.5000000 -9.01902094E-03 + 12.4499998 -8.34585261E-03 + 12.3999996 -7.71189481E-03 + 12.3500004 -7.11875735E-03 + 12.3000002 -6.56807236E-03 + 12.2500000 -6.06026268E-03 + 12.1999998 -5.59381908E-03 + 12.1499996 -5.16524678E-03 + 12.1000004 -4.76966007E-03 + 12.0500002 -4.40180209E-03 + 12.0000000 -4.05715918E-03 + 11.9499998 -3.73282610E-03 + 11.8999996 -3.42785451E-03 + 11.8500004 -3.14300787E-03 + 11.8000002 -2.87999958E-03 + 11.7500000 -2.64046039E-03 + 11.6999998 -2.42495583E-03 + 11.6499996 -2.23234086E-03 + 11.6000004 -2.05965806E-03 + 11.5500002 -1.90259831E-03 + 11.5000000 -1.75639719E-03 + 11.4499998 -1.61690207E-03 + 11.3999996 -1.48150232E-03 + 11.3500004 -1.34965265E-03 + 11.3000002 -1.22283562E-03 + 11.2500000 -1.10397360E-03 + 11.1999998 -9.96452291E-04 + 11.1499996 -9.03027249E-04 + 11.1000004 -8.24916584E-04 + 11.0500002 -7.61321804E-04 + 11.0000000 -7.09500164E-04 + 10.9499998 -6.65352854E-04 + 10.8999996 -6.24355511E-04 + 10.8500004 -5.82569861E-04 + 10.8000002 -5.37469226E-04 + 10.7500000 -4.88373451E-04 + 10.6999998 -4.36410279E-04 + 10.6499996 -3.84050596E-04 + 10.6000004 -3.34373763E-04 + 10.5500002 -2.90272757E-04 
+ 10.5000000 -2.53798411E-04 + 10.4499998 -2.25779266E-04 + 10.3999996 -2.05760851E-04 + 10.3500004 -1.92222127E-04 + 10.3000002 -1.82967240E-04 + 10.2500000 -1.75572728E-04 + 10.1999998 -1.67791717E-04 + 10.1499996 -1.57857503E-04 + 10.1000004 -1.44672420E-04 + 10.0500002 -1.27895793E-04 + 10.0000000 -1.07950931E-04 + 9.94999981 -8.59606371E-05 + 9.89999962 -6.36060577E-05 + 9.85000038 -4.28992607E-05 + 9.80000019 -2.58721921E-05 + 9.75000000 -1.42131139E-05 + 9.69999981 -8.91337368E-06 + 9.64999962 -1.00077768E-05 + 9.60000038 -1.64875564E-05 + 9.55000019 -2.64313530E-05 + 9.50000000 -3.73453731E-05 + 9.44999981 -4.66445745E-05 + 9.39999962 -5.21635739E-05 + 9.35000038 -5.25738578E-05 + 9.30000019 -4.76082059E-05 + 9.25000000 -3.80463680E-05 + 9.19999981 -2.54802126E-05 + 9.14999962 -1.19314664E-05 + 9.10000038 5.75739307E-07 + 9.05000019 1.03880802E-05 + 9.00000000 1.64719004E-05 + 8.94999981 1.85098597E-05 + 8.89999962 1.68567894E-05 + 8.85000038 1.23910932E-05 + 8.80000019 6.30516843E-06 + 8.75000000 -1.27011191E-07 + 8.69999981 -5.77583387E-06 + 8.64999962 -9.85037877E-06 + 8.60000038 -1.20336317E-05 + 8.55000019 -1.25273655E-05 + 8.50000000 -1.19795204E-05 + 8.44999981 -1.12865246E-05 + 8.39999962 -1.12984872E-05 + 8.35000038 -1.24968383E-05 + 8.30000019 -1.47446117E-05 + 8.25000000 -1.72110449E-05 + 8.19999981 -1.85344525E-05 + 8.14999962 -1.72152086E-05 + 8.10000038 -1.21450785E-05 + 8.05000019 -3.11101553E-06 + 8.00000000 8.90878709E-06 + 7.94999981 2.17952547E-05 + 7.90000010 3.27226153E-05 + 7.84999990 3.88848639E-05 + 7.80000019 3.83103979E-05 + 7.75000000 3.05084250E-05 + 7.69999981 1.67237249E-05 + 7.65000010 -3.09290556E-07 + 7.59999990 -1.70614239E-05 + 7.55000019 -3.01483287E-05 + 7.50000000 -3.72220711E-05 + 7.44999981 -3.75319541E-05 + 7.40000010 -3.19812243E-05 + 7.34999990 -2.26783559E-05 + 7.30000019 -1.21558269E-05 + 7.25000000 -2.54169663E-06 + 7.19999981 5.02200237E-06 + 7.15000010 1.05194786E-05 + 7.09999990 1.47367173E-05 + 7.05000019 
1.86281195E-05 + 7.00000000 2.26488755E-05 + 6.94999981 2.63402926E-05 + 6.90000010 2.83556110E-05 + 6.84999990 2.69389275E-05 + 6.80000019 2.06859240E-05 + 6.75000000 9.29328417E-06 + 6.69999981 -6.00797739E-06 + 6.65000010 -2.25349959E-05 + 6.59999990 -3.67666107E-05 + 6.55000019 -4.53077701E-05 + 6.50000000 -4.58906543E-05 + 6.44999981 -3.80763158E-05 + 6.40000010 -2.34184990E-05 + 6.34999990 -5.03380079E-06 + 6.30000019 1.32778532E-05 + 6.25000000 2.80721706E-05 + 6.19999981 3.71270798E-05 + 6.15000010 3.98785742E-05 + 6.09999990 3.73252042E-05 + 6.05000019 3.14824938E-05 + 6.00000000 2.46092641E-05 + 5.94999981 1.84811270E-05 + 5.90000010 1.39404528E-05 + 5.84999990 1.08323475E-05 + 5.80000019 8.29200235E-06 + 5.75000000 5.23405106E-06 + 5.69999981 8.46514070E-07 + 5.65000010 -5.08072299E-06 + 5.59999990 -1.20803688E-05 + 5.55000019 -1.91726594E-05 + 5.50000000 -2.51798665E-05 + 5.44999981 -2.90671269E-05 + 5.40000010 -3.02030294E-05 + 5.34999990 -2.84822654E-05 + 5.30000019 -2.43080904E-05 + 5.25000000 -1.84714554E-05 + 5.19999981 -1.19766910E-05 + 5.15000010 -5.85481848E-06 + 5.09999990 -9.89122213E-07 + 5.05000019 2.03272521E-06 + 5.00000000 3.02407557E-06 + 4.94999981 2.23791994E-06 + 4.90000010 3.11631254E-07 + 4.84999990 -1.89496711E-06 + 4.80000019 -3.54765598E-06 + 4.75000000 -4.10988241E-06 + 4.69999981 -3.54708936E-06 + 4.65000010 -2.37371478E-06 + 4.59999990 -1.49591597E-06 + 4.55000019 -1.87651472E-06 + 4.50000000 -4.12632880E-06 + 4.44999981 -8.17408636E-06 + 4.40000010 -1.31593761E-05 + 4.34999990 -1.76241265E-05 + 4.30000019 -1.99686710E-05 + 4.25000000 -1.90292831E-05 + 4.19999981 -1.45703534E-05 + 4.15000010 -7.49684432E-06 + 4.09999990 3.17351095E-07 + 4.05000019 6.55224221E-06 + 4.00000000 9.15033979E-06 + 3.95000005 6.97065389E-06 + 3.90000010 1.62109870E-07 + 3.84999990 -9.86915438E-06 + 3.79999995 -2.08762631E-05 + 3.75000000 -3.04126061E-05 + 3.70000005 -3.64667576E-05 + 3.65000010 -3.78823315E-05 + 3.59999990 -3.44566724E-05 + 
3.54999995 -2.67507367E-05 + 3.50000000 -1.57551840E-05 + 3.45000005 -2.59617468E-06 + 3.40000010 1.15873136E-05 + 3.34999990 2.55778705E-05 + 3.29999995 3.79317135E-05 + 3.25000000 4.69222505E-05 + 3.20000005 5.07239856E-05 + 3.15000010 4.78709917E-05 + 3.09999990 3.78685181E-05 + 3.04999995 2.17111065E-05 + 3.00000000 2.03287209E-06 + 2.95000005 -1.72875116E-05 + 2.90000010 -3.20475956E-05 + 2.84999990 -3.89252345E-05 + 2.79999995 -3.65675696E-05 + 2.75000000 -2.61521527E-05 + 2.70000005 -1.11570071E-05 + 2.65000010 3.65379674E-06 + 2.59999990 1.37301022E-05 + 2.54999995 1.62203141E-05 + 2.50000000 1.09146531E-05 + 2.45000005 2.92758642E-07 + 2.40000010 -1.13653587E-05 + 2.34999990 -1.95049743E-05 + 2.29999995 -2.09191148E-05 + 2.25000000 -1.48836089E-05 + 2.20000005 -3.39246230E-06 + 2.15000010 9.59140380E-06 + 2.09999990 1.95945267E-05 + 2.04999995 2.32662242E-05 + 2.00000000 1.95928569E-05 + 1.95000005 1.02587637E-05 + 1.89999998 -9.63450134E-07 + 1.85000002 -9.55778887E-06 + 1.79999995 -1.18523621E-05 + 1.75000000 -6.28773023E-06 + 1.70000005 6.05917103E-06 + 1.64999998 2.18389323E-05 + 1.60000002 3.65094857E-05 + 1.54999995 4.57289134E-05 + 1.50000000 4.65895828E-05 + 1.45000005 3.83257175E-05 + 1.39999998 2.23466686E-05 + 1.35000002 1.68903068E-06 + 1.29999995 -1.98499565E-05 + 1.25000000 -3.85912172E-05 + 1.20000005 -5.16343971E-05 + 1.14999998 -5.72120625E-05 + 1.10000002 -5.47831878E-05 + 1.04999995 -4.49775835E-05 + 1.00000000 -2.94828969E-05 + 0.949999988 -1.08889471E-05 + 0.899999976 7.57162616E-06 + 0.850000024 2.24665109E-05 + 0.800000012 3.08091221E-05 + 0.750000000 3.08052295E-05 + 0.699999988 2.24826581E-05 + 0.649999976 7.94043899E-06 + 0.600000024 -8.97643804E-06 + 0.550000012 -2.35923944E-05 + 0.500000000 -3.16970363E-05 + 0.449999988 -3.08305353E-05 + 0.400000006 -2.11158967E-05 + 0.349999994 -5.31091928E-06 + 0.300000012 1.19933320E-05 + 0.250000000 2.57970951E-05 + 0.200000003 3.22724300E-05 + 0.150000006 2.99593630E-05 + 0.100000001 
2.01632902E-05 + 5.00000007E-02 6.42231316E-06 + 0.00000000 -6.75220963E-06 + -5.00000007E-02 -1.54006102E-05 + -0.100000001 -1.73613153E-05 + -0.150000006 -1.28174297E-05 + -0.200000003 -3.99891269E-06 + -0.250000000 5.78527079E-06 + -0.300000012 1.33753310E-05 + -0.349999994 1.67767885E-05 + -0.400000006 1.56183596E-05 + -0.449999988 1.09588391E-05 + -0.500000000 4.61937088E-06 + -0.550000012 -1.61095443E-06 + -0.600000024 -6.54128780E-06 + -0.649999976 -9.71256122E-06 + -0.699999988 -1.11256468E-05 + -0.750000000 -1.07940677E-05 + -0.800000012 -8.42997270E-06 + -0.850000024 -3.49055563E-06 + -0.899999976 4.39133055E-06 + -0.949999988 1.47968394E-05 + -1.00000000 2.60765901E-05 + -1.04999995 3.53950782E-05 + -1.10000002 3.93976661E-05 + -1.14999998 3.53508294E-05 + -1.20000005 2.23523311E-05 + -1.25000000 2.10216831E-06 + -1.29999995 -2.11701445E-05 + -1.35000002 -4.17343799E-05 + -1.39999998 -5.40843830E-05 + -1.45000005 -5.47929267E-05 + -1.50000000 -4.38217285E-05 + -1.54999995 -2.47576281E-05 + -1.60000002 -3.80548522E-06 + -1.64999998 1.21857192E-05 + -1.70000005 1.80208644E-05 + -1.75000000 1.19919523E-05 + -1.79999995 -3.40726160E-06 + -1.85000002 -2.21675255E-05 + -1.89999998 -3.66566055E-05 + -1.95000005 -4.01109901E-05 + -2.00000000 -2.89162381E-05 + -2.04999995 -3.93717619E-06 + -2.09999990 2.95258815E-05 + -2.15000010 6.31367948E-05 + -2.20000005 8.78434003E-05 + -2.25000000 9.64222781E-05 + -2.29999995 8.55206890E-05 + -2.34999990 5.66503732E-05 + -2.40000010 1.59071260E-05 + -2.45000005 -2.74642134E-05 + -2.50000000 -6.32651063E-05 + -2.54999995 -8.27599870E-05 + -2.59999990 -8.07680262E-05 + -2.65000010 -5.69853146E-05 + -2.70000005 -1.62601264E-05 + -2.75000000 3.22599808E-05 + -2.79999995 7.69871913E-05 + -2.84999990 1.06439598E-04 + -2.90000010 1.11983456E-04 + -2.95000005 9.01676831E-05 + -3.00000000 4.40415533E-05 + -3.04999995 -1.70090661E-05 + -3.09999990 -7.90675185E-05 + -3.15000010 -1.26876053E-04 + -3.20000005 -1.47720828E-04 + 
-3.25000000 -1.35019407E-04 + -3.29999995 -9.05651614E-05 + -3.34999990 -2.46475265E-05 + -3.40000010 4.61906347E-05 + -3.45000005 1.03313701E-04 + -3.50000000 1.31011475E-04 + -3.54999995 1.20974262E-04 + -3.59999990 7.48937964E-05 + -3.65000010 4.34633375E-06 + -3.70000005 -7.21123215E-05 + -3.75000000 -1.33885129E-04 + -3.79999995 -1.63932607E-04 + -3.84999990 -1.53447123E-04 + -3.90000010 -1.04280931E-04 + -3.95000005 -2.84606631E-05 + -4.00000000 5.50387012E-05 + -4.05000019 1.25320759E-04 + -4.09999990 1.64989397E-04 + -4.15000010 1.64512690E-04 + -4.19999981 1.24436017E-04 + -4.25000000 5.49822216E-05 + -4.30000019 -2.67220857E-05 + -4.34999990 -1.00980411E-04 + -4.40000010 -1.50292995E-04 + -4.44999981 -1.63419434E-04 + -4.50000000 -1.37850599E-04 + -4.55000019 -8.02167415E-05 + -4.59999990 -4.62195567E-06 + -4.65000010 7.06822029E-05 + -4.69999981 1.27552543E-04 + -4.75000000 1.52226217E-04 + -4.80000019 1.38670846E-04 + -4.84999990 9.02205211E-05 + -4.90000010 1.90216506E-05 + -4.94999981 -5.67257521E-05 + -5.00000000 -1.17212614E-04 + -5.05000019 -1.46145176E-04 + -5.09999990 -1.35248003E-04 + -5.15000010 -8.68673233E-05 + -5.19999981 -1.37928555E-05 + -5.25000000 6.38726196E-05 + -5.30000019 1.24114085E-04 + -5.34999990 1.49254061E-04 + -5.40000010 1.31255583E-04 + -5.44999981 7.44360805E-05 + -5.50000000 -5.36556672E-06 + -5.55000019 -8.50889992E-05 + -5.59999990 -1.41061042E-04 + -5.65000010 -1.55986447E-04 + -5.69999981 -1.24203652E-04 + -5.75000000 -5.35650506E-05 + -5.80000019 3.66870627E-05 + -5.84999990 1.21446443E-04 + -5.90000010 1.77013178E-04 + -5.94999981 1.87916463E-04 + -6.00000000 1.51255750E-04 + -6.05000019 7.73193169E-05 + -6.09999990 -1.35984910E-05 + -6.15000010 -9.71124609E-05 + -6.19999981 -1.51556029E-04 + -6.25000000 -1.63683784E-04 + -6.30000019 -1.31723151E-04 + -6.34999990 -6.51236041E-05 + -6.40000010 1.86394464E-05 + -6.44999981 9.90483677E-05 + -6.50000000 1.57792834E-04 + -6.55000019 1.82893375E-04 + -6.59999990 
1.70821178E-04 + -6.65000010 1.26411003E-04 + -6.69999981 6.09617455E-05 + -6.75000000 -1.07303622E-05 + -6.80000019 -7.36044894E-05 + -6.84999990 -1.15202391E-04 + -6.90000010 -1.27797277E-04 + -6.94999981 -1.09629298E-04 + -7.00000000 -6.52004965E-05 + -7.05000019 -4.59915100E-06 + -7.09999990 5.81690838E-05 + -7.15000010 1.07776861E-04 + -7.19999981 1.30820161E-04 + -7.25000000 1.19147364E-04 + -7.30000019 7.24630663E-05 + -7.34999990 -6.53353823E-07 + -7.40000010 -8.39286877E-05 + -7.44999981 -1.56850510E-04 + -7.50000000 -1.99766946E-04 + -7.55000019 -1.99365866E-04 + -7.59999990 -1.52944398E-04 + -7.65000010 -7.00387973E-05 + -7.69999981 2.93556204E-05 + -7.75000000 1.19752309E-04 + -7.80000019 1.77082969E-04 + -7.84999990 1.85593395E-04 + -7.90000010 1.42640652E-04 + -7.94999981 5.98405459E-05 + -8.00000000 -3.99234741E-05 + -8.05000019 -1.28854255E-04 + -8.10000038 -1.82119853E-04 + -8.14999962 -1.84913952E-04 + -8.19999981 -1.36581919E-04 + -8.25000000 -5.05933385E-05 + -8.30000019 4.95765962E-05 + -8.35000038 1.37345283E-04 + -8.39999962 1.90570892E-04 + -8.44999981 1.97492220E-04 + -8.50000000 1.59384290E-04 + -8.55000019 8.93367105E-05 + -8.60000038 7.77488094E-06 + -8.64999962 -6.37103149E-05 + -8.69999981 -1.08271401E-04 + -8.75000000 -1.17757721E-04 + -8.80000019 -9.37900186E-05 + -8.85000038 -4.60176634E-05 + -8.89999962 1.15288476E-05 + -8.94999981 6.46311310E-05 + -9.00000000 1.02325634E-04 + -9.05000019 1.18782067E-04 + -9.10000038 1.13475129E-04 + -9.14999962 9.00908635E-05 + -9.19999981 5.48970820E-05 + -9.25000000 1.52365574E-05 + -9.30000019 -2.15337550E-05 + -9.35000038 -4.87307807E-05 + -9.39999962 -6.09798772E-05 + -9.44999981 -5.50846125E-05 + -9.50000000 -3.11341937E-05 + -9.55000019 6.53311463E-06 + -9.60000038 4.91483697E-05 + -9.64999962 8.46947441E-05 + -9.69999981 1.00619669E-04 + -9.75000000 8.75886253E-05 + -9.80000019 4.31685221E-05 + -9.85000038 -2.59655553E-05 + -9.89999962 -1.04467705E-04 + -9.94999981 -1.71794789E-04 + 
-10.0000000 -2.08023819E-04 diff --git a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXZ.semd b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXZ.semd new file mode 100644 index 000000000..5a6efc748 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/DB.X20.BXZ.semd @@ -0,0 +1,700 @@ + 24.9500008 1.03584766 + 24.8999996 1.01555002 + 24.8500004 0.995141268 + 24.7999992 0.974635184 + 24.7500000 0.954046905 + 24.7000008 0.933389902 + 24.6499996 0.912675202 + 24.6000004 0.891911685 + 24.5499992 0.871106207 + 24.5000000 0.850263059 + 24.4500008 0.829381168 + 24.3999996 0.808450162 + 24.3500004 0.787446618 + 24.2999992 0.766332746 + 24.2500000 0.745058477 + 24.2000008 0.723568320 + 24.1499996 0.701812088 + 24.1000004 0.679755807 + 24.0499992 0.657391310 + 24.0000000 0.634740055 + 23.9500008 0.611848891 + 23.8999996 0.588778734 + 23.8500004 0.565588474 + 23.7999992 0.542318046 + 23.7500000 0.518976808 + 23.7000008 0.495537221 + 23.6499996 0.471941322 + 23.6000004 0.448113024 + 23.5499992 0.423975646 + 23.5000000 0.399470031 + 23.4500008 0.374566078 + 23.3999996 0.349266291 + 23.3500004 0.323600590 + 23.2999992 0.297614038 + 23.2500000 0.271352798 + 23.2000008 0.244851068 + 23.1499996 0.218124017 + 23.1000004 0.191168010 + 23.0499992 0.163965866 + 23.0000000 0.136496052 + 22.9500008 0.108740650 + 22.8999996 8.06900561E-02 + 22.8500004 5.23434505E-02 + 22.7999992 2.37047393E-02 + 22.7500000 -5.22321183E-03 + 22.7000008 -3.44431140E-02 + 22.6499996 -6.39642999E-02 + 22.6000004 -9.38000828E-02 + 22.5499992 -0.123962313 + 22.5000000 -0.154455975 + 22.4500008 -0.185274616 + 22.3999996 -0.216400027 + 22.3500004 -0.247804806 + 22.2999992 -0.279457897 + 22.2500000 -0.311329752 + 22.2000008 -0.343396485 + 22.1499996 -0.375640839 + 22.1000004 -0.408050567 + 22.0499992 -0.440615654 + 22.0000000 -0.473326206 + 
21.9500008 -0.506172299 + 21.8999996 -0.539145768 + 21.8500004 -0.572242737 + 21.7999992 -0.605465353 + 21.7500000 -0.638820231 + 21.7000008 -0.672313571 + 21.6499996 -0.705943167 + 21.6000004 -0.739691079 + 21.5499992 -0.773518384 + 21.5000000 -0.807366192 + 21.4500008 -0.841162443 + 21.3999996 -0.874834299 + 21.3500004 -0.908321083 + 21.2999992 -0.941585064 + 21.2500000 -0.974615037 + 21.2000008 -1.00742149 + 21.1499996 -1.04002559 + 21.1000004 -1.07244432 + 21.0499992 -1.10467696 + 21.0000000 -1.13669908 + 20.9500008 -1.16846371 + 20.8999996 -1.19991076 + 20.8500004 -1.23098087 + 20.7999992 -1.26162720 + 20.7500000 -1.29182208 + 20.7000008 -1.32155502 + 20.6499996 -1.35082197 + 20.6000004 -1.37961185 + 20.5499992 -1.40789413 + 20.5000000 -1.43561244 + 20.4500008 -1.46268904 + 20.3999996 -1.48903894 + 20.3500004 -1.51458883 + 20.2999992 -1.53929555 + 20.2500000 -1.56315780 + 20.2000008 -1.58621454 + 20.1499996 -1.60853064 + 20.1000004 -1.63017356 + 20.0499992 -1.65118659 + 20.0000000 -1.67156804 + 19.9500008 -1.69126081 + 19.8999996 -1.71015871 + 19.8500004 -1.72812557 + 19.7999992 -1.74502504 + 19.7500000 -1.76075041 + 19.7000008 -1.77524698 + 19.6499996 -1.78852212 + 19.6000004 -1.80063999 + 19.5499992 -1.81169975 + 19.5000000 -1.82180846 + 19.4500008 -1.83105087 + 19.3999996 -1.83946681 + 19.3500004 -1.84704089 + 19.2999992 -1.85370684 + 19.2500000 -1.85936403 + 19.2000008 -1.86390221 + 19.1499996 -1.86722875 + 19.1000004 -1.86928952 + 19.0499992 -1.87008059 + 19.0000000 -1.86964595 + 18.9500008 -1.86806500 + 18.8999996 -1.86543143 + 18.8500004 -1.86183047 + 18.7999992 -1.85731959 + 18.7500000 -1.85191846 + 18.7000008 -1.84561050 + 18.6499996 -1.83835340 + 18.6000004 -1.83009756 + 18.5499992 -1.82080555 + 18.5000000 -1.81046653 + 18.4500008 -1.79910266 + 18.3999996 -1.78676522 + 18.3500004 -1.77352190 + 18.2999992 -1.75943911 + 18.2500000 -1.74456704 + 18.2000008 -1.72892964 + 18.1499996 -1.71252584 + 18.1000004 -1.69533932 + 18.0499992 -1.67735541 + 
18.0000000 -1.65857804 + 17.9500008 -1.63904262 + 17.8999996 -1.61881638 + 17.8500004 -1.59799004 + 17.7999992 -1.57665896 + 17.7500000 -1.55490088 + 17.7000008 -1.53275812 + 17.6499996 -1.51022995 + 17.6000004 -1.48727739 + 17.5499992 -1.46384239 + 17.5000000 -1.43987274 + 17.4500008 -1.41534829 + 17.3999996 -1.39029872 + 17.3500004 -1.36480701 + 17.2999992 -1.33899784 + 17.2500000 -1.31301296 + 17.2000008 -1.28698015 + 17.1499996 -1.26098716 + 17.1000004 -1.23506474 + 17.0499992 -1.20918703 + 17.0000000 -1.18328786 + 16.9500008 -1.15728736 + 16.8999996 -1.13112247 + 16.8500004 -1.10477138 + 16.7999992 -1.07826579 + 16.7500000 -1.05168712 + 16.7000008 -1.02514863 + 16.6499996 -0.998770058 + 16.6000004 -0.972650766 + 16.5499992 -0.946851015 + 16.5000000 -0.921385348 + 16.4500008 -0.896229565 + 16.3999996 -0.871338487 + 16.3500004 -0.846667707 + 16.2999992 -0.822193444 + 16.2500000 -0.797923148 + 16.2000008 -0.773895323 + 16.1499996 -0.750168085 + 16.1000004 -0.726802170 + 16.0499992 -0.703842819 + 16.0000000 -0.681308270 + 15.9499998 -0.659186661 + 15.8999996 -0.637444556 + 15.8500004 -0.616042495 + 15.8000002 -0.594952762 + 15.7500000 -0.574174345 + 15.6999998 -0.553738832 + 15.6499996 -0.533705771 + 15.6000004 -0.514147878 + 15.5500002 -0.495131552 + 15.5000000 -0.476696879 + 15.4499998 -0.458845079 + 15.3999996 -0.441536188 + 15.3500004 -0.424698979 + 15.3000002 -0.408250004 + 15.2500000 -0.392117113 + 15.1999998 -0.376260370 + 15.1499996 -0.360684246 + 15.1000004 -0.345437616 + 15.0500002 -0.330601156 + 15.0000000 -0.316265553 + 14.9499998 -0.302506417 + 14.8999996 -0.289363205 + 14.8500004 -0.276827455 + 14.8000002 -0.264844239 + 14.7500000 -0.253325999 + 14.6999998 -0.242174774 + 14.6499996 -0.231306553 + 14.6000004 -0.220671192 + 14.5500002 -0.210261971 + 14.5000000 -0.200112790 + 14.4499998 -0.190284148 + 14.3999996 -0.180842102 + 14.3500004 -0.171836853 + 14.3000002 -0.163287073 + 14.2500000 -0.155174255 + 14.1999998 -0.147448689 + 14.1499996 -0.140044600 
+ 14.1000004 -0.132899672 + 14.0500002 -0.125972927 + 14.0000000 -0.119255148 + 13.9499998 -0.112769060 + 13.8999996 -0.106559083 + 13.8500004 -0.100674167 + 13.8000002 -9.51489508E-02 + 13.7500000 -8.99892822E-02 + 13.6999998 -8.51665959E-02 + 13.6499996 -8.06230009E-02 + 13.6000004 -7.62856007E-02 + 13.5500002 -7.20857382E-02 + 13.5000000 -6.79773316E-02 + 13.4499998 -6.39487058E-02 + 13.3999996 -6.00243770E-02 + 13.3500004 -5.62560074E-02 + 13.3000002 -5.27053848E-02 + 13.2500000 -4.94242609E-02 + 13.1999998 -4.64371406E-02 + 13.1499996 -4.37319875E-02 + 13.1000004 -4.12615761E-02 + 13.0500002 -3.89550477E-02 + 13.0000000 -3.67361642E-02 + 12.9499998 -3.45429592E-02 + 12.8999996 -3.23431306E-02 + 12.8500004 -3.01407725E-02 + 12.8000002 -2.79728249E-02 + 12.7500000 -2.58964803E-02 + 12.6999998 -2.39714347E-02 + 12.6499996 -2.22422369E-02 + 12.6000004 -2.07257066E-02 + 12.5500002 -1.94067992E-02 + 12.5000000 -1.82436667E-02 + 12.4499998 -1.71799734E-02 + 12.3999996 -1.61605161E-02 + 12.3500004 -1.51454387E-02 + 12.3000002 -1.41189741E-02 + 12.2500000 -1.30905099E-02 + 12.1999998 -1.20881479E-02 + 12.1499996 -1.11471619E-02 + 12.1000004 -1.02971559E-02 + 12.0500002 -9.55193210E-03 + 12.0000000 -8.90502334E-03 + 11.9499998 -8.33199453E-03 + 11.8999996 -7.79847940E-03 + 11.8500004 -7.27121299E-03 + 11.8000002 -6.72845962E-03 + 11.7500000 -6.16659783E-03 + 11.6999998 -5.60084404E-03 + 11.6499996 -5.05999196E-03 + 11.6000004 -4.57686232E-03 + 11.5500002 -4.17745207E-03 + 11.5000000 -3.87211819E-03 + 11.4499998 -3.65147274E-03 + 11.3999996 -3.48823331E-03 + 11.3500004 -3.34446132E-03 + 11.3000002 -3.18204961E-03 + 11.2500000 -2.97335489E-03 + 11.1999998 -2.70886626E-03 + 11.1499996 -2.39967811E-03 + 11.1000004 -2.07407400E-03 + 11.0500002 -1.76923547E-03 + 11.0000000 -1.52047549E-03 + 10.9499998 -1.35110167E-03 + 10.8999996 -1.26578216E-03 + 10.8500004 -1.24925422E-03 + 10.8000002 -1.27066672E-03 + 10.7500000 -1.29222777E-03 + 10.6999998 -1.27962686E-03 + 10.6499996 
-1.21124380E-03 + 10.6000004 -1.08356192E-03 + 10.5500002 -9.11348092E-04 + 10.5000000 -7.22698460E-04 + 10.4499998 -5.50517230E-04 + 10.3999996 -4.22979036E-04 + 10.3500004 -3.55739292E-04 + 10.3000002 -3.48059984E-04 + 10.2500000 -3.83806677E-04 + 10.1999998 -4.36816481E-04 + 10.1499996 -4.78879549E-04 + 10.1000004 -4.87882702E-04 + 10.0500002 -4.53723507E-04 + 10.0000000 -3.80374579E-04 + 9.94999981 -2.83699919E-04 + 9.89999962 -1.85912955E-04 + 9.85000038 -1.08527398E-04 + 9.80000019 -6.60033184E-05 + 9.75000000 -6.19563580E-05 + 9.69999981 -8.89088260E-05 + 9.64999962 -1.31420384E-04 + 9.60000038 -1.71408363E-04 + 9.55000019 -1.93867279E-04 + 9.50000000 -1.91188563E-04 + 9.44999981 -1.64834462E-04 + 9.39999962 -1.24029379E-04 + 9.35000038 -8.20916612E-05 + 9.30000019 -5.17303633E-05 + 9.25000000 -4.08726410E-05 + 9.19999981 -5.03163501E-05 + 9.14999962 -7.38395902E-05 + 9.10000038 -1.00578785E-04 + 9.05000019 -1.18779179E-04 + 9.00000000 -1.19644312E-04 + 8.94999981 -1.00059828E-04 + 8.89999962 -6.33984309E-05 + 8.85000038 -1.82641434E-05 + 8.80000019 2.43154609E-05 + 8.75000000 5.42925445E-05 + 8.69999981 6.55215845E-05 + 8.64999962 5.73444777E-05 + 8.60000038 3.44469336E-05 + 8.55000019 5.14385374E-06 + 8.50000000 -2.12887971E-05 + 8.44999981 -3.73790899E-05 + 8.39999962 -3.94989074E-05 + 8.35000038 -2.86299510E-05 + 8.30000019 -9.72037560E-06 + 8.25000000 1.00844691E-05 + 8.19999981 2.37485456E-05 + 8.14999962 2.64867995E-05 + 8.10000038 1.71332376E-05 + 8.05000019 -1.61595733E-06 + 8.00000000 -2.40982754E-05 + 7.94999981 -4.34140129E-05 + 7.90000010 -5.34660612E-05 + 7.84999990 -5.07246659E-05 + 7.80000019 -3.52303032E-05 + 7.75000000 -1.05740282E-05 + 7.69999981 1.71168012E-05 + 7.65000010 4.08866290E-05 + 7.59999990 5.48680109E-05 + 7.55000019 5.58707688E-05 + 7.50000000 4.42062992E-05 + 7.44999981 2.35267271E-05 + 7.40000010 -2.63571422E-07 + 7.34999990 -2.07054472E-05 + 7.30000019 -3.26018308E-05 + 7.25000000 -3.34328288E-05 + 7.19999981 
-2.39667406E-05 + 7.15000010 -7.90056129E-06 + 7.09999990 9.35084427E-06 + 7.05000019 2.23469815E-05 + 7.00000000 2.72840844E-05 + 6.94999981 2.31278500E-05 + 6.90000010 1.18785611E-05 + 6.84999990 -2.10568601E-06 + 6.80000019 -1.35159289E-05 + 6.75000000 -1.77964466E-05 + 6.69999981 -1.26233590E-05 + 6.65000010 1.29606963E-06 + 6.59999990 2.03514210E-05 + 6.55000019 3.90527894E-05 + 6.50000000 5.16494292E-05 + 6.44999981 5.38645290E-05 + 6.40000010 4.42314486E-05 + 6.34999990 2.46324726E-05 + 6.30000019 -1.27930733E-07 + 6.25000000 -2.36052001E-05 + 6.19999981 -3.94967792E-05 + 6.15000010 -4.34256763E-05 + 6.09999990 -3.42242165E-05 + 6.05000019 -1.43530415E-05 + 6.00000000 1.06729231E-05 + 5.94999981 3.37084930E-05 + 5.90000010 4.78767543E-05 + 5.84999990 4.84349330E-05 + 5.80000019 3.41048653E-05 + 5.75000000 7.50440904E-06 + 5.69999981 -2.54457500E-05 + 5.65000010 -5.69920085E-05 + 5.59999990 -7.95099331E-05 + 5.55000019 -8.74390826E-05 + 5.50000000 -7.86762612E-05 + 5.44999981 -5.50586010E-05 + 5.40000010 -2.18168425E-05 + 5.34999990 1.38375117E-05 + 5.30000019 4.45896439E-05 + 5.25000000 6.48381756E-05 + 5.19999981 7.19539603E-05 + 5.15000010 6.66559208E-05 + 5.09999990 5.24438146E-05 + 5.05000019 3.43061802E-05 + 5.00000000 1.71269385E-05 + 4.94999981 4.28446583E-06 + 4.90000010 -3.14778322E-06 + 4.84999990 -6.39960399E-06 + 4.80000019 -8.26555515E-06 + 4.75000000 -1.19242586E-05 + 4.69999981 -1.97327063E-05 + 4.65000010 -3.23909953E-05 + 4.59999990 -4.87209545E-05 + 4.55000019 -6.60781734E-05 + 4.50000000 -8.12026192E-05 + 4.44999981 -9.11826428E-05 + 4.40000010 -9.42021506E-05 + 4.34999990 -8.98534199E-05 + 4.30000019 -7.89798505E-05 + 4.25000000 -6.31891889E-05 + 4.19999981 -4.42816927E-05 + 4.15000010 -2.38337234E-05 + 4.09999990 -3.07345749E-06 + 4.05000019 1.69681007E-05 + 4.00000000 3.51836134E-05 + 3.95000005 5.02047551E-05 + 3.90000010 6.04739325E-05 + 3.84999990 6.46116314E-05 + 3.79999995 6.19618950E-05 + 3.75000000 5.30823600E-05 + 3.70000005 
3.99246164E-05 + 3.65000010 2.55423056E-05 + 3.59999990 1.33379426E-05 + 3.54999995 6.05006517E-06 + 3.50000000 4.81306915E-06 + 3.45000005 8.63671175E-06 + 3.40000010 1.45369731E-05 + 3.34999990 1.83404009E-05 + 3.29999995 1.59530791E-05 + 3.25000000 4.71580688E-06 + 3.20000005 -1.55792295E-05 + 3.15000010 -4.23404126E-05 + 3.09999990 -7.06242281E-05 + 3.04999995 -9.42638653E-05 + 3.00000000 -1.07365318E-04 + 2.95000005 -1.05763378E-04 + 2.90000010 -8.80629668E-05 + 2.84999990 -5.60296830E-05 + 2.79999995 -1.42802101E-05 + 2.75000000 3.06036491E-05 + 2.70000005 7.13055706E-05 + 2.65000010 1.01089936E-04 + 2.59999990 1.14978502E-04 + 2.54999995 1.10630586E-04 + 2.50000000 8.88274808E-05 + 2.45000005 5.34968894E-05 + 2.40000010 1.12336584E-05 + 2.34999990 -2.96946819E-05 + 2.29999995 -6.07976253E-05 + 2.25000000 -7.50902982E-05 + 2.20000005 -6.87884167E-05 + 2.15000010 -4.25553480E-05 + 2.09999990 -1.88257809E-06 + 2.04999995 4.36807168E-05 + 2.00000000 8.24628733E-05 + 1.95000005 1.03438470E-04 + 1.89999998 9.91841443E-05 + 1.85000002 6.82068567E-05 + 1.79999995 1.58948533E-05 + 1.75000000 -4.63440920E-05 + 1.70000005 -1.03696671E-04 + 1.64999998 -1.41713856E-04 + 1.60000002 -1.50234453E-04 + 1.54999995 -1.26331201E-04 + 1.50000000 -7.53615095E-05 + 1.45000005 -9.71819281E-06 + 1.39999998 5.44534232E-05 + 1.35000002 1.01683247E-04 + 1.29999995 1.21483630E-04 + 1.25000000 1.11269648E-04 + 1.20000005 7.70134575E-05 + 1.14999998 3.14164572E-05 + 1.10000002 -9.87709609E-06 + 1.04999995 -3.29159302E-05 + 1.00000000 -2.95658429E-05 + 0.949999988 8.00118016E-08 + 0.899999976 4.76890164E-05 + 0.850000024 9.90463595E-05 + 0.800000012 1.38064963E-04 + 0.750000000 1.51274871E-04 + 0.699999988 1.31522684E-04 + 0.649999976 7.98880283E-05 + 0.600000024 5.37180586E-06 + 0.550000012 -7.74476430E-05 + 0.500000000 -1.52054359E-04 + 0.449999988 -2.03904230E-04 + 0.400000006 -2.23598981E-04 + 0.349999994 -2.08614088E-04 + 0.300000012 -1.63321529E-04 + 0.250000000 -9.75081930E-05 + 
0.200000003 -2.39268957E-05 + 0.150000006 4.44441794E-05 + 0.100000001 9.68198947E-05 + 5.00000007E-02 1.26291052E-04 + 0.00000000 1.30608067E-04 + -5.00000007E-02 1.12113099E-04 + -0.100000001 7.69923790E-05 + -0.150000006 3.40525694E-05 + -0.200000003 -6.79742243E-06 + -0.250000000 -3.62070095E-05 + -0.300000012 -4.70300693E-05 + -0.349999994 -3.57554745E-05 + -0.400000006 -3.46567685E-06 + -0.449999988 4.39513060E-05 + -0.500000000 9.65436266E-05 + -0.550000012 1.42028366E-04 + -0.600000024 1.68304672E-04 + -0.649999976 1.66276746E-04 + -0.699999988 1.32333618E-04 + -0.750000000 6.98048098E-05 + -0.800000012 -1.11201161E-05 + -0.850000024 -9.51763577E-05 + -0.899999976 -1.65240825E-04 + -0.949999988 -2.06291414E-04 + -1.00000000 -2.09149453E-04 + -1.04999995 -1.73016713E-04 + -1.10000002 -1.06059393E-04 + -1.14999998 -2.37543918E-05 + -1.20000005 5.47122654E-05 + -1.25000000 1.11171110E-04 + -1.29999995 1.33147900E-04 + -1.35000002 1.17268319E-04 + -1.39999998 7.04278573E-05 + -1.45000005 8.30188583E-06 + -1.50000000 -4.85511118E-05 + -1.54999995 -8.00892667E-05 + -1.60000002 -7.23362464E-05 + -1.64999998 -2.16061599E-05 + -1.70000005 6.38042475E-05 + -1.75000000 1.65061923E-04 + -1.79999995 2.57182342E-04 + -1.85000002 3.15346551E-04 + -1.89999998 3.21553845E-04 + -1.95000005 2.69817596E-04 + -2.00000000 1.68395272E-04 + -2.04999995 3.82650869E-05 + -2.09999990 -9.19438316E-05 + -2.15000010 -1.93354368E-04 + -2.20000005 -2.44630792E-04 + -2.25000000 -2.37980334E-04 + -2.29999995 -1.81576528E-04 + -2.34999990 -9.75399307E-05 + -2.40000010 -1.58501061E-05 + -2.45000005 3.42359017E-05 + -2.50000000 3.28760725E-05 + -2.54999995 -2.40809150E-05 + -2.59999990 -1.23166858E-04 + -2.65000010 -2.36441236E-04 + -2.70000005 -3.28973663E-04 + -2.75000000 -3.68616806E-04 + -2.79999995 -3.35289456E-04 + -2.84999990 -2.27068827E-04 + -2.90000010 -6.12843869E-05 + -2.95000005 1.29742883E-04 + -3.00000000 3.07094917E-04 + -3.04999995 4.35349677E-04 + -3.09999990 4.91461484E-04 + 
-3.15000010 4.69930557E-04 + -3.20000005 3.82929778E-04 + -3.25000000 2.55608262E-04 + -3.29999995 1.18228352E-04 + -3.34999990 -2.33307287E-06 + -3.40000010 -8.91418531E-05 + -3.45000005 -1.38017655E-04 + -3.50000000 -1.56103299E-04 + -3.54999995 -1.57096496E-04 + -3.59999990 -1.55069196E-04 + -3.65000010 -1.59119125E-04 + -3.70000005 -1.70636151E-04 + -3.75000000 -1.83917451E-04 + -3.79999995 -1.89615181E-04 + -3.84999990 -1.79496856E-04 + -3.90000010 -1.50585169E-04 + -3.95000005 -1.07050808E-04 + -4.00000000 -5.91270691E-05 + -4.05000019 -1.94611603E-05 + -4.09999990 1.70343696E-06 + -4.15000010 6.63997071E-07 + -4.19999981 -1.79070121E-05 + -4.25000000 -4.22244884E-05 + -4.30000019 -5.74153491E-05 + -4.34999990 -5.09440943E-05 + -4.40000010 -1.75215137E-05 + -4.44999981 3.83050065E-05 + -4.50000000 1.02924991E-04 + -4.55000019 1.57706047E-04 + -4.59999990 1.85067387E-04 + -4.65000010 1.74603134E-04 + -4.69999981 1.27177191E-04 + -4.75000000 5.55078659E-05 + -4.80000019 -1.91344279E-05 + -4.84999990 -7.32352855E-05 + -4.90000010 -8.84613619E-05 + -4.94999981 -5.78827567E-05 + -5.00000000 1.10355377E-05 + -5.05000019 9.77682939E-05 + -5.09999990 1.74189176E-04 + -5.15000010 2.12682702E-04 + -5.19999981 1.94579698E-04 + -5.25000000 1.16402618E-04 + -5.30000019 -8.03357761E-06 + -5.34999990 -1.50471533E-04 + -5.40000010 -2.75401428E-04 + -5.44999981 -3.49698967E-04 + -5.50000000 -3.52045696E-04 + -5.55000019 -2.79433181E-04 + -5.59999990 -1.48787905E-04 + -5.65000010 6.92760023E-06 + -5.69999981 1.47240033E-04 + -5.75000000 2.35079555E-04 + -5.80000019 2.47097429E-04 + -5.84999990 1.80472693E-04 + -5.90000010 5.41926347E-05 + -5.94999981 -9.57080265E-05 + -6.00000000 -2.25773518E-04 + -6.05000019 -2.97035091E-04 + -6.09999990 -2.85907532E-04 + -6.15000010 -1.90992228E-04 + -6.19999981 -3.38044665E-05 + -6.25000000 1.46864782E-04 + -6.30000019 3.05435271E-04 + -6.34999990 4.01954050E-04 + -6.40000010 4.12949710E-04 + -6.44999981 3.37661273E-04 + -6.50000000 
1.97900386E-04 + -6.55000019 3.16371188E-05 + -6.59999990 -1.17796881E-04 + -6.65000010 -2.13750522E-04 + -6.69999981 -2.36087377E-04 + -6.75000000 -1.86059086E-04 + -6.80000019 -8.49625649E-05 + -6.84999990 3.28356728E-05 + -6.90000010 1.30299653E-04 + -6.94999981 1.78409697E-04 + -7.00000000 1.64023528E-04 + -7.05000019 9.27526416E-05 + -7.09999990 -1.37440647E-05 + -7.15000010 -1.24966798E-04 + -7.19999981 -2.10911254E-04 + -7.25000000 -2.50645535E-04 + -7.30000019 -2.37616929E-04 + -7.34999990 -1.80301897E-04 + -7.40000010 -9.83084392E-05 + -7.44999981 -1.54305671E-05 + -7.50000000 4.79828705E-05 + -7.55000019 8.08978075E-05 + -7.59999990 8.39425484E-05 + -7.65000010 6.76894706E-05 + -7.69999981 4.76721398E-05 + -7.75000000 3.79830017E-05 + -7.80000019 4.57141359E-05 + -7.84999990 6.81737220E-05 + -7.90000010 9.38488593E-05 + -7.94999981 1.06806590E-04 + -8.00000000 9.30419774E-05 + -8.05000019 4.65780540E-05 + -8.10000038 -2.68685671E-05 + -8.14999962 -1.10110814E-04 + -8.19999981 -1.78821123E-04 + -8.25000000 -2.08827812E-04 + -8.30000019 -1.84188932E-04 + -8.35000038 -1.03461702E-04 + -8.39999962 1.80022507E-05 + -8.44999981 1.50783148E-04 + -8.50000000 2.58682732E-04 + -8.55000019 3.08680843E-04 + -8.60000038 2.80743581E-04 + -8.64999962 1.74568020E-04 + -8.69999981 1.11280933E-05 + -8.75000000 -1.71667023E-04 + -8.80000019 -3.28198803E-04 + -8.85000038 -4.16812138E-04 + -8.89999962 -4.10755456E-04 + -8.94999981 -3.05382651E-04 + -9.00000000 -1.19653145E-04 + -9.05000019 1.08441083E-04 + -9.10000038 3.31219169E-04 + -9.14999962 5.02861221E-04 + -9.19999981 5.90097625E-04 + -9.25000000 5.79342770E-04 + -9.30000019 4.78667469E-04 + -9.35000038 3.14463716E-04 + -9.39999962 1.24036349E-04 + -9.44999981 -5.37095621E-05 + -9.50000000 -1.87051148E-04 + -9.55000019 -2.57661188E-04 + -9.60000038 -2.63083115E-04 + -9.64999962 -2.15342079E-04 + -9.69999981 -1.36488976E-04 + -9.75000000 -5.24623138E-05 + -9.80000019 1.31734387E-05 + -9.85000038 4.42485325E-05 + 
-9.89999962 3.48445574E-05 + -9.94999981 -1.03898483E-05 + -10.0000000 -7.83055948E-05 diff --git a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output.log b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output.log new file mode 100644 index 000000000..12005a3e3 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output.log @@ -0,0 +1,366 @@ +running example: Thu Nov 14 09:35:04 UTC 2024 + +setting up example... + +'DATA/Par_file' -> 'DATA/Par_file.org' + +decomposing mesh... + + + ********************** + Serial mesh decomposer + ********************** + + reading mesh files in: ./MESH-default + + using NGNOD = 8 + linear elements + + total number of nodes: + nnodes = 23273 + total number of spectral elements: + nspec = 20736 + materials: + num_mat = 1 + defined = 1 undefined = 0 + no poroelastic material file found + defined materials : 1 + undefined materials: 1 (interfaces/tomography models/..) 
+ absorbing boundaries: + nspec2D_xmin = 576 + nspec2D_xmax = 576 + nspec2D_ymin = 576 + nspec2D_ymax = 576 + nspec2D_bottom = 1296 + nspec2D_top = 1296 + no absorbing_cpml_file file found + no moho_surface_file file found + + Par_file_faults not found: assuming that there are no faults + + node valence: + min = 1 max = 8 + neighbors: + nsize = 8 + valence: sup_neighbor = 38 max_neighbor = 26 + + local time stepping: turned OFF + + partitioning: + number of partitions requested = 4 + + array size xadj : 20737 memory: 7.91053772E-02 MB + array size adjncy: 539136 memory: 2.05664062 MB + sup_neighbor : 26 + + mesh2dual: max_neighbor = 26 + + + partitioning type: 1 + running SCOTCH partitioning + + SCOTCH partitioning + finished partitioning + + written file: OUTPUT_FILES/DATABASES_MPI/part_array.vtk + + element distribution: + partition 0 has 5137 elements + partition 1 has 5168 elements + partition 2 has 5200 elements + partition 3 has 5231 elements + elements per partition: min/max = 5137 5231 + elements per partition: imbalance = 1.79697955 % + (0% being totally balanced, 100% being unbalanced) + + load distribution: + element loads: min/max = 41 41 + + partition 0 has 210617 load units + partition 1 has 211888 load units + partition 2 has 213200 load units + partition 3 has 214471 load units + load per partition: min/max = 210617 214471 + load per partition: imbalance = 1.79697955 % + (0% being totally balanced, 100% being unbalanced) + + partitions: + num = 4 + ninterfaces = 6 + + partition 0 has number of MPI interfaces: 2 maximum size 2392 + partition 1 has number of MPI interfaces: 3 maximum size 2392 + partition 2 has number of MPI interfaces: 2 maximum size 2392 + partition 3 has number of MPI interfaces: 3 maximum size 2392 + + Databases files in directory: OUTPUT_FILES/DATABASES_MPI + + + finished successfully + + +running database generation on 4 processors... 
+ +######################################################### +forward simulation +######################################################### +(running forward simulation with saving forward wavefield) + +Changed simulation_type to 1 and save_forward = .true. in Par_file + +running solver on 4 processors... + +######################################################### +adjoint sources +######################################################### +setting up adjoint sources + + +adjoint sources: + window start/end = 10.0 / 24.0 + +'OUTPUT_FILES/DB.X20.BXX.semd' -> 'SEM/DB.X20.BXX.semd' +'OUTPUT_FILES/DB.X20.BXY.semd' -> 'SEM/DB.X20.BXY.semd' +'OUTPUT_FILES/DB.X20.BXZ.semd' -> 'SEM/DB.X20.BXZ.semd' +compiling xcreate_adjsrc_traveltime: + using fortran compiler = gfortran + using C compiler = gcc + +gfortran -o xcreate_adjsrc_traveltime create_adjsrc_traveltime.o rw_ascfile_c.o +'xcreate_adjsrc_traveltime' -> '/home/myuser/EXAMPLES/applications/homogeneous_halfspace/SEM/xcreate_adjsrc_traveltime' + +running adjoint source creation + + xcreate_adjsrc_traveltime: + measurement window start/end = 10.000000000000000 / 24.000000000000000 + component ifile = 3 lrot = F + + reading asc file DB.X20.BXX.semd ... + reading asc file DB.X20.BXY.semd ... + reading asc file DB.X20.BXZ.semd ... + + start time: -10.000000000000000 + time step: 5.0000190000000444E-002 + number of steps: 700 + + i = 1 norm = 5.7606528005199849 + component set to zero + i = 2 norm = 15.017517352478269 + component set to zero + i = 3 norm = 1.3877010232283336 + + write to asc file DB.X20.BXX.adj + write to asc file DB.X20.BXY.adj + write to asc file DB.X20.BXZ.adj + +'./STATIONS_ADJOINT' -> '../DATA/STATIONS_ADJOINT' + + +######################################################### +kernel simulation +######################################################### +(running kernel simulation: SIMULATION_TYPE == 3) + +Changed simulation_type to 3 in Par_file + +running solver (kernel run) on 4 processors... 
+ + +kernels done + + +######################################################### +Visualization +######################################################### + + Recombining ParaView data for slices + + combine volumetric data + mesh topology dir : OUTPUT_FILES/DATABASES_MPI + input file dir : rho_kernel + + output directory : OUTPUT_FILES/ + using VTK format for file output + + Slice list: + 0 1 2 3 + + + VTK initial total points: 1387132 + VTK initial total elements: 1327104 + + + Reading slice 0 + file: OUTPUT_FILES/DATABASES_MPI/proc000000_rho_kernel.bin + min/max value: -1.53627187E-12 1.84511586E-12 + + points: 0 343557 + + Reading slice 1 + file: OUTPUT_FILES/DATABASES_MPI/proc000001_rho_kernel.bin + min/max value: -3.98493460E-09 3.18577920E-09 + + points: 343557 345865 + + Reading slice 2 + file: OUTPUT_FILES/DATABASES_MPI/proc000002_rho_kernel.bin + min/max value: -1.02608616E-11 4.32215097E-11 + + points: 689422 347685 + + Reading slice 3 + file: OUTPUT_FILES/DATABASES_MPI/proc000003_rho_kernel.bin + min/max value: -3.98493460E-09 3.59137192E-10 + + points: 1037107 350025 + + Total number of points: 1387132 + + Reading slice 0 + elements: 328768 328768 + points : 343557 343557 + Reading slice 1 + elements: 659520 330752 + points : 689422 345865 + Reading slice 2 + elements: 992320 332800 + points : 1037107 347685 + Reading slice 3 + elements: 1327104 334784 + points : 1387132 350025 + + Total number of elements: 1327104 + + + Done writing OUTPUT_FILES//rho_kernel.vtk + + + Recombining ParaView data for slices + + combine volumetric data + mesh topology dir : OUTPUT_FILES/DATABASES_MPI + input file dir : kappa_kernel + + output directory : OUTPUT_FILES/ + using VTK format for file output + + Slice list: + 0 1 2 3 + + + VTK initial total points: 1387132 + VTK initial total elements: 1327104 + + + Reading slice 0 + file: OUTPUT_FILES/DATABASES_MPI/proc000000_kappa_kernel.bin + min/max value: -2.09085418E-12 4.30854354E-12 + + points: 0 343557 + + Reading slice 
1 + file: OUTPUT_FILES/DATABASES_MPI/proc000001_kappa_kernel.bin + min/max value: -9.47914724E-09 5.21300425E-09 + + points: 343557 345865 + + Reading slice 2 + file: OUTPUT_FILES/DATABASES_MPI/proc000002_kappa_kernel.bin + min/max value: -1.79466372E-10 2.46384788E-11 + + points: 689422 347685 + + Reading slice 3 + file: OUTPUT_FILES/DATABASES_MPI/proc000003_kappa_kernel.bin + min/max value: -2.45610887E-09 9.48201340E-09 + + points: 1037107 350025 + + Total number of points: 1387132 + + Reading slice 0 + elements: 328768 328768 + points : 343557 343557 + Reading slice 1 + elements: 659520 330752 + points : 689422 345865 + Reading slice 2 + elements: 992320 332800 + points : 1037107 347685 + Reading slice 3 + elements: 1327104 334784 + points : 1387132 350025 + + Total number of elements: 1327104 + + + Done writing OUTPUT_FILES//kappa_kernel.vtk + + + Recombining ParaView data for slices + + combine volumetric data + mesh topology dir : OUTPUT_FILES/DATABASES_MPI + input file dir : mu_kernel + + output directory : OUTPUT_FILES/ + using VTK format for file output + + Slice list: + 0 1 2 3 + + + VTK initial total points: 1387132 + VTK initial total elements: 1327104 + + + Reading slice 0 + file: OUTPUT_FILES/DATABASES_MPI/proc000000_mu_kernel.bin + min/max value: -3.46212252E-12 2.90256816E-12 + + points: 0 343557 + + Reading slice 1 + file: OUTPUT_FILES/DATABASES_MPI/proc000001_mu_kernel.bin + min/max value: -3.56344572E-08 1.08928919E-08 + + points: 343557 345865 + + Reading slice 2 + file: OUTPUT_FILES/DATABASES_MPI/proc000002_mu_kernel.bin + min/max value: -5.96327987E-11 1.39300280E-10 + + points: 689422 347685 + + Reading slice 3 + file: OUTPUT_FILES/DATABASES_MPI/proc000003_mu_kernel.bin + min/max value: -2.03610355E-08 3.89552284E-08 + + points: 1037107 350025 + + Total number of points: 1387132 + + Reading slice 0 + elements: 328768 328768 + points : 343557 343557 + Reading slice 1 + elements: 659520 330752 + points : 689422 345865 + Reading slice 2 + 
elements: 992320 332800 + points : 1037107 347685 + Reading slice 3 + elements: 1327104 334784 + points : 1387132 350025 + + Total number of elements: 1327104 + + + Done writing OUTPUT_FILES//mu_kernel.vtk + + +see results in directory : OUTPUT_FILES/ + kernel outputs in directory: OUTPUT_FILES/DATABASES_MPI + +done +Thu Nov 14 09:38:03 UTC 2024 diff --git a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_generate_databases.txt b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_generate_databases.txt new file mode 100644 index 000000000..61b102605 --- /dev/null +++ b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_generate_databases.txt @@ -0,0 +1,286 @@ + + ***************************************** + *** Specfem3D MPI database generation *** + ***************************************** + + Running Git package version of the code: 4.1.1 + which is Git unknown + dating unknown + + This is process 0 + There are 4 MPI processes + Processes are numbered from 0 to 3 + + There is a total of 4 slices + + NGLLX = 5 + NGLLY = 5 + NGLLZ = 5 + + Shape functions defined by NGNOD = 8 control nodes + Surface shape functions defined by NGNOD2D = 4 control nodes + Beware! Curvature (i.e. 
HEX27 elements) is not handled by our internal mesher + + velocity model: default + + + suppressing UTM projection + + no attenuation + + no anisotropy + + no oceans + + incorporating Stacey absorbing conditions + + using a CMTSOLUTION source + with a Gaussian source time function + + + ************************************ + reading partition files in the model + ************************************ + + external mesh points : 24652 + defined materials : 1 + undefined materials : 0 + total number of spectral elements: 20736 + absorbing boundaries: + xmin,xmax : 576 576 + ymin,ymax : 576 576 + bottom,top: 1296 1296 + + total number of C-PML elements in the global mesh: 0 + + number of MPI partition interfaces: 10 + + minimum memory used so far : 18.0714130 MB per process + minimum total memory requested : 82.9090424 MB per process + + create regions: + + ...allocating arrays + NGLLX = 5 + NGLLY = 5 + NGLLZ = 5 + NGNOD = 8 + NGNOD2D = 4 + + main process setup: + nspec = 5137 + + separating regular/irregular element shapes + nspec regular = 0 + nspec irregular = 5137 + + absorbing boundary faces: + num_abs_boundary_faces = 911 + + free surface faces: + num_free_surface_faces = 317 + + + File DATA/Par_file_faults not found: assuming that there are no faults + + + ...setting up jacobian + + ...indexing global points + creating ibool indexing : x min/max = 0.00000000 / 134000.000 + creating indirect addressing: nglob = 343557 + creating unique point locations + + ...preparing MPI interfaces + number of interfaces : 2 + creating MPI indexing : x min/max = 0.00000000 / 134000.000 + tolerance = 1.3400000000000000E-005 + + total MPI interface points: 41274 + total assembled MPI interface points: 40884 + + ...setting up absorbing boundaries + boundary xmin : 576 + boundary xmax : 576 + boundary ymin : 576 + boundary ymax : 576 + boundary bottom : 1296 + boundary top : 1296 + absorbing boundary: + total number of free faces = 1296 + total number of faces = 3600 + + ...setting 
up mesh surface + + ...determining velocity model + 10 % time remaining: 2.4028113015715034E-007 s + 20 % time remaining: 1.9656274498964488E-007 s + 30 % time remaining: 1.6375977506595669E-007 s + 40 % time remaining: 1.3617004095436806E-007 s + 50 % time remaining: 1.1142708479995382E-007 s + 60 % time remaining: 8.9732843238621953E-008 s + 70 % time remaining: 6.6752589793850270E-008 s + 80 % time remaining: 4.4190441213058570E-008 s + 90 % time remaining: 2.2089179055297862E-008 s + 100 % time remaining: 2.9560892666907712E-010 s + + ...detecting acoustic-elastic-poroelastic surfaces + total acoustic elements : 0 + total elastic elements : 20736 + total poroelastic elements: 0 + + acoustic - elastic coupling : total number of faces = 0 + acoustic - poroelastic coupling : total number of faces = 0 + elastic - poroelastic coupling : total number of faces = 0 + + + ...element inner/outer separation + for overlapping of communications with calculations: + percentage of edge elements 11.2322388 % + percentage of volume elements 88.7677612 % + + + ...element mesh coloring + use coloring = F + + ...external binary models + no external binary model used + + ...creating mass matrix + + ...setting up mesh adjacency + + mesh adjacency: + total number of elements in this slice = 5137 + + maximum number of neighbors allowed = 300 + minimum array memory required per slice = 5.89842606 (MB) + + maximum number of elements per shared node = 8 + node-to-element array memory required per slice = 10.4845276 (MB) + + 10 % - elapsed time: 1.74187087E-02 s + 20 % - elapsed time: 2.88833324E-02 s + 30 % - elapsed time: 4.17485014E-02 s + 40 % - elapsed time: 5.39622083E-02 s + 50 % - elapsed time: 6.55980036E-02 s + 60 % - elapsed time: 7.84245804E-02 s + 70 % - elapsed time: 9.07370821E-02 s + 80 % - elapsed time: 0.102916747 s + 90 % - elapsed time: 0.116048463 s + 100 % - elapsed time: 0.122804083 s + + maximum neighbors found per element = 26 + (maximum neighbor of neighbors) = 
98 + total number of neighbors = 512014 + + Elapsed time for detection of neighbors in seconds = 0.135051712 + + + ...saving mesh databases + using binary file format + database file (for rank 0): OUTPUT_FILES/DATABASES_MPI/proc000000_external_mesh.bin + + saving mesh files for AVS, OpenDX, Paraview + saving additional mesh files with surface/coupling points + + ...checking mesh resolution + Mesh resolution: + + ******** + minimum and maximum number of elements + and points in the CUBIT + SCOTCH mesh: + + NSPEC_global_min = 5137 + NSPEC_global_max = 5231 + NSPEC_global_max / NSPEC_global_min imbalance = 1.01829863 = 1.82986176 % + NSPEC_global_sum = 20736 + + NGLOB_global_min = 343557 + NGLOB_global_max = 350025 + NGLOB_global_max / NGLOB_global_min imbalance = 1.01882660 = 1.88265705 % + NGLOB_global_sum = 1387132 + + If you have elements of a single type (all acoustic, all elastic, all poroelastic, and without CPML) + in the whole mesh, then there should be no significant imbalance in the above numbers. + Otherwise, it is normal to have imbalance in elements and points because the domain decomposer + compensates for the different cost of different elements by partitioning them unevenly among processes. 
+ ******** + + + ******** + Model: P velocity min,max = 2800.00000 2800.00000 + Model: S velocity min,max = 1500.00000 1500.00000 + + Model: Poisson's ratio min,max = 0.298747778 0.298747778 + ******** + + ********************************************* + *** Verification of simulation parameters *** + ********************************************* + + *** Xmin and Xmax of the model = 0.00000000 134000.000 + *** Ymin and Ymax of the model = 0.00000000 134000.000 + *** Zmin and Zmax of the model = -60000.0000 0.00000000 + + *** Max GLL point distance = 1227.47656 + *** Min GLL point distance = 642.726562 + *** Max/min ratio = 1.90979588 + + *** Max element size = 3750.00000 + *** Min element size = 3722.21875 + *** Max/min ratio = 1.00746357 + + *** Minimum period resolved = 3.12500000 + *** Maximum suggested time step = 0.109999999 + + Elapsed time for checking mesh resolution in seconds = 2.0973124999999371E-002 + saving VTK files for Courant number and minimum period + + + mesh regions done + + min and max of elevation (i.e. 
height of the upper surface of the mesh) included in mesh in m is 0.0000000000000000 0.0000000000000000 + + + done mesh setup + + + Repartition of elements: + ----------------------- + + load distribution: + element loads: min/max = 210617 214471 + + partition 0 has 210617 load units + partition 1 has 211888 load units + partition 2 has 213200 load units + partition 3 has 214471 load units + + load per partition: min/max = 210617 214471 + load per partition: imbalance = 1.79697955 % + (0% being totally balanced, 100% being unbalanced) + + total number of elements in mesh slice 0: 5137 + total number of regular elements in mesh slice 0: 0 + total number of irregular elements in mesh slice 0: 5137 + total number of points in mesh slice 0: 343557 + + total number of elements in entire mesh: 20736 + approximate total number of points in entire mesh (with duplicates on MPI edges): 1387132 + approximate total number of DOFs in entire mesh (with duplicates on MPI edges): 4161396 + + total number of time steps in the solver will be: 700 + + using single precision for the calculations + + smallest and largest possible floating-point numbers are: 1.17549435E-38 3.40282347E+38 + + + Elapsed time for mesh generation and buffer creation in seconds = 16.0961285 + Elapsed time for mesh generation and buffer creation in hh:mm:ss = 0 h 00 m 16 s + + End of mesh generation + + done + diff --git a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_solver.txt b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_solver.txt new file mode 100644 index 000000000..5880431ee --- /dev/null +++ b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_solver.txt @@ -0,0 +1,383 @@ + ********************************************** + **** Specfem 3-D Solver - MPI version f90 **** + ********************************************** + + Running Git package version 
of the code: 4.1.1 + which is Git unknown + dating unknown + + + + + There are 4 MPI processes + Processes are numbered from 0 to 3 + + There is a total of 4 slices + + NDIM = 3 + + NGLLX = 5 + NGLLY = 5 + NGLLZ = 5 + + using single precision for the calculations + + smallest and largest possible floating-point numbers are: 1.17549435E-38 3.40282347E+38 + + velocity model: default + + Reading mesh databases... + reads binary mesh files: proc***_external_mesh.bin + from directory : OUTPUT_FILES/DATABASES_MPI + + simulation w/ acoustic domain: F + simulation w/ elastic domain: T + simulation w/ poroelastic domain: F + + slice 0 has: + number of elements acoustic : 0 + number of elements elastic : 5137 + number of elements poroelastic: 0 + done + + total acoustic elements : 0 + total elastic elements : 20736 + total poroelastic elements : 0 + + Mesh resolution: + + ******** + minimum and maximum number of elements + and points in the CUBIT + SCOTCH mesh: + + NSPEC_global_min = 5137 + NSPEC_global_max = 5231 + NSPEC_global_max / NSPEC_global_min imbalance = 1.01829863 = 1.82986176 % + NSPEC_global_sum = 20736 + + NGLOB_global_min = 343557 + NGLOB_global_max = 350025 + NGLOB_global_max / NGLOB_global_min imbalance = 1.01882660 = 1.88265705 % + NGLOB_global_sum = 1387132 + + If you have elements of a single type (all acoustic, all elastic, all poroelastic, and without CPML) + in the whole mesh, then there should be no significant imbalance in the above numbers. + Otherwise, it is normal to have imbalance in elements and points because the domain decomposer + compensates for the different cost of different elements by partitioning them unevenly among processes. 
+ ******** + + + ******** + Model: P velocity min,max = 2800.00000 2800.00000 + Model: S velocity min,max = 1500.00000 1500.00000 + + Model: Poisson's ratio min,max = 0.298747778 0.298747778 + ******** + + ********************************************* + *** Verification of simulation parameters *** + ********************************************* + + *** Xmin and Xmax of the model = 0.00000000 134000.000 + *** Ymin and Ymax of the model = 0.00000000 134000.000 + *** Zmin and Zmax of the model = -60000.0000 0.00000000 + + *** Max GLL point distance = 1227.47656 + *** Min GLL point distance = 642.726562 + *** Max/min ratio = 1.90979588 + + *** Max element size = 3750.00000 + *** Min element size = 3722.21875 + *** Max/min ratio = 1.00746357 + + *** Minimum period resolved = 3.12500000 + *** Maximum suggested time step = 0.109999999 + + *** for DT : 5.0000000000000003E-002 + *** Max stability for wave velocities = 0.217822030 + + Elapsed time for checking mesh resolution in seconds = 1.5842708000000000E-002 + saving VTK files for Courant number and minimum period + + + ****************************************** + There is a total of 4 slices + ****************************************** + + + kd-tree: + total data points: 138699 + theoretical number of nodes: 277388 + tree memory size: 8.46520996 MB + actual number of nodes: 277397 + tree memory size: 8.46548462 MB + maximum depth : 18 + creation timing : 9.86839533E-02 (s) + + + sources: 1 + + ******************** + locating sources + ******************** + + reading source information from ./DATA/CMTSOLUTION file + + no UTM projection + + + source # 1 + source located in slice 1 + in element 2755 + in elastic domain + + using moment tensor source: + xi coordinate of source in that element: 1.0000000000000000 + eta coordinate of source in that element: 1.0000000000000000 + gamma coordinate of source in that element: 1.0000000000000000 + + source time function: + using (quasi) Heaviside source time function + half 
duration: 5.0000000000000000 seconds + + time shift: 0.0000000000000000 seconds + + magnitude of the source: + scalar moment M0 = 2.6266994498800193E+028 dyne-cm + moment magnitude Mw = 8.2462737794883481 + + original (requested) position of the source: + + latitude: 67000.000000000000 + longitude: 67000.000000000000 + + x: 67000.000000000000 + y: 67000.000000000000 + depth: 30.000000000000000 km + topo elevation: 0.0000000000000000 + + position of the source that will be used: + + x: 67000.000000000000 + y: 67000.000000000000 + depth: 30.000000000000000 km + z: -30000.000000000000 + + error in location of the source: 0.00000000 m + + + + maximum error in location of the sources: 0.00000000 m + + + Elapsed time for detection of sources in seconds = 9.88966599E-03 + + End of source detection - done + + + receivers: + + there are 1 stations in file ./DATA/STATIONS_ADJOINT + saving 1 stations inside the model in file ./DATA/STATIONS_ADJOINT_FILTERED + excluding 0 stations located outside the model + + Total number of receivers = 1 + + + ******************** + locating receivers + ******************** + + reading receiver information from ./DATA/STATIONS_ADJOINT_FILTERED file + + + station # 1 DB X20 + original latitude: 67000.0000 + original longitude: 22732.1406 + original x: 22732.1406 + original y: 67000.0000 + original depth: 0.00000000 m + horizontal distance: 44.2678604 + target x, y, z: 22732.1406 67000.0000 0.00000000 + closest estimate found: 0.00000000 m away + + receiver located in slice 2 + in element 4904 + in elastic domain + at coordinates: + xi = -0.78571608325633080 + eta = 1.0000000000000000 + gamma = 1.0000000000000000 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 22732.139999999999 + y: 67000.000000000000 + depth: 0.0000000000000000 m + z: 0.0000000000000000 + + + + maximum error in location of all the receivers: 0.00000000 m + + Elapsed time for 
receiver detection in seconds = 1.13145001E-02 + + End of receiver detection - done + + found a total of 1 receivers in all the slices + + 3 adjoint component trace files found in all slices + + source arrays: + number of sources is 1 + size of source array = 1.43051147E-03 MB + = 1.39698386E-06 GB + + seismograms: + seismograms written by all processes + + Total number of simulation steps (NSTEP) = 700 + writing out seismograms at every NTSTEP_BETWEEN_OUTPUT_SEISMOS = 700 + number of subsampling steps for seismograms = 1 + Total number of samples for seismograms = 700 + + + maximum number of local receivers is 1 in slice 2 + size of maximum seismogram array = 8.01086426E-03 MB + = 7.82310963E-06 GB + + adjoint source arrays: + reading adjoint sources at every NTSTEP_BETWEEN_READ_ADJSRC = 700 + maximum number of local adjoint sources is 1 in slice 2 + size of maximum adjoint source array = 8.01086426E-03 MB + = 7.82310963E-06 GB + + + Total number of samples for seismograms = 700 + + + Simulation setup: + + no acoustic simulation + incorporating elastic simulation + no poroelastic simulation + + no attenuation + no anisotropy + no oceans + no gravity + no movie simulation + + + preparing mass matrices + preparing constants + preparing wavefields + preparing fault simulation + no dynamic faults + no kinematic faults + no fault simulation + preparing gravity + no gravity simulation + preparing Stacey absorbing boundaries + preparing adjoint fields + preparing optimized arrays + number of regular shaped elements : 0 + number of irregular shaped elements: 5137 + fused array done + bandwidth test (STREAM TRIAD): + memory accesses = 11.7950935 MB + timing min/max = 7.90624996E-04 s / 8.15416977E-04 s + timing avg = 8.06912489E-04 s + bandwidth = 14.2749634 GB/s + + + Elapsed time for preparing timerun in seconds = 0.22194879200000006 + + ************ + time loop + ************ + scheme: Newmark + + time step: 5.00000007E-02 s + number of time steps: 700 + total simulated 
time: 35.0000000 seconds + start time: -10.0000000 seconds + + All processes are synchronized before the time loop + + Starting time iteration loop... + + Time step # 5 + Time: -9.80000019 seconds + Elapsed time in seconds = 0.57578966700000001 + Elapsed time in hh:mm:ss = 0 h 00 m 00 s + Mean elapsed time per time step in seconds = 0.115157932 + Max norm displacement vector U in all slices (m) = 0.00000000 + Max norm displacement vector U (backward) in all slices (m) = 828566.938 + Time steps done = 5 out of 700 + Time steps remaining = 695 + Estimated remaining time in seconds = 80.0347672 + Estimated remaining time in hh:mm:ss = 0 h 01 m 20 s + Estimated total run time in seconds = 80.6105499 + Estimated total run time in hh:mm:ss = 0 h 01 m 20 s + We have done 0.714285731 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 09:37 + ************************************************************ + **** BEWARE: the above time estimates are not very reliable + **** because fewer than 100 iterations have been performed + ************************************************************ + + Time step # 500 + Time: 14.9499998 seconds + Elapsed time in seconds = 63.784059361999994 + Elapsed time in hh:mm:ss = 0 h 01 m 03 s + Mean elapsed time per time step in seconds = 0.127568126 + Max norm displacement vector U in all slices (m) = 6.67026429E-16 + Max norm displacement vector U (backward) in all slices (m) = 421910.750 + Time steps done = 500 out of 700 + Time steps remaining = 200 + Estimated remaining time in seconds = 25.5136242 + Estimated remaining time in hh:mm:ss = 0 h 00 m 25 s + Estimated total run time in seconds = 89.2976837 + Estimated total run time in hh:mm:ss = 0 h 01 m 29 s + We have done 71.4285736 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 09:37 + + Time step # 700 + Time: 24.9500008 seconds + Elapsed time in seconds = 89.236308331000004 + Elapsed time in hh:mm:ss = 0 h 01 m 29 s + Mean 
elapsed time per time step in seconds = 0.127480447 + Max norm displacement vector U in all slices (m) = 5.62951702E-16 + Max norm displacement vector U (backward) in all slices (m) = 0.386163443 + Time steps done = 700 out of 700 + Time steps remaining = 0 + Estimated remaining time in seconds = 0.00000000 + Estimated remaining time in hh:mm:ss = 0 h 00 m 00 s + Estimated total run time in seconds = 89.2363052 + Estimated total run time in hh:mm:ss = 0 h 01 m 29 s + We have done 100.000000 % of that + + Writing the seismograms + Total number of time steps written: 700 + Writing the seismograms in parallel took 5.20829999E-05 seconds + + Time loop finished. Timing info: + Total elapsed time in seconds = 89.384531414999998 + Total elapsed time in hh:mm:ss = 0 h 01 m 29 s + + finalizing simulation + + Elastic kernels: + maximum value of rho kernel = 3.18577920E-09 + maximum value of kappa kernel = 9.48201340E-09 + maximum value of mu kernel = 3.89552284E-08 + + maximum value of rho prime kernel = 4.44523067E-08 + maximum value of alpha kernel = 3.07185886E-08 + maximum value of beta kernel = 6.61558914E-08 + + + End of the simulation + diff --git a/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_solver.txt.forward b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_solver.txt.forward new file mode 100644 index 000000000..53cfdee7b --- /dev/null +++ b/EXAMPLES/applications/homogeneous_halfspace_HEX8_elastic_absorbing_Stacey_5sides/REF_KERNEL/output_solver.txt.forward @@ -0,0 +1,446 @@ + ********************************************** + **** Specfem 3-D Solver - MPI version f90 **** + ********************************************** + + Running Git package version of the code: 4.1.1 + which is Git unknown + dating unknown + + + + + There are 4 MPI processes + Processes are numbered from 0 to 3 + + There is a total of 4 slices + + NDIM = 3 + + NGLLX = 5 + NGLLY = 5 + NGLLZ = 
5 + + using single precision for the calculations + + smallest and largest possible floating-point numbers are: 1.17549435E-38 3.40282347E+38 + + velocity model: default + + Reading mesh databases... + reads binary mesh files: proc***_external_mesh.bin + from directory : OUTPUT_FILES/DATABASES_MPI + + simulation w/ acoustic domain: F + simulation w/ elastic domain: T + simulation w/ poroelastic domain: F + + slice 0 has: + number of elements acoustic : 0 + number of elements elastic : 5137 + number of elements poroelastic: 0 + done + + total acoustic elements : 0 + total elastic elements : 20736 + total poroelastic elements : 0 + + Mesh resolution: + + ******** + minimum and maximum number of elements + and points in the CUBIT + SCOTCH mesh: + + NSPEC_global_min = 5137 + NSPEC_global_max = 5231 + NSPEC_global_max / NSPEC_global_min imbalance = 1.01829863 = 1.82986176 % + NSPEC_global_sum = 20736 + + NGLOB_global_min = 343557 + NGLOB_global_max = 350025 + NGLOB_global_max / NGLOB_global_min imbalance = 1.01882660 = 1.88265705 % + NGLOB_global_sum = 1387132 + + If you have elements of a single type (all acoustic, all elastic, all poroelastic, and without CPML) + in the whole mesh, then there should be no significant imbalance in the above numbers. + Otherwise, it is normal to have imbalance in elements and points because the domain decomposer + compensates for the different cost of different elements by partitioning them unevenly among processes. 
+ ******** + + + ******** + Model: P velocity min,max = 2800.00000 2800.00000 + Model: S velocity min,max = 1500.00000 1500.00000 + + Model: Poisson's ratio min,max = 0.298747778 0.298747778 + ******** + + ********************************************* + *** Verification of simulation parameters *** + ********************************************* + + *** Xmin and Xmax of the model = 0.00000000 134000.000 + *** Ymin and Ymax of the model = 0.00000000 134000.000 + *** Zmin and Zmax of the model = -60000.0000 0.00000000 + + *** Max GLL point distance = 1227.47656 + *** Min GLL point distance = 642.726562 + *** Max/min ratio = 1.90979588 + + *** Max element size = 3750.00000 + *** Min element size = 3722.21875 + *** Max/min ratio = 1.00746357 + + *** Minimum period resolved = 3.12500000 + *** Maximum suggested time step = 0.109999999 + + *** for DT : 5.0000000000000003E-002 + *** Max stability for wave velocities = 0.217822030 + + Elapsed time for checking mesh resolution in seconds = 1.4015166000000001E-002 + saving VTK files for Courant number and minimum period + + + ****************************************** + There is a total of 4 slices + ****************************************** + + + kd-tree: + total data points: 138699 + theoretical number of nodes: 277388 + tree memory size: 8.46520996 MB + actual number of nodes: 277397 + tree memory size: 8.46548462 MB + maximum depth : 18 + creation timing : 4.61480021E-02 (s) + + + sources: 1 + + ******************** + locating sources + ******************** + + reading source information from ./DATA/CMTSOLUTION file + + no UTM projection + + + source # 1 + source located in slice 1 + in element 2755 + in elastic domain + + using moment tensor source: + xi coordinate of source in that element: 1.0000000000000000 + eta coordinate of source in that element: 1.0000000000000000 + gamma coordinate of source in that element: 1.0000000000000000 + + source time function: + using (quasi) Heaviside source time function + half 
duration: 5.0000000000000000 seconds + + time shift: 0.0000000000000000 seconds + + magnitude of the source: + scalar moment M0 = 2.6266994498800193E+028 dyne-cm + moment magnitude Mw = 8.2462737794883481 + + original (requested) position of the source: + + latitude: 67000.000000000000 + longitude: 67000.000000000000 + + x: 67000.000000000000 + y: 67000.000000000000 + depth: 30.000000000000000 km + topo elevation: 0.0000000000000000 + + position of the source that will be used: + + x: 67000.000000000000 + y: 67000.000000000000 + depth: 30.000000000000000 km + z: -30000.000000000000 + + error in location of the source: 0.00000000 m + + + + maximum error in location of the sources: 0.00000000 m + + + Elapsed time for detection of sources in seconds = 1.01769576E-02 + + End of source detection - done + + + receivers: + + there are 4 stations in file ./DATA/STATIONS + saving 4 stations inside the model in file ./DATA/STATIONS_FILTERED + excluding 0 stations located outside the model + + Total number of receivers = 4 + + + ******************** + locating receivers + ******************** + + reading receiver information from ./DATA/STATIONS_FILTERED file + + + station # 1 DB X20 + original latitude: 67000.0000 + original longitude: 22732.1406 + original x: 22732.1406 + original y: 67000.0000 + original depth: 0.00000000 m + horizontal distance: 44.2678604 + target x, y, z: 22732.1406 67000.0000 0.00000000 + closest estimate found: 0.00000000 m away + + receiver located in slice 2 + in element 4904 + in elastic domain + at coordinates: + xi = -0.78571608325633080 + eta = 1.0000000000000000 + gamma = 1.0000000000000000 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 22732.139999999999 + y: 67000.000000000000 + depth: 0.0000000000000000 m + z: 0.0000000000000000 + + + + station # 2 DB X30 + original latitude: 67000.0000 + original longitude: 34696.4297 + original x: 34696.4297 
+ original y: 67000.0000 + original depth: 0.00000000 m + horizontal distance: 32.3035698 + target x, y, z: 34696.4297 67000.0000 0.00000000 + closest estimate found: 7.27595761E-12 m away + + receiver located in slice 2 + in element 4907 + in elastic domain + at coordinates: + xi = -0.35714216451233788 + eta = 1.0000000000000000 + gamma = 1.0000000000000000 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 34696.430000000008 + y: 67000.000000000000 + depth: 0.0000000000000000 m + z: 0.0000000000000000 + + + + station # 3 DB X40 + original latitude: 67000.0000 + original longitude: 46660.7109 + original x: 46660.7109 + original y: 67000.0000 + original depth: 0.00000000 m + horizontal distance: 20.3392906 + target x, y, z: 46660.7109 67000.0000 0.00000000 + closest estimate found: 0.00000000 m away + + receiver located in slice 2 + in element 4910 + in elastic domain + at coordinates: + xi = 7.1425444096846064E-002 + eta = 1.0000000000000000 + gamma = 1.0000000000000000 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 46660.709999999999 + y: 67000.000000000000 + depth: 0.0000000000000000 m + z: 0.0000000000000000 + + + + station # 4 DB X50 + original latitude: 67000.0000 + original longitude: 58625.0000 + original x: 58625.0000 + original y: 67000.0000 + original depth: 0.00000000 m + horizontal distance: 8.37500000 + target x, y, z: 58625.0000 67000.0000 0.00000000 + closest estimate found: 0.00000000 m away + + receiver located in slice 1 + in element 5164 + in elastic domain + at coordinates: + xi = 0.50000052472008893 + eta = 1.0000000000000000 + gamma = 1.0000000000000000 + rotation matrix: + nu1 = 1.00000000 0.00000000 0.00000000 + nu2 = 0.00000000 1.00000000 0.00000000 + nu3 = 0.00000000 0.00000000 1.00000000 + x: 58625.000000000000 + y: 67000.000000000000 + depth: 
0.0000000000000000 m + z: 0.0000000000000000 + + + + maximum error in location of all the receivers: 7.27595761E-12 m + + Elapsed time for receiver detection in seconds = 1.27633326E-02 + + End of receiver detection - done + + found a total of 4 receivers in all the slices + + source arrays: + number of sources is 1 + size of source array = 1.43051147E-03 MB + = 1.39698386E-06 GB + + seismograms: + seismograms written by all processes + + Total number of simulation steps (NSTEP) = 700 + writing out seismograms at every NTSTEP_BETWEEN_OUTPUT_SEISMOS = 700 + number of subsampling steps for seismograms = 1 + Total number of samples for seismograms = 700 + + + maximum number of local receivers is 3 in slice 2 + size of maximum seismogram array = 2.40325928E-02 MB + = 2.34693289E-05 GB + + + Total number of samples for seismograms = 700 + + + Simulation setup: + + no acoustic simulation + incorporating elastic simulation + no poroelastic simulation + + no attenuation + no anisotropy + no oceans + no gravity + no movie simulation + + + preparing mass matrices + preparing constants + preparing wavefields + preparing fault simulation + no dynamic faults + no kinematic faults + no fault simulation + preparing gravity + no gravity simulation + preparing Stacey absorbing boundaries + preparing optimized arrays + number of regular shaped elements : 0 + number of irregular shaped elements: 5137 + fused array done + bandwidth test (STREAM TRIAD): + memory accesses = 11.7950935 MB + timing min/max = 5.98000013E-04 s / 6.39792008E-04 s + timing avg = 6.12271018E-04 s + bandwidth = 18.8129864 GB/s + + + Elapsed time for preparing timerun in seconds = 2.6165875000000005E-002 + + ************ + time loop + ************ + scheme: Newmark + + time step: 5.00000007E-02 s + number of time steps: 700 + total simulated time: 35.0000000 seconds + start time: -10.0000000 seconds + + All processes are synchronized before the time loop + + Starting time iteration loop... 
+ + Time step # 5 + Time: -9.80000019 seconds + Elapsed time in seconds = 0.21031925000000001 + Elapsed time in hh:mm:ss = 0 h 00 m 00 s + Mean elapsed time per time step in seconds = 4.20638509E-02 + Max norm displacement vector U in all slices (m) = 2.24574184 + Time steps done = 5 out of 700 + Time steps remaining = 695 + Estimated remaining time in seconds = 29.2343750 + Estimated remaining time in hh:mm:ss = 0 h 00 m 29 s + Estimated total run time in seconds = 29.4446945 + Estimated total run time in hh:mm:ss = 0 h 00 m 29 s + We have done 0.714285731 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 09:35 + ************************************************************ + **** BEWARE: the above time estimates are not very reliable + **** because fewer than 100 iterations have been performed + ************************************************************ + + Time step # 500 + Time: 14.9499998 seconds + Elapsed time in seconds = 25.507467970000000 + Elapsed time in hh:mm:ss = 0 h 00 m 25 s + Mean elapsed time per time step in seconds = 5.10149375E-02 + Max norm displacement vector U in all slices (m) = 828559.188 + Time steps done = 500 out of 700 + Time steps remaining = 200 + Estimated remaining time in seconds = 10.2029867 + Estimated remaining time in hh:mm:ss = 0 h 00 m 10 s + Estimated total run time in seconds = 35.7104568 + Estimated total run time in hh:mm:ss = 0 h 00 m 35 s + We have done 71.4285736 % of that + The run will finish approximately on (in local time): Thu Nov 14, 2024 09:35 + + Time step # 700 + Time: 24.9500008 seconds + Elapsed time in seconds = 35.833851183000000 + Elapsed time in hh:mm:ss = 0 h 00 m 35 s + Mean elapsed time per time step in seconds = 5.11912145E-02 + Max norm displacement vector U in all slices (m) = 828567.250 + Time steps done = 700 out of 700 + Time steps remaining = 0 + Estimated remaining time in seconds = 0.00000000 + Estimated remaining time in hh:mm:ss = 0 h 00 m 00 s + Estimated 
total run time in seconds = 35.8338509 + Estimated total run time in hh:mm:ss = 0 h 00 m 35 s + We have done 100.000000 % of that + + Writing the seismograms + Total number of time steps written: 700 + Writing the seismograms in parallel took 1.07087921E-02 seconds + + Time loop finished. Timing info: + Total elapsed time in seconds = 35.902537975000001 + Total elapsed time in hh:mm:ss = 0 h 00 m 35 s + + finalizing simulation + + + End of the simulation + diff --git a/Makefile.in b/Makefile.in index b7069d566..53a589136 100644 --- a/Makefile.in +++ b/Makefile.in @@ -273,6 +273,9 @@ GENCODE_AMD_MI250 = --amdgpu-target=gfx90a # NVIDIA default Tesla @COND_HIP_TRUE@@COND_HIP_PLATFORM_NVIDIA_TRUE@GENCODE_HIP = $(GENCODE_30) @COND_HIP_TRUE@@COND_HIP_PLATFORM_NVIDIA_TRUE@HIP_CFLAG_ENDING = # no need for ending +# CPU +@COND_HIP_TRUE@@COND_HIP_PLATFORM_CPU_TRUE@GENCODE_HIP = # no card specifics +@COND_HIP_TRUE@@COND_HIP_PLATFORM_CPU_TRUE@HIP_CFLAG_ENDING = -x c++ # specific targets @COND_HIP_TRUE@@COND_HIP_MI8_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI8) # --with-hip=MI8 .. 
@@ -297,7 +300,7 @@ HIP_INC = @HIP_CPPFLAGS@ $(MPI_INCLUDES) @COND_HIP_FALSE@HIPCC = @CC@ @COND_HIP_TRUE@HIP_CFLAGS = $(HIP_FLAGS) $(HIP_INC) $(GENCODE_HIP) -@COND_HIP_TRUE@HIP_LINK = @HIP_LDFLAGS@ @HIP_LIBS@ +@COND_HIP_TRUE@HIP_LINK = @HIP_LIBS@ @COND_HIP_FALSE@HIP_CFLAGS = @COND_HIP_FALSE@HIP_LINK = diff --git a/configure b/configure index 0d91216ae..b16992248 100755 --- a/configure +++ b/configure @@ -655,16 +655,17 @@ VTK_INCLUDES VTK_MAJOR OMP_LIB OMP_FCFLAGS +COND_HIP_PLATFORM_CPU_FALSE +COND_HIP_PLATFORM_CPU_TRUE COND_HIP_PLATFORM_NVIDIA_FALSE COND_HIP_PLATFORM_NVIDIA_TRUE COND_HIP_PLATFORM_AMD_FALSE COND_HIP_PLATFORM_AMD_TRUE -HIP_LIBS -HIP_LDFLAGS HIP_CPPFLAGS HIPCONFIG_PROG HIPCC_PROG -HIP_LIB +HIP_PLATFORM +HIP_LIBS HIP_INC HIP_FLAGS HIPCC @@ -912,7 +913,8 @@ CPP HIPCC HIP_FLAGS HIP_INC -HIP_LIB +HIP_LIBS +HIP_PLATFORM OMP_FCFLAGS OMP_LIB HDF5_INC @@ -1621,7 +1623,9 @@ Some influential environment variables: HIPCC AMD HIP compiler command HIP_FLAGS HIP compiler flags HIP_INC Location of HIP include files - HIP_LIB Location of HIP library libhip_hcc + HIP_LIBS HIP linking flags + HIP_PLATFORM + target HIP platform (amd/nvidia/cpu; default amd) OMP_FCFLAGS OpenMP Fortran compiler flags OMP_LIB Location of extra OpenMP libraries HDF5_INC HDF5 include directory @@ -9535,10 +9539,13 @@ then : + # tests HIPCC variable if test x"$HIPCC" = x then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: using default hip compiler: \`hipcc\`" >&5 +printf "%s\n" "$as_me: using default hip compiler: \`hipcc\`" >&6;} HIPCC=hipcc fi @@ -9592,7 +9599,7 @@ fi if test -z "$HIPCC_PROG" ; then - as_fn_error $? "cannot find '$HIPCC' program, please check your PATH." "$LINENO" 5 + as_fn_error $? "cannot find '$HIPCC' program; try setting HIPCC, and please check your PATH." 
"$LINENO" 5 fi # sets default HIP path @@ -9710,19 +9717,21 @@ printf "%s\n" "$as_me: HIP path: $HIP_PATH" >&6;} #]) # adds default HIP library - if test x"$HIP_LIB" = x -a x"${HIP_PATH}" != x + if test x"$HIP_LIBS" = x -a x"${HIP_PATH}" != x then : - HIP_LIB="${HIP_PATH}/lib" - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: adding default HIP library path: ${HIP_LIB}" >&5 -printf "%s\n" "$as_me: adding default HIP library path: ${HIP_LIB}" >&6;} + HIP_LIBS="-L${HIP_PATH}/lib" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: adding default HIP library path: ${HIP_LIBS}" >&5 +printf "%s\n" "$as_me: adding default HIP library path: ${HIP_LIBS}" >&6;} fi # checks platform - if test x"$HIPCONFIG_PROG" != x ; then - GPU_PLATFORM=`$HIPCONFIG_PROG --platform` - if test x"$GPU_PLATFORM" = xnvidia + if test x"$HIPCONFIG_PROG" != x -a x"$HIP_PLATFORM" = x +then : + + HIP_PLATFORM=`$HIPCONFIG_PROG --platform` + if test x"$HIP_PLATFORM" = xnvidia then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: HIP PLATFORM NVIDIA detected." >&5 @@ -9732,7 +9741,7 @@ printf "%s\n" "$as_me: HIP PLATFORM NVIDIA detected." >&6;} fi - if test x"$GPU_PLATFORM" = xamd + if test x"$HIP_PLATFORM" = xamd then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: HIP PLATFORM AMD detected." 
>&5 @@ -9745,18 +9754,12 @@ fi case "${HIP_FLAGS}" in *HIP_PLATFORM*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: hip flags contain platform specifier: ${HIP_FLAGS}" >&5 printf "%s\n" "$as_me: hip flags contain platform specifier: ${HIP_FLAGS}" >&6;} ;; - *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: adding flag for platform specifier ${FLAG_PLATFORM}" >&5 -printf "%s\n" "$as_me: adding flag for platform specifier ${FLAG_PLATFORM}" >&6;}; + *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: adding flag for platform specifier: ${FLAG_PLATFORM}" >&5 +printf "%s\n" "$as_me: adding flag for platform specifier: ${FLAG_PLATFORM}" >&6;}; HIP_FLAGS="${HIP_FLAGS} ${FLAG_PLATFORM}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: HIP_FLAGS: ${HIP_FLAGS}" >&5 printf "%s\n" "$as_me: HIP_FLAGS: ${HIP_FLAGS}" >&6;} ;; esac - fi - # sets default platform to AMD if not set - if test x"$GPU_PLATFORM" = x -then : - - GPU_PLATFORM=amd fi @@ -9775,20 +9778,13 @@ printf "%s\n" "$as_me: HIP flags contain position independent code flag -fPIC: $ printf "%s\n" "$as_me: consider adding compilation flag for position independent code: HIP_FLAGS=\"-fPIC\"" >&6;};; esac - # test lib & include - if test x"$HIP_LIB" != x -then : - - HIP_LDFLAGS="-L$HIP_LIB" - -fi + # test include if test x"$HIP_INC" != x then : HIP_CPPFLAGS="-I$HIP_INC" fi - HIP_LIBS="-lamdhip64" # checks header file ac_ext=c @@ -9808,6 +9804,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu HIP_CPPFLAGS="-I$HIP_INC" CFLAGS="$CFLAGS $HIP_CPPFLAGS" fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: using hip compilation flags: $CFLAGS" >&5 +printf "%s\n" "$as_me: using hip compilation flags: $CFLAGS" >&6;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for hip/hip_runtime.h" >&5 printf %s "checking for hip/hip_runtime.h... 
" >&6; } @@ -9849,17 +9847,9 @@ esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - # checks for HIP library - if test "x$HIP_LIB" != "x"; then - HIP_LDFLAGS="-L$HIP_LIB" - LDFLAGS="$HIP_LDFLAGS $LDFLAGS" - fi - HIP_LIBS="-lamdhip64" - LIBS="$HIP_LIBS $LIBS" - # runs compilation test with hipcc - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking hipcc compilation with hipMalloc in -lamdhip64" >&5 -printf %s "checking hipcc compilation with hipMalloc in -lamdhip64... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for hip compilation with hipMalloc" >&5 +printf %s "checking for hip compilation with hipMalloc... " >&6; } ac_compile='$HIPCC -c $CFLAGS conftest.$ac_ext >&5' cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -9892,16 +9882,41 @@ else case e in #( e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - as_fn_error $? "HIP library function with hipcc compilation failed; try setting HIP_INC." "$LINENO" 5 + as_fn_error $? "HIP library function with hipcc compilation failed; try setting HIP_FLAGS and HIP_INC." 
"$LINENO" 5 ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + # sets default platform to AMD if not set + if test x"$HIP_PLATFORM" = x +then : + + HIP_PLATFORM=amd + +fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: HIP_PLATFORM: $HIP_PLATFORM" >&5 +printf "%s\n" "$as_me: HIP_PLATFORM: $HIP_PLATFORM" >&6;} + + # linking library + if test x"$HIP_PLATFORM" = xamd +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: adding default '-lamdhip64' linking for HIP platform AMD" >&5 +printf "%s\n" "$as_me: adding default '-lamdhip64' linking for HIP platform AMD" >&6;} + HIP_LIBS="-lamdhip64" + +fi + + # test lib + LIBS="$HIP_LIBS $LIBS $LDFLAGS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: using hip linking flags: $LIBS" >&5 +printf "%s\n" "$as_me: using hip linking flags: $LIBS" >&6;} + # runs linking test with hipcc - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking hipcc linking with hipMalloc in -lamdhip64" >&5 -printf %s "checking hipcc linking with hipMalloc in -lamdhip64... " >&6; } - ac_link='$HIPCC -o conftest$ac_exeext $CFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for hip linking with hipMalloc" >&5 +printf %s "checking for hip linking with hipMalloc... " >&6; } + ac_link='$HIPCC -o conftest$ac_exeext $CFLAGS conftest.$ac_ext $LIBS >&5' cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -9935,7 +9950,7 @@ else case e in #( e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - as_fn_error $? "HIP library linking with HIP failed; try setting HIP_LIB." "$LINENO" 5 + as_fn_error $? "HIP library linking with HIP failed; try setting HIP_LIBS and HIP_PLATFORM." 
"$LINENO" 5 ;; esac fi @@ -9960,9 +9975,8 @@ ac_compiler_gnu=$ac_cv_fc_compiler_gnu - fi - if test x"$GPU_PLATFORM" = xamd; then + if test x"$HIP_PLATFORM" = xamd; then COND_HIP_PLATFORM_AMD_TRUE= COND_HIP_PLATFORM_AMD_FALSE='#' else @@ -9970,7 +9984,7 @@ else COND_HIP_PLATFORM_AMD_FALSE= fi - if test x"$GPU_PLATFORM" = xnvidia; then + if test x"$HIP_PLATFORM" = xnvidia; then COND_HIP_PLATFORM_NVIDIA_TRUE= COND_HIP_PLATFORM_NVIDIA_FALSE='#' else @@ -9978,6 +9992,13 @@ else COND_HIP_PLATFORM_NVIDIA_FALSE= fi + if test x"$HIP_PLATFORM" = xcpu; then + COND_HIP_PLATFORM_CPU_TRUE= + COND_HIP_PLATFORM_CPU_FALSE='#' +else + COND_HIP_PLATFORM_CPU_TRUE='#' + COND_HIP_PLATFORM_CPU_FALSE= +fi ### @@ -12427,6 +12448,10 @@ if test -z "${COND_HIP_PLATFORM_NVIDIA_TRUE}" && test -z "${COND_HIP_PLATFORM_NV as_fn_error $? "conditional \"COND_HIP_PLATFORM_NVIDIA\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${COND_HIP_PLATFORM_CPU_TRUE}" && test -z "${COND_HIP_PLATFORM_CPU_FALSE}"; then + as_fn_error $? "conditional \"COND_HIP_PLATFORM_CPU\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 diff --git a/configure.ac b/configure.ac index f9207e007..0383e4230 100644 --- a/configure.ac +++ b/configure.ac @@ -539,10 +539,12 @@ AS_IF([test x"$want_hip" != xno], [ AC_ARG_VAR(HIPCC, [AMD HIP compiler command]) AC_ARG_VAR(HIP_FLAGS, [HIP compiler flags]) AC_ARG_VAR(HIP_INC, [Location of HIP include files]) - AC_ARG_VAR(HIP_LIB, [Location of HIP library libhip_hcc]) + AC_ARG_VAR(HIP_LIBS, [HIP linking flags]) + AC_ARG_VAR(HIP_PLATFORM, [target HIP platform (amd/nvidia/cpu; default amd)]) # tests HIPCC variable AS_IF([test x"$HIPCC" = x],[ + AC_MSG_NOTICE([using default hip compiler: `hipcc`]) HIPCC=hipcc ]) @@ -550,7 +552,7 @@ AS_IF([test x"$want_hip" != xno], [ # checks if program in path AC_PATH_PROG(HIPCC_PROG, $HIPCC) if test -z "$HIPCC_PROG" ; then - AC_MSG_ERROR([cannot find '$HIPCC' program, please check your PATH.]) + AC_MSG_ERROR([cannot find '$HIPCC' program; try setting HIPCC, and please check your PATH.]) fi # sets default HIP path @@ -569,21 +571,21 @@ AS_IF([test x"$want_hip" != xno], [ #]) # adds default HIP library - AS_IF([test x"$HIP_LIB" = x -a x"${HIP_PATH}" != x],[ - HIP_LIB="${HIP_PATH}/lib" - AC_MSG_NOTICE([adding default HIP library path: ${HIP_LIB}]) + AS_IF([test x"$HIP_LIBS" = x -a x"${HIP_PATH}" != x],[ + HIP_LIBS="-L${HIP_PATH}/lib" + AC_MSG_NOTICE([adding default HIP library path: ${HIP_LIBS}]) ]) # checks platform - if test x"$HIPCONFIG_PROG" != x ; then - GPU_PLATFORM=`$HIPCONFIG_PROG --platform` - AS_IF([test x"$GPU_PLATFORM" = xnvidia], [ + AS_IF([test x"$HIPCONFIG_PROG" != x -a x"$HIP_PLATFORM" = x],[ + HIP_PLATFORM=`$HIPCONFIG_PROG --platform` + AS_IF([test x"$HIP_PLATFORM" = xnvidia], [ AC_MSG_NOTICE([HIP PLATFORM NVIDIA detected.]) HIP_PLATFORM=nvidia FLAG_PLATFORM="${FC_DEFINE}__HIP_PLATFORM_NVIDIA__" ]) - AS_IF([test x"$GPU_PLATFORM" = xamd], [ + AS_IF([test x"$HIP_PLATFORM" = xamd], [ AC_MSG_NOTICE([HIP PLATFORM AMD detected.]) HIP_PLATFORM=amd 
FLAG_PLATFORM="${FC_DEFINE}__HIP_PLATFORM_AMD__" @@ -591,14 +593,10 @@ AS_IF([test x"$want_hip" != xno], [ # compiler might require a platform selection flag case "${HIP_FLAGS}" in *HIP_PLATFORM*) AC_MSG_NOTICE([hip flags contain platform specifier: ${HIP_FLAGS}]) ;; - *) AC_MSG_NOTICE([adding flag for platform specifier ${FLAG_PLATFORM}]); + *) AC_MSG_NOTICE([adding flag for platform specifier: ${FLAG_PLATFORM}]); HIP_FLAGS="${HIP_FLAGS} ${FLAG_PLATFORM}" AC_MSG_NOTICE([HIP_FLAGS: ${HIP_FLAGS}]) ;; esac - fi - # sets default platform to AMD if not set - AS_IF([test x"$GPU_PLATFORM" = x],[ - GPU_PLATFORM=amd ]) # for compilation errors like: @@ -614,14 +612,10 @@ AS_IF([test x"$want_hip" != xno], [ *) AC_MSG_NOTICE([consider adding compilation flag for position independent code: HIP_FLAGS="-fPIC"]);; esac - # test lib & include - AS_IF([test x"$HIP_LIB" != x],[ - HIP_LDFLAGS="-L$HIP_LIB" - ]) + # test include AS_IF([test x"$HIP_INC" != x],[ HIP_CPPFLAGS="-I$HIP_INC" ]) - HIP_LIBS="-lamdhip64" # checks header file AC_LANG_PUSH(C) @@ -636,6 +630,7 @@ AS_IF([test x"$want_hip" != xno], [ HIP_CPPFLAGS="-I$HIP_INC" CFLAGS="$CFLAGS $HIP_CPPFLAGS" fi + AC_MSG_NOTICE([using hip compilation flags: $CFLAGS]) AC_MSG_CHECKING([for hip/hip_runtime.h]) ac_compile='$HIPCC -c $CFLAGS conftest.$ac_ext >&5' @@ -648,16 +643,8 @@ AS_IF([test x"$want_hip" != xno], [ AC_MSG_ERROR([HIP runtime header not found; try setting HIP_INC.]) ]) - # checks for HIP library - if test "x$HIP_LIB" != "x"; then - HIP_LDFLAGS="-L$HIP_LIB" - LDFLAGS="$HIP_LDFLAGS $LDFLAGS" - fi - HIP_LIBS="-lamdhip64" - LIBS="$HIP_LIBS $LIBS" - # runs compilation test with hipcc - AC_MSG_CHECKING([hipcc compilation with hipMalloc in -lamdhip64]) + AC_MSG_CHECKING([for hip compilation with hipMalloc]) ac_compile='$HIPCC -c $CFLAGS conftest.$ac_ext >&5' AC_COMPILE_IFELSE([ AC_LANG_PROGRAM([[#include ]],[[void* ptr = 0;hipMalloc(&ptr, 1);]]) @@ -665,12 +652,28 @@ AS_IF([test x"$want_hip" != xno], [ AC_MSG_RESULT(yes) ], [ 
AC_MSG_RESULT(no) - AC_MSG_ERROR([HIP library function with hipcc compilation failed; try setting HIP_INC.]) + AC_MSG_ERROR([HIP library function with hipcc compilation failed; try setting HIP_FLAGS and HIP_INC.]) + ]) + + # sets default platform to AMD if not set + AS_IF([test x"$HIP_PLATFORM" = x],[ + HIP_PLATFORM=amd + ]) + AC_MSG_NOTICE([HIP_PLATFORM: $HIP_PLATFORM]) + + # linking library + AS_IF([test x"$HIP_PLATFORM" = xamd],[ + AC_MSG_NOTICE([adding default '-lamdhip64' linking for HIP platform AMD]) + HIP_LIBS="-lamdhip64" ]) + # test lib + LIBS="$HIP_LIBS $LIBS $LDFLAGS" + AC_MSG_NOTICE([using hip linking flags: $LIBS]) + # runs linking test with hipcc - AC_MSG_CHECKING([hipcc linking with hipMalloc in -lamdhip64]) - ac_link='$HIPCC -o conftest$ac_exeext $CFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' + AC_MSG_CHECKING([for hip linking with hipMalloc]) + ac_link='$HIPCC -o conftest$ac_exeext $CFLAGS conftest.$ac_ext $LIBS >&5' AC_LINK_IFELSE([ AC_LANG_PROGRAM([[ #include @@ -679,7 +682,7 @@ AS_IF([test x"$want_hip" != xno], [ AC_MSG_RESULT(yes) ],[ AC_MSG_RESULT(no) - AC_MSG_ERROR([HIP library linking with HIP failed; try setting HIP_LIB.]) + AC_MSG_ERROR([HIP library linking with HIP failed; try setting HIP_LIBS and HIP_PLATFORM.]) ]) #HIP_HEADER_H="hip/hip_runtime.h" @@ -694,12 +697,11 @@ AS_IF([test x"$want_hip" != xno], [ # export flags AC_SUBST([HIP_CPPFLAGS]) - AC_SUBST([HIP_LDFLAGS]) AC_SUBST([HIP_LIBS]) ]) -AM_CONDITIONAL([COND_HIP_PLATFORM_AMD], [test x"$GPU_PLATFORM" = xamd]) -AM_CONDITIONAL([COND_HIP_PLATFORM_NVIDIA], [test x"$GPU_PLATFORM" = xnvidia]) - +AM_CONDITIONAL([COND_HIP_PLATFORM_AMD], [test x"$HIP_PLATFORM" = xamd]) +AM_CONDITIONAL([COND_HIP_PLATFORM_NVIDIA], [test x"$HIP_PLATFORM" = xnvidia]) +AM_CONDITIONAL([COND_HIP_PLATFORM_CPU], [test x"$HIP_PLATFORM" = xcpu]) ### ### OpenMP diff --git a/external_libs/README.md b/external_libs/README.md index 98095444b..d60838a0c 100644 --- a/external_libs/README.md +++ b/external_libs/README.md @@ 
-8,5 +8,7 @@ For further informations, see: - [METIS](http://glaros.dtc.umn.edu/gkhome/metis/metis/overview) - [PaToH](https://www.cc.gatech.edu/~umit/software.html) +for GPU HIP version testing on CPUs: +- [HIP-CPU](https://github.com/ROCm/HIP-CPU) diff --git a/external_libs/ROCm-HIP-CPU b/external_libs/ROCm-HIP-CPU new file mode 160000 index 000000000..e112c9350 --- /dev/null +++ b/external_libs/ROCm-HIP-CPU @@ -0,0 +1 @@ +Subproject commit e112c935057434897bb12d9ab3910380a8bd5f58 diff --git a/src/gpu/compute_add_sources_acoustic_cuda.cu b/src/gpu/compute_add_sources_acoustic_cuda.cu index 9f3fdce31..19c3cbbb0 100644 --- a/src/gpu/compute_add_sources_acoustic_cuda.cu +++ b/src/gpu/compute_add_sources_acoustic_cuda.cu @@ -230,7 +230,7 @@ void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda, int it_index = *NTSTEP_BETWEEN_READ_ADJSRC - (*it-1) % *NTSTEP_BETWEEN_READ_ADJSRC - 1 ; // copies extracted array values onto GPU - if ( (*it-1) % *NTSTEP_BETWEEN_READ_ADJSRC==0){ + if ( (*it-1) % *NTSTEP_BETWEEN_READ_ADJSRC == 0){ // note: field declaration is only equal to realw if NB_RUNS_ACOUSTIC_GPU == 1. // for any other setting of NB_RUNS_ACOUSTIC_GPU, the compilation would fail for // gpuMemcpy_todevice_field(mp->d_source_adjoint,h_source_adjoint,mp->nadj_rec_local*NDIM*(*NTSTEP_BETWEEN_READ_ADJSRC)); diff --git a/src/gpu/compute_add_sources_viscoelastic_cuda.cu b/src/gpu/compute_add_sources_viscoelastic_cuda.cu index e525793cd..b0b084173 100644 --- a/src/gpu/compute_add_sources_viscoelastic_cuda.cu +++ b/src/gpu/compute_add_sources_viscoelastic_cuda.cu @@ -60,6 +60,9 @@ void FC_FUNC_(compute_add_sources_el_cuda, // get_stf_for_gpu(stf_pre_compute,h_stf_pre_compute,run_number_of_the_source,NSOURCES); // however, NB_RUNS_ACOUSTIC_GPU > 1 is not supported by elastic sources, thus field declaration is realw by default. // to avoid compilation issues we use the copy function for (void*) and exact byte size. 
+ // safety check + if (NB_RUNS_ACOUSTIC_GPU != 1) exit_on_error("compute_add_sources_el_cuda: must have NB_RUNS_ACOUSTIC_GPU = 1 for elastic sources\n"); + gpuMemcpy_todevice_void((void*)mp->d_stf_pre_compute,(void*)stf_pre_compute,NSOURCES*sizeof(realw)); free(stf_pre_compute); @@ -74,7 +77,8 @@ void FC_FUNC_(compute_add_sources_el_cuda, #ifdef USE_CUDA if (run_cuda){ - compute_add_sources_kernel<<compute_stream>>>(mp->d_accel,mp->d_ibool, + compute_add_sources_kernel<<compute_stream>>>(mp->d_accel, + mp->d_ibool, mp->d_sourcearrays, mp->d_stf_pre_compute, mp->myrank, @@ -86,7 +90,8 @@ void FC_FUNC_(compute_add_sources_el_cuda, #ifdef USE_HIP if (run_hip){ hipLaunchKernelGGL(compute_add_sources_kernel, dim3(grid), dim3(threads), 0, mp->compute_stream, - mp->d_accel,mp->d_ibool, + mp->d_accel, + mp->d_ibool, mp->d_sourcearrays, mp->d_stf_pre_compute, mp->myrank, @@ -124,6 +129,9 @@ void FC_FUNC_(compute_add_sources_el_s3_cuda, // get_stf_for_gpu(stf_pre_compute,h_stf_pre_compute,run_number_of_the_source,NSOURCES); // however, NB_RUNS_ACOUSTIC_GPU > 1 is not supported by elastic sources, thus field declaration is realw by default. // to avoid compilation issues we use the copy function for (void*) and exact byte size. 
+ // safety check + if (NB_RUNS_ACOUSTIC_GPU != 1) exit_on_error("compute_add_sources_el_s3_cuda: must have NB_RUNS_ACOUSTIC_GPU = 1 for elastic sources\n"); + gpuMemcpy_todevice_void((void*)mp->d_stf_pre_compute,(void*)stf_pre_compute,NSOURCES*sizeof(realw)); free(stf_pre_compute); @@ -138,7 +146,8 @@ void FC_FUNC_(compute_add_sources_el_s3_cuda, #ifdef USE_CUDA if (run_cuda){ - compute_add_sources_kernel<<compute_stream>>>(mp->d_b_accel,mp->d_ibool, + compute_add_sources_kernel<<compute_stream>>>(mp->d_b_accel, + mp->d_ibool, mp->d_sourcearrays, mp->d_stf_pre_compute, mp->myrank, @@ -150,7 +159,8 @@ void FC_FUNC_(compute_add_sources_el_s3_cuda, #ifdef USE_HIP if (run_hip){ hipLaunchKernelGGL(compute_add_sources_kernel, dim3(grid), dim3(threads), 0, mp->compute_stream, - mp->d_accel,mp->d_ibool, + mp->d_b_accel, + mp->d_ibool, mp->d_sourcearrays, mp->d_stf_pre_compute, mp->myrank, @@ -248,13 +258,16 @@ void FC_FUNC_(add_sources_el_sim_type_2_or_3, int it_index = *NTSTEP_BETWEEN_READ_ADJSRC - (*it-1) % *NTSTEP_BETWEEN_READ_ADJSRC - 1 ; // copies extracted array values onto GPU - if ( (*it-1) % *NTSTEP_BETWEEN_READ_ADJSRC==0){ + if ( (*it-1) % *NTSTEP_BETWEEN_READ_ADJSRC == 0){ // note: field declaration is only equal to realw if NB_RUNS_ACOUSTIC_GPU == 1. // for any other setting of NB_RUNS_ACOUSTIC_GPU, the compilation would fail for // gpuMemcpy_todevice_field(mp->d_source_adjoint,h_source_adjoint,mp->nadj_rec_local*NDIM*(*NTSTEP_BETWEEN_READ_ADJSRC)); // since the host array is still defined as realw. in that case, we will need to construct a field array first. // however, the case with NB_RUNS_ACOUSTIC_GPU > 1 is not fully implemented yet for adjoint/kernels simulations and elastic cases, // and on the todo for future ... 
+ // safety check + if (NB_RUNS_ACOUSTIC_GPU != 1) exit_on_error("add_sources_el_sim_type_2_or_3: must have NB_RUNS_ACOUSTIC_GPU = 1 for elastic sources\n"); + // copies adjoint source array onto GPU using (void*) as variable and actual byte size to avoid compilation errors gpuMemcpy_todevice_void((void*)mp->d_source_adjoint,(void*)h_source_adjoint, mp->nadj_rec_local*NDIM*(*NTSTEP_BETWEEN_READ_ADJSRC)*sizeof(realw)); @@ -285,7 +298,7 @@ void FC_FUNC_(add_sources_el_sim_type_2_or_3, mp->d_hgammar_adj, mp->d_ibool, mp->d_ispec_is_elastic, - mp->d_ispec_selected_rec_loc, + mp->d_ispec_selected_adjrec_loc, mp->nadj_rec_local); } #endif diff --git a/src/gpu/compute_coupling_cuda.cu b/src/gpu/compute_coupling_cuda.cu index a4a8d7bda..ad24ec9a8 100644 --- a/src/gpu/compute_coupling_cuda.cu +++ b/src/gpu/compute_coupling_cuda.cu @@ -35,6 +35,9 @@ /* ----------------------------------------------------------------------------------------------- */ +// coupling direction: elastic wavefield/domain -> acoustic wavefield/domain +// (updates acoustic potential_dot_dot wavefield) + extern EXTERN_LANG void FC_FUNC_(compute_coupling_ac_el_cuda, COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer, @@ -90,7 +93,7 @@ void FC_FUNC_(compute_coupling_ac_el_cuda, // launches GPU kernel #ifdef USE_CUDA if (run_cuda){ - compute_coupling_acoustic_el_kernel<<>>(displ, + compute_coupling_acoustic_el_kernel<<compute_stream>>>(displ, potential_dot_dot, num_coupling_ac_el_faces, mp->d_coupling_ac_el_ispec, @@ -104,7 +107,7 @@ void FC_FUNC_(compute_coupling_ac_el_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_coupling_acoustic_el_kernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_coupling_acoustic_el_kernel, dim3(grid), dim3(threads), 0, mp->compute_stream, displ, potential_dot_dot, num_coupling_ac_el_faces, @@ -131,6 +134,9 @@ void FC_FUNC_(compute_coupling_ac_el_cuda, /* 
----------------------------------------------------------------------------------------------- */ +// coupling direction: acoustic wavefield/domain -> elastic wavefield/domain +// (updates elastic acceleration wavefield) + extern EXTERN_LANG void FC_FUNC_(compute_coupling_el_ac_cuda, COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer, @@ -189,7 +195,7 @@ void FC_FUNC_(compute_coupling_el_ac_cuda, // launches GPU kernel #ifdef USE_CUDA if (run_cuda){ - compute_coupling_elastic_ac_kernel<<>>(potential_dot_dot, + compute_coupling_elastic_ac_kernel<<compute_stream>>>(potential_dot_dot, accel, num_coupling_ac_el_faces, mp->d_coupling_ac_el_ispec, @@ -207,7 +213,7 @@ void FC_FUNC_(compute_coupling_el_ac_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_coupling_elastic_ac_kernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_coupling_elastic_ac_kernel, dim3(grid), dim3(threads), 0, mp->compute_stream, potential_dot_dot, accel, num_coupling_ac_el_faces, @@ -248,7 +254,7 @@ void FC_FUNC_(compute_coupling_ocean_cuda, // safety check if (*FORWARD_OR_ADJOINT != 1 && *FORWARD_OR_ADJOINT != 3) { - exit_on_error("Error invalid FORWARD_OR_ADJOINT in update_displacement_ac_cuda() routine"); + exit_on_error("Error invalid FORWARD_OR_ADJOINT in compute_coupling_ocean_cuda() routine"); } // checks if anything to do diff --git a/src/gpu/compute_forces_viscoelastic_cuda.cu b/src/gpu/compute_forces_viscoelastic_cuda.cu index dd2d90f2c..1cfb0a512 100644 --- a/src/gpu/compute_forces_viscoelastic_cuda.cu +++ b/src/gpu/compute_forces_viscoelastic_cuda.cu @@ -145,7 +145,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, } // cuda kernel call - if (ATTENUATION ){ + if (ATTENUATION){ TRACE("\tKernel_2: Kernel_2_att_impl"); // compute kernels with attenuation // forward wavefields -> FORWARD_OR_ADJOINT == 1 @@ -443,7 +443,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, // backward/reconstructed 
wavefields -> FORWARD_OR_ADJOINT == 3 #ifdef USE_CUDA if (run_cuda){ - Kernel_2_noatt_ani_impl<<< grid,threads,0,mp->compute_stream>>>( nb_blocks_to_compute, + Kernel_2_noatt_ani_impl<<compute_stream>>>( nb_blocks_to_compute, d_ibool, mp->d_phase_ispec_inner_elastic,mp->num_phase_ispec_elastic, d_iphase, @@ -604,7 +604,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, // backward/reconstructed wavefields -> FORWARD_OR_ADJOINT == 3 #ifdef USE_CUDA if (run_cuda){ - Kernel_2_noatt_iso_grav_impl<<< grid,threads,0,mp->compute_stream>>>(nb_blocks_to_compute, + Kernel_2_noatt_iso_grav_impl<<compute_stream>>>(nb_blocks_to_compute, d_ibool, mp->d_phase_ispec_inner_elastic,mp->num_phase_ispec_elastic, d_iphase, @@ -736,7 +736,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, // backward/reconstructed wavefields -> FORWARD_OR_ADJOINT == 3 #ifdef USE_CUDA if (run_cuda){ - Kernel_2_noatt_iso_col_impl<<< grid,threads,0,mp->compute_stream>>>( nb_blocks_to_compute, + Kernel_2_noatt_iso_col_impl<<compute_stream>>>( nb_blocks_to_compute, d_ibool, mp->d_phase_ispec_inner_elastic,mp->num_phase_ispec_elastic, d_iphase, @@ -856,7 +856,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, // backward/reconstructed wavefields -> FORWARD_OR_ADJOINT == 3 #ifdef USE_CUDA if (run_cuda){ - Kernel_2_noatt_iso_strain_impl<<< grid,threads,0,mp->compute_stream>>>(nb_blocks_to_compute, + Kernel_2_noatt_iso_strain_impl<<compute_stream>>>(nb_blocks_to_compute, d_ibool, mp->d_phase_ispec_inner_elastic, mp->num_phase_ispec_elastic, @@ -1023,7 +1023,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, // backward/reconstructed wavefields -> FORWARD_OR_ADJOINT == 3 #ifdef USE_CUDA if (run_cuda){ - Kernel_2_noatt_iso_impl<<< grid,threads,0,mp->compute_stream>>>(nb_blocks_to_compute, + Kernel_2_noatt_iso_impl<<compute_stream>>>(nb_blocks_to_compute, d_ibool, 
mp->d_phase_ispec_inner_elastic, mp->num_phase_ispec_elastic, diff --git a/src/gpu/compute_kernels_cuda.cu b/src/gpu/compute_kernels_cuda.cu index ed9e6bc25..6b9669555 100644 --- a/src/gpu/compute_kernels_cuda.cu +++ b/src/gpu/compute_kernels_cuda.cu @@ -59,7 +59,7 @@ void FC_FUNC_(compute_kernels_elastic_cuda, // backward/reconstructed wavefield strain will be re-computed locally here #ifdef USE_CUDA if (run_cuda){ - compute_element_strain_cudakernel<<>>(mp->d_ispec_is_elastic,mp->d_ibool, + compute_element_strain_cudakernel<<compute_stream>>>(mp->d_ispec_is_elastic,mp->d_ibool, mp->d_b_displ, mp->d_b_epsilondev_xx, mp->d_b_epsilondev_yy, @@ -79,7 +79,7 @@ void FC_FUNC_(compute_kernels_elastic_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_element_strain_cudakernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_element_strain_cudakernel, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_ispec_is_elastic,mp->d_ibool, mp->d_b_displ, mp->d_b_epsilondev_xx, @@ -105,7 +105,7 @@ void FC_FUNC_(compute_kernels_elastic_cuda, // anisotropic kernel #ifdef USE_CUDA if (run_cuda){ - compute_kernels_ani_cudakernel<<>>(mp->d_ispec_is_elastic,mp->d_ibool, + compute_kernels_ani_cudakernel<<compute_stream>>>(mp->d_ispec_is_elastic,mp->d_ibool, mp->d_accel, mp->d_b_displ, mp->d_epsilondev_xx, mp->d_epsilondev_yy, @@ -127,7 +127,7 @@ void FC_FUNC_(compute_kernels_elastic_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_kernels_ani_cudakernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_kernels_ani_cudakernel, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_ispec_is_elastic,mp->d_ibool, mp->d_accel, mp->d_b_displ, mp->d_epsilondev_xx, @@ -153,7 +153,7 @@ void FC_FUNC_(compute_kernels_elastic_cuda, // isotropic kernel #ifdef USE_CUDA if (run_cuda){ - compute_kernels_cudakernel<<>>(mp->d_ispec_is_elastic,mp->d_ibool, + 
compute_kernels_cudakernel<<compute_stream>>>(mp->d_ispec_is_elastic,mp->d_ibool, mp->d_accel, mp->d_b_displ, mp->d_epsilondev_xx, mp->d_epsilondev_yy, @@ -176,7 +176,7 @@ void FC_FUNC_(compute_kernels_elastic_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_kernels_cudakernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_kernels_cudakernel, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_ispec_is_elastic,mp->d_ibool, mp->d_accel, mp->d_b_displ, mp->d_epsilondev_xx, @@ -234,7 +234,7 @@ void FC_FUNC_(compute_kernels_strgth_noise_cu, #ifdef USE_CUDA if (run_cuda){ - compute_kernels_strength_noise_cuda_kernel<<>>(mp->d_displ, + compute_kernels_strength_noise_cuda_kernel<<compute_stream>>>(mp->d_displ, mp->d_free_surface_ispec, mp->d_free_surface_ijk, mp->d_ibool, @@ -248,7 +248,7 @@ void FC_FUNC_(compute_kernels_strgth_noise_cu, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_kernels_strength_noise_cuda_kernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_kernels_strength_noise_cuda_kernel, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_displ, mp->d_free_surface_ispec, mp->d_free_surface_ijk, @@ -293,7 +293,7 @@ void FC_FUNC_(compute_kernels_acoustic_cuda, #ifdef USE_CUDA if (run_cuda){ - compute_kernels_acoustic_kernel<<>>(mp->d_ispec_is_acoustic, + compute_kernels_acoustic_kernel<<compute_stream>>>(mp->d_ispec_is_acoustic, mp->d_ibool, mp->d_rhostore, mp->d_hprime_xx, @@ -315,7 +315,7 @@ void FC_FUNC_(compute_kernels_acoustic_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_kernels_acoustic_kernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_kernels_acoustic_kernel, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_ispec_is_acoustic, mp->d_ibool, mp->d_rhostore, @@ -368,7 +368,7 @@ void FC_FUNC_(compute_kernels_hess_cuda, if (*ELASTIC_SIMULATION) { #ifdef USE_CUDA if (run_cuda){ - 
compute_kernels_hess_el_cudakernel<<>>(mp->d_ispec_is_elastic, + compute_kernels_hess_el_cudakernel<<compute_stream>>>(mp->d_ispec_is_elastic, mp->d_ibool, mp->d_accel, mp->d_b_accel, @@ -389,7 +389,7 @@ void FC_FUNC_(compute_kernels_hess_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_kernels_hess_el_cudakernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_kernels_hess_el_cudakernel, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_ispec_is_elastic, mp->d_ibool, mp->d_accel, @@ -414,7 +414,7 @@ void FC_FUNC_(compute_kernels_hess_cuda, if (*ACOUSTIC_SIMULATION) { #ifdef USE_CUDA if (run_cuda){ - compute_kernels_hess_ac_cudakernel<<>>(mp->d_ispec_is_acoustic, + compute_kernels_hess_ac_cudakernel<<compute_stream>>>(mp->d_ispec_is_acoustic, mp->d_ibool, mp->d_potential_dot_dot_acoustic, mp->d_b_potential_dot_dot_acoustic, @@ -437,7 +437,7 @@ void FC_FUNC_(compute_kernels_hess_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_kernels_hess_ac_cudakernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_kernels_hess_ac_cudakernel, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_ispec_is_acoustic, mp->d_ibool, mp->d_potential_dot_dot_acoustic, diff --git a/src/gpu/compute_stacey_acoustic_cuda.cu b/src/gpu/compute_stacey_acoustic_cuda.cu index 3a419eaf6..ea525e66b 100644 --- a/src/gpu/compute_stacey_acoustic_cuda.cu +++ b/src/gpu/compute_stacey_acoustic_cuda.cu @@ -86,7 +86,7 @@ void FC_FUNC_(compute_stacey_acoustic_cuda, // combined forward/backward fields #ifdef USE_CUDA if (run_cuda){ - compute_stacey_acoustic_kernel<<>>(mp->d_potential_dot_acoustic, + compute_stacey_acoustic_kernel<<compute_stream>>>(mp->d_potential_dot_acoustic, mp->d_potential_dot_dot_acoustic, mp->d_abs_boundary_ispec, mp->d_abs_boundary_ijk, @@ -105,7 +105,7 @@ void FC_FUNC_(compute_stacey_acoustic_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_stacey_acoustic_kernel, dim3(grid), 
dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_stacey_acoustic_kernel, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_potential_dot_acoustic, mp->d_potential_dot_dot_acoustic, mp->d_abs_boundary_ispec, @@ -138,7 +138,7 @@ void FC_FUNC_(compute_stacey_acoustic_cuda, // single forward or backward fields #ifdef USE_CUDA if (run_cuda){ - compute_stacey_acoustic_single_kernel<<>>(potential_dot, + compute_stacey_acoustic_single_kernel<<compute_stream>>>(potential_dot, potential_dot_dot, mp->d_abs_boundary_ispec, mp->d_abs_boundary_ijk, @@ -157,7 +157,7 @@ void FC_FUNC_(compute_stacey_acoustic_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_stacey_acoustic_single_kernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_stacey_acoustic_single_kernel, dim3(grid), dim3(threads), 0 ,mp->compute_stream, potential_dot, potential_dot_dot, mp->d_abs_boundary_ispec, @@ -179,7 +179,10 @@ void FC_FUNC_(compute_stacey_acoustic_cuda, // adjoint simulations: stores absorbed wavefield part if (mp->simulation_type == 1 && mp->save_forward){ + // explicitly wait until compute stream is done // (cudaMemcpy implicitly synchronizes all other cuda operations) + gpuStreamSynchronize(mp->compute_stream); + // copies array to CPU gpuMemcpy_tohost_void((void*)h_b_absorb_potential,(void*)mp->d_b_absorb_potential,mp->d_b_reclen_potential); } @@ -244,7 +247,7 @@ void FC_FUNC_(compute_stacey_acoustic_undoatt_cuda, // undoatt: single forward or backward fields #ifdef USE_CUDA if (run_cuda){ - compute_stacey_acoustic_undoatt_kernel<<>>(potential_dot, + compute_stacey_acoustic_undoatt_kernel<<compute_stream>>>(potential_dot, potential_dot_dot, mp->d_abs_boundary_ispec, mp->d_abs_boundary_ijk, @@ -259,7 +262,7 @@ void FC_FUNC_(compute_stacey_acoustic_undoatt_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(compute_stacey_acoustic_undoatt_kernel, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(compute_stacey_acoustic_undoatt_kernel, 
dim3(grid), dim3(threads), 0, mp->compute_stream, potential_dot, potential_dot_dot, mp->d_abs_boundary_ispec, diff --git a/src/gpu/kernels/Kernel_2_acoustic_impl.cu b/src/gpu/kernels/Kernel_2_acoustic_impl.cu index 4101e12cb..d08c67349 100644 --- a/src/gpu/kernels/Kernel_2_acoustic_impl.cu +++ b/src/gpu/kernels/Kernel_2_acoustic_impl.cu @@ -81,7 +81,7 @@ extern realw_texture d_hprime_xx_tex; // - hiding memory latency: to minimize waiting times to retrieve a memory value from global memory, we put // some more calculations into the same code block before calling syncthreads(). this should help the // compiler to move independent calculations to wherever it can overlap it with memory access operations. -// note, especially the if (gravity )-block locations are very sensitive +// note, especially the if (gravity)-block locations are very sensitive // for optimal register usage and compiler optimizations // @@ -110,26 +110,60 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, realw* d_gammax,realw* d_gammay,realw* d_gammaz, const realw xix_regular, const realw jacobian_regular, realw_const_p d_hprime_xx, - realw_const_p hprimewgll_xx, - realw_const_p wgllwgll_xy,realw_const_p wgllwgll_xz,realw_const_p wgllwgll_yz, - realw* d_rhostore, + realw_const_p d_hprimewgll_xx, + realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz, + realw_const_p d_rhostore, const int use_mesh_coloring_gpu, const int gravity, realw_const_p minus_g, realw* d_kappastore, realw_const_p wgll_cube){ +// arithmetic intensity: ratio of number-of-arithmetic-operations / number-of-bytes-accessed-on-DRAM +// +// hand-counts on floating-point operations: counts addition/subtraction/multiplication/division +// no counts for operations on indices in for-loops (compiler will likely unrool loops) +// +// counts accesses to global memory, but no shared memory or register loads/stores +// float has 4 bytes + +// counts: for simulations without gravity, without 
mesh_coloring +// counts floating-point operations (FLOP) per thread +// counts global memory accesses in bytes (BYTES) per block +// 2 FLOP +// +// 0 BYTES + // block-id == number of local element id in phase_ispec array int bx = blockIdx.y*gridDim.x+blockIdx.x; + // checks if anything to do + if (bx >= nb_blocks_to_compute) return; + // thread-id == GLL node id // note: use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads, // because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses; // to avoid execution branching and the need of registers to store an active state variable, // the thread ids are put in valid range int tx = threadIdx.x; + // limits thread ids to range [0,125-1] + if (tx >= NGLL3) tx = NGLL3-1; + +// counts: +// + 1 FLOP +// +// + 0 BYTE + + // local index + int K = (tx/NGLL2); + int J = ((tx-K*NGLL2)/NGLLX); + int I = (tx-K*NGLL2-J*NGLLX); + +// counts: +// + 8 FLOP +// +// + 0 BYTES - int I,J,K; int iglob,offset; int working_element,ispec_irreg; @@ -153,38 +187,13 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, __shared__ realw sh_hprime_xx[NGLL2]; __shared__ realw sh_hprimewgll_xx[NGLL2]; -// arithmetic intensity: ratio of number-of-arithmetic-operations / number-of-bytes-accessed-on-DRAM -// -// hand-counts on floating-point operations: counts addition/subtraction/multiplication/division -// no counts for operations on indices in for-loops (compiler will likely unrool loops) -// -// counts accesses to global memory, but no shared memory or register loads/stores -// float has 4 bytes - -// counts: for simulations without gravity, without mesh_coloring -// counts floating-point operations (FLOP) per thread -// counts global memory accesses in bytes (BYTES) per block -// 2 FLOP -// -// 0 BYTES - - // checks if anything to do - if (bx >= nb_blocks_to_compute) return; - - // limits thread ids to range [0,125-1] - if (tx >= NGLL3) tx = NGLL3-1; - -// counts: -// + 1 FLOP -// -// + 0 BYTE // 
spectral-element id #ifdef USE_MESH_COLORING_GPU working_element = bx; #else //mesh coloring - if (use_mesh_coloring_gpu ){ + if (use_mesh_coloring_gpu){ working_element = bx; }else{ // iphase-1 and working_element-1 for Fortran->C array conventions @@ -192,9 +201,11 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, } #endif + ispec_irreg = d_irregular_element_number[working_element] - 1; + // local padded index offset = working_element*NGLL3_PADDED + tx; - ispec_irreg = d_irregular_element_number[working_element] - 1; + // global index iglob = d_ibool[offset] - 1; @@ -222,18 +233,7 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, // + 1 float * 125 threads = 500 BYTE // gravity - if (gravity ) kappa_invl = 1.f / d_kappastore[working_element*NGLL3 + tx]; - - - // local index - K = (tx/NGLL2); - J = ((tx-K*NGLL2)/NGLLX); - I = (tx-K*NGLL2-J*NGLLX); - -// counts: -// + 8 FLOP -// -// + 0 BYTES + if (gravity) kappa_invl = 1.f / d_kappastore[working_element*NGLL3 + tx]; // note: loads mesh values here to give compiler possibility to overlap memory fetches with some computations; // arguments defined as realw* instead of const realw* __restrict__ to avoid that the compiler @@ -258,6 +258,7 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, } // density (reciproc) + //rhol = get_global_cr( &d_rhostore[working_element*NGLL3_PADDED + tx] ); rho_invl = 1.f / d_rhostore[offset]; // counts: @@ -273,7 +274,7 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, sh_hprime_xx[tx] = d_hprime_xx[tx]; #endif // loads hprimewgll into shared memory - sh_hprimewgll_xx[tx] = hprimewgll_xx[tx]; + sh_hprimewgll_xx[tx] = d_hprimewgll_xx[tx]; } // counts: @@ -287,9 +288,9 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, __syncthreads(); // summed terms with added gll weights - fac1 = wgllwgll_yz[K*NGLLX+J]; - fac2 = wgllwgll_xz[K*NGLLX+I]; - fac3 = wgllwgll_xy[J*NGLLX+I]; + fac1 = d_wgllwgll_yz[K*NGLLX+J]; + fac2 = d_wgllwgll_xz[K*NGLLX+I]; + fac3 = 
d_wgllwgll_xy[J*NGLLX+I]; // We make a loop over direct and adjoint wavefields inside the GPU kernel to increase arithmetic intensity for (int k = 0 ; k < nb_field ; k++){ @@ -340,12 +341,18 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, } } // pre-computes gravity sum term - if (gravity ){ + if (gravity){ // uses potential definition: s = grad(chi) // // gravity term: 1/kappa grad(chi) * g // assumes that g only acts in (negative) z-direction - gravity_term = minus_g[iglob] * kappa_invl * jacobianl * wgll_cube[tx] * dpotentialdzl; + if (threadIdx.x < NGLL3) { + if (ispec_irreg >= 0){ + gravity_term = minus_g[iglob] * kappa_invl * jacobianl * wgll_cube[tx] * dpotentialdzl; + }else{ + gravity_term = minus_g[iglob] * kappa_invl * jacobian_regular * wgll_cube[tx] * xix_regular*temp3l; + } + } } // counts: @@ -402,7 +409,7 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, #endif // USE_TEXTURES_FIELDS #else // MESH_COLORING //mesh coloring - if (use_mesh_coloring_gpu ){ + if (use_mesh_coloring_gpu){ // no atomic operation needed, colors don't share global points between elements #ifdef USE_TEXTURES_FIELDS if (k==0) d_potential_dot_dot_acoustic[iglob] = texfetch_potential_dot_dot(iglob) + sum_terms; @@ -525,9 +532,9 @@ template __global__ void Kernel_2_acoustic_impl<1>(const int nb_blocks_to_comput realw* d_gammax,realw* d_gammay,realw* d_gammaz, const realw xix_regular, const realw jacobian_regular, realw_const_p d_hprime_xx, - realw_const_p hprimewgll_xx, - realw_const_p wgllwgll_xy,realw_const_p wgllwgll_xz,realw_const_p wgllwgll_yz, - realw* d_rhostore, + realw_const_p d_hprimewgll_xx, + realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz, + realw_const_p d_rhostore, const int use_mesh_coloring_gpu, const int gravity, realw_const_p minus_g, @@ -558,9 +565,9 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, const int* d_irregular_element_number, const realw xix_regular, const realw jacobian_regular, 
realw_const_p d_hprime_xx, - realw_const_p hprimewgll_xx, - realw_const_p wgllwgll_xy,realw_const_p wgllwgll_xz,realw_const_p wgllwgll_yz, - realw* d_rhostore, + realw_const_p d_hprimewgll_xx, + realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz, + realw_const_p d_rhostore, const int use_mesh_coloring_gpu, const int gravity, realw_const_p minus_g, @@ -571,14 +578,23 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, // block-id == number of local element id in phase_ispec array int bx = blockIdx.y*gridDim.x+blockIdx.x; + // checks if anything to do + if (bx >= nb_blocks_to_compute) return; + // thread-id == GLL node id // note: use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads, // because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses; // to avoid execution branching and the need of registers to store an active state variable, // the thread ids are put in valid range int tx = threadIdx.x; + // limits thread ids to range [0,125-1] + if (tx >= NGLL3) tx = NGLL3-1; + + // local index + int K = (tx/NGLL2); + int J = ((tx-K*NGLL2)/NGLLX); + int I = (tx-K*NGLL2-J*NGLLX); - int I,J,K; int iglob,offset; int working_element,ispec_irreg; @@ -602,28 +618,24 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, __shared__ realw sh_hprime_xx[NGLL2]; __shared__ realw sh_hprimewgll_xx[NGLL2]; - // checks if anything to do - if (bx >= nb_blocks_to_compute) return; - - // limits thread ids to range [0,125-1] - if (tx >= NGLL3) tx = NGLL3-1; - // spectral-element id #ifdef USE_MESH_COLORING_GPU working_element = bx; #else //mesh coloring - if (use_mesh_coloring_gpu ){ + if (use_mesh_coloring_gpu){ working_element = bx; }else{ // iphase-1 and working_element-1 for Fortran->C array conventions - working_element = d_phase_ispec_inner_acoustic[bx + num_phase_ispec_acoustic*(d_iphase-1)]-1; + working_element = d_phase_ispec_inner_acoustic[bx + 
num_phase_ispec_acoustic*(d_iphase-1)] - 1; } #endif + ispec_irreg = d_irregular_element_number[working_element] - 1; + // local padded index offset = working_element*NGLL3_PADDED + tx; - ispec_irreg = d_irregular_element_number[working_element] - 1; + // global index iglob = d_ibool[offset] - 1; @@ -644,11 +656,6 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, // gravity if (gravity) kappa_invl = 1.f / d_kappastore[working_element*NGLL3 + tx]; - // local index - K = (tx/NGLL2); - J = ((tx-K*NGLL2)/NGLLX); - I = (tx-K*NGLL2-J*NGLLX); - // calculates laplacian if (ispec_irreg >= 0){ //irregular_element @@ -670,6 +677,7 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, } // density (reciproc) + //rhol = get_global_cr( &d_rhostore[working_element*NGLL3_PADDED + tx] ); rho_invl = 1.f / d_rhostore[offset]; // loads hprime into shared memory @@ -680,7 +688,7 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, sh_hprime_xx[tx] = d_hprime_xx[tx]; #endif // loads hprimewgll into shared memory - sh_hprimewgll_xx[tx] = hprimewgll_xx[tx]; + sh_hprimewgll_xx[tx] = d_hprimewgll_xx[tx]; } // synchronize all the threads (one thread for each of the NGLL grid points of the @@ -724,12 +732,18 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, } // pre-computes gravity sum term - if (gravity ){ + if (gravity){ // uses potential definition: s = grad(chi) // // gravity term: 1/kappa grad(chi) * g // assumes that g only acts in (negative) z-direction - gravity_term = minus_g[iglob] * kappa_invl * jacobianl * wgll_cube[tx] * dpotentialdzl; + if (threadIdx.x < NGLL3) { + if (ispec_irreg >= 0){ + gravity_term = minus_g[iglob] * kappa_invl * jacobianl * wgll_cube[tx] * dpotentialdzl; + }else{ + gravity_term = minus_g[iglob] * kappa_invl * jacobian_regular * wgll_cube[tx] * xix_regular*temp3l; + } + } } // synchronize all the threads (one thread for each of the NGLL grid points of the @@ -753,9 +767,9 @@ 
Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, } // summed terms with added gll weights - fac1 = wgllwgll_yz[K*NGLLX+J]; - fac2 = wgllwgll_xz[K*NGLLX+I]; - fac3 = wgllwgll_xy[J*NGLLX+I]; + fac1 = d_wgllwgll_yz[K*NGLLX+J]; + fac2 = d_wgllwgll_xz[K*NGLLX+I]; + fac3 = d_wgllwgll_xy[J*NGLLX+I]; sum_terms = -(fac1*temp1l + fac2*temp2l + fac3*temp3l); @@ -777,7 +791,7 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, #endif // USE_TEXTURES_FIELDS #else // MESH_COLORING //mesh coloring - if (use_mesh_coloring_gpu ){ + if (use_mesh_coloring_gpu){ // no atomic operation needed, colors don't share global points between elements #ifdef USE_TEXTURES_FIELDS if (FORWARD_OR_ADJOINT == 3){ @@ -817,8 +831,8 @@ Kernel_2_acoustic_perf_impl(const int nb_blocks_to_compute, realw* d_etax,realw* d_etay,realw* d_etaz, realw* d_gammax,realw* d_gammay,realw* d_gammaz, realw_const_p d_hprime_xx, - realw_const_p hprimewgll_xx, - realw_const_p wgllwgll_xy,realw_const_p wgllwgll_xz,realw_const_p wgllwgll_yz, + realw_const_p d_hprimewgll_xx, + realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz, realw* d_rhostore, const int use_mesh_coloring_gpu, const int gravity, @@ -927,7 +941,7 @@ Kernel_2_acoustic_perf_impl(const int nb_blocks_to_compute, sh_hprime_xx[tx] = d_hprime_xx[tx]; #endif // loads hprimewgll into shared memory - sh_hprimewgll_xx[tx] = hprimewgll_xx[tx]; + sh_hprimewgll_xx[tx] = d_hprimewgll_xx[tx]; } // synchronize all the threads (one thread for each of the NGLL grid points of the @@ -984,9 +998,9 @@ Kernel_2_acoustic_perf_impl(const int nb_blocks_to_compute, } // summed terms with added gll weights - fac1 = wgllwgll_yz[K*NGLLX+J]; - fac2 = wgllwgll_xz[K*NGLLX+I]; - fac3 = wgllwgll_xy[J*NGLLX+I]; + fac1 = d_wgllwgll_yz[K*NGLLX+J]; + fac2 = d_wgllwgll_xz[K*NGLLX+I]; + fac3 = d_wgllwgll_xy[J*NGLLX+I]; sum_terms = -(fac1*temp1l + fac2*temp2l + fac3*temp3l); diff --git a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu 
b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu index 0a9eb3e6d..98c42b507 100644 --- a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu +++ b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu @@ -1773,7 +1773,7 @@ Kernel_2_noatt_iso_grav_impl(int nb_blocks_to_compute, sigma_zx = sigma_xz; sigma_zy = sigma_yz; - if (gravity ){ + if (gravity){ // computes non-symmetric terms for gravity compute_element_gravity(tx,working_element,&iglob,d_minus_g,d_minus_deriv_gravity, d_rhostore,wgll_cube,jacobianl, @@ -2128,7 +2128,7 @@ Kernel_2_noatt_ani_impl(int nb_blocks_to_compute, sigma_zx = sigma_xz; sigma_zy = sigma_yz; - if (gravity ){ + if (gravity){ // computes non-symmetric terms for gravity compute_element_gravity(tx,working_element,&iglob,d_minus_g,d_minus_deriv_gravity, d_rhostore,wgll_cube,jacobianl, @@ -2167,7 +2167,7 @@ Kernel_2_noatt_ani_impl(int nb_blocks_to_compute, sum_terms3 = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l); // adds gravity term - if (gravity ){ + if (gravity){ sum_terms1 += rho_s_H1; sum_terms2 += rho_s_H2; sum_terms3 += rho_s_H3; @@ -3148,7 +3148,7 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, sigma_zx = sigma_xz; sigma_zy = sigma_yz; - if (gravity ){ + if (gravity){ // computes non-symmetric terms for gravity compute_element_gravity(tx,working_element,&iglob,d_minus_g,d_minus_deriv_gravity, d_rhostore,wgll_cube,jacobianl, @@ -3287,7 +3287,7 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, sum_terms3 = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l); // adds gravity term - if (gravity ){ + if (gravity){ sum_terms1 += rho_s_H1; sum_terms2 += rho_s_H2; sum_terms3 += rho_s_H3; diff --git a/src/gpu/kernels/add_sources_ac_SIM_TYPE_2_OR_3_kernel.cu b/src/gpu/kernels/add_sources_ac_SIM_TYPE_2_OR_3_kernel.cu index 1db0bd2b1..167716074 100644 --- a/src/gpu/kernels/add_sources_ac_SIM_TYPE_2_OR_3_kernel.cu +++ b/src/gpu/kernels/add_sources_ac_SIM_TYPE_2_OR_3_kernel.cu @@ -48,6 +48,7 @@ __global__ void add_sources_ac_SIM_TYPE_2_OR_3_kernel(field* 
potential_dot_dot_a if (irec_local < nadj_rec_local) { int ispec = ispec_selected_recloc[irec_local]-1; + if (ispec_is_acoustic[ispec]){ int i = threadIdx.x; int j = threadIdx.y; diff --git a/src/gpu/kernels/add_sources_el_SIM_TYPE_2_OR_3_kernel.cu b/src/gpu/kernels/add_sources_el_SIM_TYPE_2_OR_3_kernel.cu index 6c7543e24..2afe8089c 100644 --- a/src/gpu/kernels/add_sources_el_SIM_TYPE_2_OR_3_kernel.cu +++ b/src/gpu/kernels/add_sources_el_SIM_TYPE_2_OR_3_kernel.cu @@ -51,6 +51,7 @@ __global__ void add_sources_el_SIM_TYPE_2_OR_3_kernel(realw* accel, int i = threadIdx.x; int j = threadIdx.y; int k = threadIdx.z; + int iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1; realw hxir = xir_store[INDEX2(NGLLX,i,irec_local)]; @@ -59,13 +60,20 @@ __global__ void add_sources_el_SIM_TYPE_2_OR_3_kernel(realw* accel, realw lagrange = hxir * hetar * hgammar ; + realw source_adj_x = (realw) source_adjoint[INDEX3(NDIM,nadj_rec_local,0,irec_local,it)]; + realw source_adj_y = (realw) source_adjoint[INDEX3(NDIM,nadj_rec_local,1,irec_local,it)]; + realw source_adj_z = (realw) source_adjoint[INDEX3(NDIM,nadj_rec_local,2,irec_local,it)]; + + realw stf_x = source_adj_x * lagrange; + realw stf_y = source_adj_y * lagrange; + realw stf_z = source_adj_z * lagrange; + // atomic operations are absolutely necessary for correctness! 
- atomicAdd(&accel[0+3*iglob],source_adjoint[INDEX3(NDIM,nadj_rec_local,0,irec_local,it)]*lagrange); - atomicAdd(&accel[1+3*iglob],source_adjoint[INDEX3(NDIM,nadj_rec_local,1,irec_local,it)]*lagrange); - atomicAdd(&accel[2+3*iglob],source_adjoint[INDEX3(NDIM,nadj_rec_local,2,irec_local,it)]*lagrange); + atomicAdd(&accel[iglob*3],stf_x); + atomicAdd(&accel[iglob*3+1],stf_y); + atomicAdd(&accel[iglob*3+2],stf_z); } // ispec_is_elastic } - } diff --git a/src/gpu/kernels/compute_acoustic_seismogram_kernel.cu b/src/gpu/kernels/compute_acoustic_seismogram_kernel.cu index 963df2054..7aa06e910 100644 --- a/src/gpu/kernels/compute_acoustic_seismogram_kernel.cu +++ b/src/gpu/kernels/compute_acoustic_seismogram_kernel.cu @@ -167,7 +167,6 @@ __global__ void compute_acoustic_seismogram_kernel(int nrec_local, realw duxdyl,duxdzl,duydxl,duydzl,duzdxl,duzdyl; int ispec_irreg = d_irregular_element_number[ispec] - 1; - if (ispec_irreg >= 0){ // irregular element int offset_irreg = ispec_irreg * NGLL3_PADDED + tx; @@ -321,7 +320,7 @@ __global__ void compute_acoustic_vectorial_seismogram_kernel(int nrec_local, realw etaxl, etayl, etazl; realw gammaxl, gammayl, gammazl; realw dpotentialdxl, dpotentialdyl, dpotentialdzl; - int ispec, iglob, ispec_irreg; + int ispec, iglob; /* // debug @@ -358,6 +357,8 @@ __global__ void compute_acoustic_vectorial_seismogram_kernel(int nrec_local, } */ + if (irec_local >= nrec_local) return; + s_temp1[tx] = 0.0f; s_temp2[tx] = 0.0f; s_temp3[tx] = 0.0f; @@ -367,14 +368,11 @@ __global__ void compute_acoustic_vectorial_seismogram_kernel(int nrec_local, int J = ((tx-K*NGLL2)/NGLLX); int I = (tx-K*NGLL2-J*NGLLX); - if (irec_local >= nrec_local) return; - if (tx < NGLL3) { ispec = ispec_selected_rec_loc[irec_local] - 1; - ispec_irreg = d_irregular_element_number[ispec] - 1; // nothing to do if we are in elastic element - if (d_ispec_is_acoustic[ispec] == 0) {return;} + if (d_ispec_is_acoustic[ispec] == 0) { return; } int offset = 
INDEX4_PADDED(NGLLX,NGLLX,NGLLX,I,J,K,ispec); @@ -412,7 +410,8 @@ __global__ void compute_acoustic_vectorial_seismogram_kernel(int nrec_local, temp3l += s_dummy_loc[l*NGLL2+J*NGLLX+I] * sh_hprime_xx[l*NGLLX+K]; } - if (ispec_irreg >= 0){ + int ispec_irreg = d_irregular_element_number[ispec] - 1; + if (ispec_irreg >= 0){ //irregular element int offset_irreg = INDEX4_PADDED(NGLLX,NGLLX,NGLLX,I,J,K,ispec_irreg); xixl = d_xix[offset_irreg]; @@ -429,8 +428,7 @@ __global__ void compute_acoustic_vectorial_seismogram_kernel(int nrec_local, dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l; dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l; dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l; - } - else{ + }else{ // compute derivatives of ux, uy and uz with respect to x, y and z // derivatives of potential dpotentialdxl = xix_regular*temp1l; diff --git a/src/gpu/kernels/compute_add_sources_acoustic_kernel.cu b/src/gpu/kernels/compute_add_sources_acoustic_kernel.cu index f36b6b282..de374aa64 100644 --- a/src/gpu/kernels/compute_add_sources_acoustic_kernel.cu +++ b/src/gpu/kernels/compute_add_sources_acoustic_kernel.cu @@ -45,8 +45,6 @@ __global__ void compute_add_sources_acoustic_kernel(field* potential_dot_dot_aco int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx int ispec,iglob; - field stf; - realw kappal; if (isource < NSOURCES){ @@ -58,11 +56,12 @@ __global__ void compute_add_sources_acoustic_kernel(field* potential_dot_dot_aco iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1; - stf = stf_pre_compute[isource]; - kappal = kappastore[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]; + field stf = stf_pre_compute[isource]; + realw kappal = kappastore[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]; - atomicAdd(&potential_dot_dot_acoustic[iglob], - -sourcearrays[INDEX5(NSOURCES,NDIM,NGLLX,NGLLX,isource, 0,i,j,k)]*stf/kappal); + field stf_p = - (sourcearrays[INDEX5(NSOURCES,NDIM,NGLLX,NGLLX,isource, 0,i,j,k)] / kappal) * stf; + + 
atomicAdd(&potential_dot_dot_acoustic[iglob],stf_p); // debug: without atomic operation // potential_dot_dot_acoustic[iglob] += diff --git a/src/gpu/kernels/compute_add_sources_kernel.cu b/src/gpu/kernels/compute_add_sources_kernel.cu index 9cb131de4..71fb08690 100644 --- a/src/gpu/kernels/compute_add_sources_kernel.cu +++ b/src/gpu/kernels/compute_add_sources_kernel.cu @@ -44,7 +44,6 @@ __global__ void compute_add_sources_kernel(realw* accel, int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx int ispec,iglob; - field stf; if (isource < NSOURCES) { // when NSOURCES > 65535, but mod(nspec_top,2) > 0, we end up with an extra block. @@ -54,12 +53,17 @@ __global__ void compute_add_sources_kernel(realw* accel, if (ispec_is_elastic[ispec]) { - stf = stf_pre_compute[isource]; iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1; - atomicAdd(&accel[iglob*3+0],sourcearrays[INDEX5(NSOURCES,NDIM,NGLLX,NGLLX,isource,0,i,j,k)]*stf); - atomicAdd(&accel[iglob*3+1],sourcearrays[INDEX5(NSOURCES,NDIM,NGLLX,NGLLX,isource,1,i,j,k)]*stf); - atomicAdd(&accel[iglob*3+2],sourcearrays[INDEX5(NSOURCES,NDIM,NGLLX,NGLLX,isource,2,i,j,k)]*stf); + realw stf = (realw) stf_pre_compute[isource]; + + realw stf_x = sourcearrays[INDEX5(NSOURCES,NDIM,NGLLX,NGLLX,isource,0,i,j,k)] * stf; + realw stf_y = sourcearrays[INDEX5(NSOURCES,NDIM,NGLLX,NGLLX,isource,1,i,j,k)] * stf; + realw stf_z = sourcearrays[INDEX5(NSOURCES,NDIM,NGLLX,NGLLX,isource,2,i,j,k)] * stf; + + atomicAdd(&accel[iglob*3],stf_x); + atomicAdd(&accel[iglob*3+1],stf_y); + atomicAdd(&accel[iglob*3+2],stf_z); } } } diff --git a/src/gpu/kernels/compute_coupling_elastic_ac_kernel.cu b/src/gpu/kernels/compute_coupling_elastic_ac_kernel.cu index 8be7bd245..e6f47a871 100644 --- a/src/gpu/kernels/compute_coupling_elastic_ac_kernel.cu +++ b/src/gpu/kernels/compute_coupling_elastic_ac_kernel.cu @@ -77,7 +77,7 @@ __global__ void compute_coupling_elastic_ac_kernel(field* potential_dot_dot_acou jacobianw = 
coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)]; // acoustic pressure on global point - if (gravity ){ + if (gravity){ // takes density (from acoustic? element) rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]; diff --git a/src/gpu/kernels/compute_kernels_acoustic_kernel.cu b/src/gpu/kernels/compute_kernels_acoustic_kernel.cu index c0029096d..e2df6b6f0 100644 --- a/src/gpu/kernels/compute_kernels_acoustic_kernel.cu +++ b/src/gpu/kernels/compute_kernels_acoustic_kernel.cu @@ -75,6 +75,7 @@ __global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic, // copy field values iglob = d_ibool[ijk_ispec_padded] - 1; + scalar_field_displ[ijk] = b_potential_acoustic[iglob]; scalar_field_accel[ijk] = potential_acoustic[iglob]; } @@ -83,7 +84,7 @@ __global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic, // synchronizes threads __syncthreads(); - if (active ){ + if (active){ field accel_loc[3]; field b_displ_loc[3]; realw rhol; diff --git a/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cu b/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cu index 0c335bcf9..3a1342e56 100644 --- a/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cu +++ b/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cu @@ -78,18 +78,18 @@ __global__ void compute_kernels_hess_ac_cudakernel(int* ispec_is_acoustic, // copy field values scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob]; scalar_field_b_accel[ijk] = b_potential_dot_dot_acoustic[iglob]; - scalar_field_b_veloc[ijk] = b_potential_dot_acoustic[iglob]; + scalar_field_b_veloc[ijk] = b_potential_dot_acoustic[iglob]; } } // synchronizes threads __syncthreads(); - if (active ){ + if (active){ field accel_loc[3]; field b_accel_loc[3]; field b_veloc_loc[3]; - realw rhol, kappal; + realw rhol, kappal_inv; // gets material parameter rhol = rhostore[ijk_ispec_padded]; @@ -126,11 +126,8 @@ __global__ void compute_kernels_hess_ac_cudakernel(int* ispec_is_acoustic, hess_rho_ac_kl[ijk_ispec] += deltat 
* rhol * sum(b_veloc_loc[0]*b_veloc_loc[0] + b_veloc_loc[1]*b_veloc_loc[1] + b_veloc_loc[2]*b_veloc_loc[2]); - kappal = kappastore[ijk_ispec]; - hess_kappa_ac_kl[ijk_ispec] += deltat / kappal * sum( b_potential_dot_acoustic[iglob] - * b_potential_dot_acoustic[iglob]); - // - + kappal_inv = 1.0f / kappastore[ijk_ispec]; + hess_kappa_ac_kl[ijk_ispec] += deltat * kappal_inv * sum(b_potential_dot_acoustic[iglob] * b_potential_dot_acoustic[iglob]); } // active } diff --git a/src/gpu/kernels/compute_kernels_hess_el_cudakernel.cu b/src/gpu/kernels/compute_kernels_hess_el_cudakernel.cu index 1303dbf6e..d6e602b13 100644 --- a/src/gpu/kernels/compute_kernels_hess_el_cudakernel.cu +++ b/src/gpu/kernels/compute_kernels_hess_el_cudakernel.cu @@ -58,18 +58,17 @@ __global__ void compute_kernels_hess_el_cudakernel(int* ispec_is_elastic, accel[3*iglob+1]*b_accel[3*iglob+1]+ accel[3*iglob+2]*b_accel[3*iglob+2]); - // - hess_rho_kl[ijk_ispec] += deltat * (b_veloc[3*iglob] *b_veloc[3*iglob]+ - b_veloc[3*iglob+1]*b_veloc[3*iglob+1]+ - b_veloc[3*iglob+2]*b_veloc[3*iglob+2]); + hess_rho_kl[ijk_ispec] += deltat * (b_veloc[3*iglob]*b_veloc[3*iglob]+ + b_veloc[3*iglob+1]*b_veloc[3*iglob+1]+ + b_veloc[3*iglob+2]*b_veloc[3*iglob+2]); hess_mu_kl[ijk_ispec] += deltat * (b_epsilondev_xx[ijk_ispec]*b_epsilondev_xx[ijk_ispec]+ - b_epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+ - (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])* - (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])+ - 2*(b_epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+ - b_epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+ - b_epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec])); + b_epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+ + (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])* + (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])+ + 2*(b_epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+ + b_epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+ + 
b_epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec])); hess_kappa_kl[ijk_ispec] += deltat*(9*b_epsilon_trace_over_3[ijk_ispec]*b_epsilon_trace_over_3[ijk_ispec]); diff --git a/src/gpu/kernels/compute_stacey_acoustic_kernel.cu b/src/gpu/kernels/compute_stacey_acoustic_kernel.cu index db5dfbcae..84d7b2e5f 100644 --- a/src/gpu/kernels/compute_stacey_acoustic_kernel.cu +++ b/src/gpu/kernels/compute_stacey_acoustic_kernel.cu @@ -77,7 +77,7 @@ __global__ void compute_stacey_acoustic_kernel(field* potential_dot_acoustic, cpl = sqrt( kappal / rhol ); // velocity - if (gravity ){ + if (gravity){ // daniel: TODO - check gravity and stacey condition here... // uses a potential definition of: s = grad(chi) vel = potential_dot_acoustic[iglob] / rhol ; @@ -166,7 +166,7 @@ __global__ void compute_stacey_acoustic_single_kernel(field* potential_dot_acous cpl = sqrt( kappal / rhol ); // velocity - if (gravity ){ + if (gravity){ // daniel: TODO - check gravity and stacey condition here... // uses a potential definition of: s = grad(chi) vel = potential_dot_acoustic[iglob] / rhol ; @@ -238,7 +238,7 @@ __global__ void compute_stacey_acoustic_undoatt_kernel( field* potential_dot_aco cpl = sqrt( kappal / rhol ); // velocity - if (gravity ){ + if (gravity){ // daniel: TODO - check gravity and stacey condition here... 
// uses a potential definition of: s = grad(chi) vel = potential_dot_acoustic[iglob] / rhol ; diff --git a/src/gpu/kernels/compute_stacey_elastic_kernel.cu b/src/gpu/kernels/compute_stacey_elastic_kernel.cu index ea848fb41..8ad9b66cf 100644 --- a/src/gpu/kernels/compute_stacey_elastic_kernel.cu +++ b/src/gpu/kernels/compute_stacey_elastic_kernel.cu @@ -72,7 +72,7 @@ __global__ void compute_stacey_elastic_kernel(realw* veloc, // gets associated velocity - vx = veloc[iglob*3+0]; + vx = veloc[iglob*3]; vy = veloc[iglob*3+1]; vz = veloc[iglob*3+2]; @@ -197,7 +197,7 @@ __global__ void compute_stacey_elastic_single_kernel(realw* veloc, atomicAdd(&accel[iglob*3+2],-b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)]); }else{ // gets associated velocity - vx = veloc[iglob*3+0]; + vx = veloc[iglob*3]; vy = veloc[iglob*3+1]; vz = veloc[iglob*3+2]; @@ -274,7 +274,7 @@ __global__ void compute_stacey_elastic_undoatt_kernel(realw* veloc, iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1; // gets associated velocity - vx = veloc[iglob*3+0]; + vx = veloc[iglob*3]; vy = veloc[iglob*3+1]; vz = veloc[iglob*3+2]; diff --git a/src/gpu/kernels/kernel_3_acoustic_cuda_device.cu b/src/gpu/kernels/kernel_3_acoustic_cuda_device.cu index a724c1cb1..ab742b9f0 100644 --- a/src/gpu/kernels/kernel_3_acoustic_cuda_device.cu +++ b/src/gpu/kernels/kernel_3_acoustic_cuda_device.cu @@ -40,18 +40,18 @@ __global__ void kernel_3_acoustic_cuda_device(field* potential_dot_acoustic, int id = threadIdx.x + (blockIdx.x + blockIdx.y*gridDim.x)*blockDim.x; - realw rmass; - field p_dot_dot; // because of block and grid sizing problems, there is a small // amount of buffer at the end of the calculation if (id < size) { - rmass = rmass_acoustic[id]; + realw rmass = rmass_acoustic[id]; // multiplies pressure with the inverse of the mass matrix - p_dot_dot = rmass*potential_dot_dot_acoustic[id]; + field p_dot_dot = rmass * potential_dot_dot_acoustic[id]; + potential_dot_dot_acoustic[id] = p_dot_dot; 
potential_dot_acoustic[id] += deltatover2*p_dot_dot; - if (simulation_type==3) { - p_dot_dot = rmass*b_potential_dot_dot_acoustic[id]; + + if (simulation_type == 3) { + p_dot_dot = rmass * b_potential_dot_dot_acoustic[id]; b_potential_dot_dot_acoustic[id] = p_dot_dot; b_potential_dot_acoustic[id] += b_deltatover2*p_dot_dot; } diff --git a/src/gpu/kernels/kernel_proto.cu.h b/src/gpu/kernels/kernel_proto.cu.h index 443b38852..00db7749e 100644 --- a/src/gpu/kernels/kernel_proto.cu.h +++ b/src/gpu/kernels/kernel_proto.cu.h @@ -56,9 +56,9 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, realw* d_gammax,realw* d_gammay,realw* d_gammaz, const realw xix_regular, const realw jacobian_regular, realw_const_p d_hprime_xx, - realw_const_p hprimewgll_xx, - realw_const_p wgllwgll_xy,realw_const_p wgllwgll_xz,realw_const_p wgllwgll_yz, - realw* d_rhostore, + realw_const_p d_hprimewgll_xx, + realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz, + realw_const_p d_rhostore, const int use_mesh_coloring_gpu, const int gravity, realw_const_p minus_g, @@ -79,9 +79,9 @@ Kernel_2_acoustic_single_impl(const int nb_blocks_to_compute, const int* d_irregular_element_number, const realw xix_regular, const realw jacobian_regular, realw_const_p d_hprime_xx, - realw_const_p hprimewgll_xx, - realw_const_p wgllwgll_xy,realw_const_p wgllwgll_xz,realw_const_p wgllwgll_yz, - realw* d_rhostore, + realw_const_p d_hprimewgll_xx, + realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz, + realw_const_p d_rhostore, const int use_mesh_coloring_gpu, const int gravity, realw_const_p minus_g, diff --git a/src/gpu/kernels/pml_impose_boundary_condition_cuda_kernel.cu b/src/gpu/kernels/pml_impose_boundary_condition_cuda_kernel.cu index b070a6768..41396efa3 100644 --- a/src/gpu/kernels/pml_impose_boundary_condition_cuda_kernel.cu +++ b/src/gpu/kernels/pml_impose_boundary_condition_cuda_kernel.cu @@ -65,15 +65,15 @@ __global__ void 
pml_impose_boundary_condition_cuda_kernel(realw* accel, // gets associated velocity - displ[iglob*3+0] = 0.f; + displ[iglob*3] = 0.f; displ[iglob*3+1] = 0.f; displ[iglob*3+2] = 0.f; - veloc[iglob*3+0] = 0.f; + veloc[iglob*3] = 0.f; veloc[iglob*3+1] = 0.f; veloc[iglob*3+2] = 0.f; - accel[iglob*3+0] = 0.f; + accel[iglob*3] = 0.f; accel[iglob*3+1] = 0.f; accel[iglob*3+2] = 0.f; diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h index 402edb266..495ff2528 100644 --- a/src/gpu/mesh_constants_gpu.h +++ b/src/gpu/mesh_constants_gpu.h @@ -124,9 +124,28 @@ typedef double realw; #endif // maximum function +#if !defined(MAX) #define MAX(x,y) (((x) < (y)) ? (y) : (x)) +#endif // minimum function +#if !defined(MIN) #define MIN(a,b) (((a) > (b)) ? (b) : (a)) +#endif + +// HIP +#ifdef USE_HIP +// for HIP-CPU installation +#if defined(__HIP_CPU_RT__) +//#pragma message ("\nCompiling with: HIP-CPU enabled\n") +// forces __forceinline__ keyword to be inline to avoid "duplicate symbol.." linking errors +#if defined(__forceinline__) +#undef __forceinline__ +#endif +//#define __forceinline__ inline +// or +#define __forceinline__ __attribute__((always_inline)) inline +#endif +#endif /* ----------------------------------------------------------------------------------------------- */ @@ -260,15 +279,11 @@ typedef double realw; /* ----------------------------------------------------------------------------------------------- */ -// type of "working" variables: see also CUSTOM_REAL -// double precision temporary variables leads to 10% performance decrease -// in Kernel_2_impl (not very much..) 
-typedef float realw; - // textures // note: texture templates are supported only for CUDA versions <= 11.x // since CUDA 12.x, these are deprecated and texture objects should be used instead // see: https://developer.nvidia.com/blog/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/ +#if CUSTOM_REAL == 4 #if defined(USE_TEXTURES_FIELDS) || defined(USE_TEXTURES_CONSTANTS) #ifdef USE_CUDA typedef texture realw_texture; @@ -277,6 +292,16 @@ typedef texture realw_texture typedef texture realw_texture; #endif #endif +#elif CUSTOM_REAL == 8 +#if defined(USE_TEXTURES_FIELDS) || defined(USE_TEXTURES_CONSTANTS) +#ifdef USE_CUDA +typedef texture realw_texture; +#endif +#ifdef USE_HIP +typedef texture realw_texture; +#endif +#endif +#endif // pointer declarations // restricted pointers: can improve performance on Kepler ~ 10% diff --git a/src/gpu/prepare_mesh_constants_cuda.cu b/src/gpu/prepare_mesh_constants_cuda.cu index 57e142571..78dcf5069 100644 --- a/src/gpu/prepare_mesh_constants_cuda.cu +++ b/src/gpu/prepare_mesh_constants_cuda.cu @@ -1615,23 +1615,32 @@ TRACE("prepare_cleanup_device"); if (mp->save_seismograms_p) gpuFree(mp->d_seismograms_p); gpuFree(mp->d_nu_rec); gpuFree(mp->d_ispec_selected_rec_loc); - } - gpuFree(mp->d_ispec_selected_rec); + } + gpuFree(mp->d_ispec_selected_rec); // ACOUSTIC arrays if (*ACOUSTIC_SIMULATION ){ gpuFree(mp->d_potential_acoustic); gpuFree(mp->d_potential_dot_acoustic); gpuFree(mp->d_potential_dot_dot_acoustic); - gpuFree(mp->d_send_potential_dot_dot_buffer); + if (mp->size_mpi_buffer_potential > 0){ + gpuFree(mp->d_send_potential_dot_dot_buffer); + if (mp->simulation_type == 3) gpuFree(mp->d_b_send_potential_dot_dot_buffer); + } gpuFree(mp->d_rmass_acoustic); gpuFree(mp->d_kappastore); gpuFree(mp->d_phase_ispec_inner_acoustic); if (*NOISE_TOMOGRAPHY == 0){ - gpuFree(mp->d_free_surface_ispec); - gpuFree(mp->d_free_surface_ijk); + if (mp->num_free_surface_faces > 0){ + gpuFree(mp->d_free_surface_ispec); + 
gpuFree(mp->d_free_surface_ijk); + } + } + if (mp->stacey_absorbing_conditions && mp->d_num_abs_boundary_faces > 0) { + if (mp->simulation_type == 3 || ( mp->simulation_type == 1 && mp->save_forward )) { + gpuFree(mp->d_b_absorb_potential); + } } - if (mp->stacey_absorbing_conditions) gpuFree(mp->d_b_absorb_potential); if (mp->simulation_type == 3) { gpuFree(mp->d_b_potential_acoustic); gpuFree(mp->d_b_potential_dot_acoustic); @@ -1651,8 +1660,10 @@ TRACE("prepare_cleanup_device"); gpuFree(mp->d_displ); gpuFree(mp->d_veloc); gpuFree(mp->d_accel); - gpuFree(mp->d_send_accel_buffer); - if (mp->simulation_type == 3) gpuFree(mp->d_b_send_accel_buffer); + if (mp->size_mpi_buffer > 0){ + gpuFree(mp->d_send_accel_buffer); + if (mp->simulation_type == 3) gpuFree(mp->d_b_send_accel_buffer); + } gpuFree(mp->d_rmassx); gpuFree(mp->d_rmassy); gpuFree(mp->d_rmassz); @@ -1660,8 +1671,9 @@ TRACE("prepare_cleanup_device"); if (mp->stacey_absorbing_conditions && mp->d_num_abs_boundary_faces > 0){ gpuFree(mp->d_rho_vp); gpuFree(mp->d_rho_vs); - if (mp->simulation_type == 3 || ( mp->simulation_type == 1 && mp->save_forward )) - gpuFree(mp->d_b_absorb_field); + if (mp->simulation_type == 3 || ( mp->simulation_type == 1 && mp->save_forward )) { + gpuFree(mp->d_b_absorb_field); + } } if (mp->pml_conditions && mp->NSPEC_CPML > 0){ gpuFree(mp->d_CPML_to_spec); diff --git a/src/gpu/smooth_cuda.cu b/src/gpu/smooth_cuda.cu index 9798c6091..dca19fd3d 100644 --- a/src/gpu/smooth_cuda.cu +++ b/src/gpu/smooth_cuda.cu @@ -185,6 +185,8 @@ void FC_FUNC_(compute_smooth_gpu, gpuFree(d_data_other); } + + // explicitly wait for cuda kernels to finish gpuSynchronize(); gpuFree(x_other); diff --git a/src/gpu/update_displacement_cuda.cu b/src/gpu/update_displacement_cuda.cu index 8156f87d7..3a27de2d0 100644 --- a/src/gpu/update_displacement_cuda.cu +++ b/src/gpu/update_displacement_cuda.cu @@ -330,7 +330,7 @@ void FC_FUNC_(kernel_3_a_cuda, // updates both, accel and veloc #ifdef USE_CUDA if (run_cuda){ - 
kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(veloc, + kernel_3_cuda_device<<<grid,threads,0,mp->compute_stream>>>(veloc, accel, size, deltatover2, @@ -352,7 +352,7 @@ void FC_FUNC_(kernel_3_a_cuda, // updates only accel #ifdef USE_CUDA if (run_cuda){ - kernel_3_accel_cuda_device<<< grid, threads,0,mp->compute_stream>>>(accel, + kernel_3_accel_cuda_device<<<grid,threads,0,mp->compute_stream>>>(accel, size, mp->d_rmassx, mp->d_rmassy, @@ -416,7 +416,7 @@ void FC_FUNC_(kernel_3_b_cuda, // updates only veloc at this point #ifdef USE_CUDA if (run_cuda){ - kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(veloc, + kernel_3_veloc_cuda_device<<<grid,threads,0,mp->compute_stream>>>(veloc, accel, size,deltatover2); } @@ -455,6 +455,7 @@ void FC_FUNC_(kernel_3_acoustic_cuda, Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper int FORWARD_OR_ADJOINT = *FORWARD_OR_ADJOINT_f; + // safety check if (FORWARD_OR_ADJOINT != 0 && FORWARD_OR_ADJOINT != 1 && FORWARD_OR_ADJOINT != 3) { exit_on_error("Error invalid FORWARD_OR_ADJOINT in Kernel_2_acoustic() routine"); @@ -491,7 +492,7 @@ void FC_FUNC_(kernel_3_acoustic_cuda, // This kernel treats both forward and adjoint wavefield within the same call, to increase performance #ifdef USE_CUDA if (run_cuda){ - kernel_3_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic, + kernel_3_acoustic_cuda_device<<<grid,threads,0,mp->compute_stream>>>(mp->d_potential_dot_acoustic, mp->d_potential_dot_dot_acoustic, mp->d_b_potential_dot_acoustic, mp->d_b_potential_dot_dot_acoustic, @@ -504,7 +505,7 @@ void FC_FUNC_(kernel_3_acoustic_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(kernel_3_acoustic_cuda_device, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(kernel_3_acoustic_cuda_device, dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_potential_dot_acoustic, mp->d_potential_dot_dot_acoustic, mp->d_b_potential_dot_acoustic, @@ -521,7 +522,7 @@ void FC_FUNC_(kernel_3_acoustic_cuda, // single field kernel #ifdef USE_CUDA if
(run_cuda){ - kernel_3_acoustic_single_cuda_device<<< grid, threads>>>(potential_dot, + kernel_3_acoustic_single_cuda_device<<<grid,threads,0,mp->compute_stream>>>(potential_dot, potential_dot_dot, size, deltaover2, @@ -530,7 +531,7 @@ void FC_FUNC_(kernel_3_acoustic_cuda, #endif #ifdef USE_HIP if (run_hip){ - hipLaunchKernelGGL(kernel_3_acoustic_single_cuda_device, dim3(grid), dim3(threads), 0, 0, + hipLaunchKernelGGL(kernel_3_acoustic_single_cuda_device, dim3(grid), dim3(threads), 0, mp->compute_stream, potential_dot, potential_dot_dot, size, diff --git a/src/specfem3D/compute_add_sources_acoustic.f90 b/src/specfem3D/compute_add_sources_acoustic.f90 index 85866872d..676f57bc0 100644 --- a/src/specfem3D/compute_add_sources_acoustic.f90 +++ b/src/specfem3D/compute_add_sources_acoustic.f90 @@ -31,7 +31,9 @@ subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) use constants use specfem_par, only: station_name,network_name, & - nsources_local,tshift_src,DT,t0,SU_FORMAT,USE_LDDRK,istage, & + nsources_local,tshift_src,DT,t0, & + SU_FORMAT,READ_ADJSRC_ASDF, & + USE_LDDRK,istage, & hxir_adjstore,hetar_adjstore,hgammar_adjstore,source_adjoint,number_adjsources_global,nadj_rec_local, & USE_BINARY_FOR_SEISMOGRAMS, & ibool,NSOURCES,myrank,it,ispec_selected_source,islice_selected_source, & @@ -62,20 +64,9 @@ subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) character(len=MAX_STRING_LEN) :: adj_source_file - ! sets current initial time - if (USE_LDDRK) then - ! LDDRK - ! note: the LDDRK scheme updates displacement after the stiffness computations and - ! after adding boundary/coupling/source terms. - ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme - ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. - time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 - else - time_t = dble(it-1)*DT - t0 - endif - -! 
forward simulations if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then + ! ignore pressure sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -83,6 +74,18 @@ subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) ! because the source is precisely the wavefield coming from the DSM traction file if (COUPLE_WITH_INJECTION_TECHNIQUE) return + ! sets current initial time + if (USE_LDDRK) then + ! LDDRK + ! note: the LDDRK scheme updates displacement after the stiffness computations and + ! after adding boundary/coupling/source terms. + ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme + ! when entering this routine. we therefore add an additional -DT to have the corresponding timing for the source. + time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 + else + time_t = dble(it-1)*DT - t0 + endif + ! openmp solver !$OMP PARALLEL if (NSOURCES > 100) & !$OMP DEFAULT(SHARED) & @@ -179,25 +182,34 @@ subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) ! with other partitions while we calculate for the inner part ! this must be done carefully, otherwise the adjoint sources may be added twice if (ibool_read_adj_arrays .and. .not. INVERSE_FWI_FULL_PROBLEM) then - - if (.not. SU_FORMAT) then - ! ASCII format + ! reads adjoint source files + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ACOUSTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + else + ! default ASCII format if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' - !!! 
read ascii adjoint sources do irec_local = 1, nadj_rec_local - irec = number_adjsources_global(irec_local) ! reads in **net**.**sta**.**BH**.adj files - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) - call compute_arrays_adjoint_source(adj_source_file,irec) + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - else - ! SU format - call compute_arrays_adjoint_source_SU() - endif !if (.not. SU_FORMAT) - + endif endif ! if (ibool_read_adj_arrays) + ! adds source term if (it < NSTEP) then ! receivers act as sources do irec_local = 1, nadj_rec_local @@ -251,7 +263,8 @@ end subroutine compute_add_sources_acoustic subroutine compute_add_sources_acoustic_backward(b_potential_dot_dot_acoustic) use constants - use specfem_par, only: nsources_local,tshift_src,DT,t0,USE_LDDRK,istage, & + use specfem_par, only: nsources_local,tshift_src,DT,t0, & + USE_LDDRK,istage, & ibool,NSOURCES,myrank,it,islice_selected_source,ispec_selected_source, & sourcearrays,kappastore,SIMULATION_TYPE,NSTEP,NGLOB_AB @@ -408,13 +421,14 @@ subroutine compute_add_sources_acoustic_GPU() use constants use specfem_par, only: station_name,network_name, & - nsources_local,tshift_src,DT,t0,SU_FORMAT,USE_LDDRK,istage, & + nsources_local,tshift_src,DT,t0, & + SU_FORMAT,READ_ADJSRC_ASDF, & + USE_LDDRK,istage, & source_adjoint,nadj_rec_local,number_adjsources_global, & USE_BINARY_FOR_SEISMOGRAMS, & NSOURCES,it,SIMULATION_TYPE,NSTEP,nrec, & NTSTEP_BETWEEN_READ_ADJSRC,Mesh_pointer, & - INVERSE_FWI_FULL_PROBLEM,run_number_of_the_source, & - GPU_MODE + INVERSE_FWI_FULL_PROBLEM,run_number_of_the_source ! coupling use shared_parameters, only: COUPLE_WITH_INJECTION_TECHNIQUE @@ -438,11 +452,9 @@ subroutine compute_add_sources_acoustic_GPU() character(len=MAX_STRING_LEN) :: adj_source_file - ! 
checks if anything to do - if (.not. GPU_MODE) return - ! forward simulations if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then + ! ignore pressure sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -525,24 +537,34 @@ subroutine compute_add_sources_acoustic_GPU() ! with other partitions while we calculate for the inner part ! this must be done carefully, otherwise the adjoint sources may be added twice if (ibool_read_adj_arrays .and. .not. INVERSE_FWI_FULL_PROBLEM) then - - if (.not. SU_FORMAT) then - ! ASCII format + ! reads adjoint source files + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ACOUSTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + else + ! default ASCII format if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' !!! read ascii adjoint sources do irec_local = 1, nadj_rec_local - irec = number_adjsources_global(irec_local) ! reads in **net**.**sta**.**BH**.adj files - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) - call compute_arrays_adjoint_source(adj_source_file,irec) + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - else - ! SU format - call compute_arrays_adjoint_source_SU() - endif !if (.not. SU_FORMAT) - + endif endif ! if (ibool_read_adj_arrays) + ! adds source term if (it < NSTEP) then ! receivers act as sources ! 
on GPU @@ -560,10 +582,11 @@ end subroutine compute_add_sources_acoustic_GPU subroutine compute_add_sources_acoustic_backward_GPU() use constants - use specfem_par, only: nsources_local,tshift_src,DT,t0,USE_LDDRK,istage, & + use specfem_par, only: nsources_local,tshift_src,DT,t0, & + USE_LDDRK,istage, & NSOURCES,myrank,it, & SIMULATION_TYPE,NSTEP, & - GPU_MODE,Mesh_pointer,run_number_of_the_source + Mesh_pointer,run_number_of_the_source ! undo_att use specfem_par, only: UNDO_ATTENUATION_AND_OR_PML,NSUBSET_ITERATIONS,NT_DUMP_ATTENUATION, & iteration_on_subset,it_of_this_subset @@ -587,8 +610,6 @@ subroutine compute_add_sources_acoustic_backward_GPU() ! checks if anything to do if (SIMULATION_TYPE /= 3) return - if (.not. GPU_MODE) return - ! checks if this slice has sources to add if (nsources_local == 0) return diff --git a/src/specfem3D/compute_add_sources_poroelastic.f90 b/src/specfem3D/compute_add_sources_poroelastic.f90 index d230c321d..f8def09bc 100644 --- a/src/specfem3D/compute_add_sources_poroelastic.f90 +++ b/src/specfem3D/compute_add_sources_poroelastic.f90 @@ -36,6 +36,7 @@ subroutine compute_add_sources_poroelastic() UNDO_ATTENUATION_AND_OR_PML, & NSOURCES,myrank,it,islice_selected_source,ispec_selected_source, & sourcearrays,SIMULATION_TYPE,NSTEP, & + SU_FORMAT,READ_ADJSRC_ASDF, & ispec_selected_rec, & nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC, & hxir_adjstore,hetar_adjstore,hgammar_adjstore,source_adjoint,number_adjsources_global,nadj_rec_local @@ -67,6 +68,7 @@ subroutine compute_add_sources_poroelastic() ! forward simulations if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then + ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -217,16 +219,31 @@ subroutine compute_add_sources_poroelastic() ! this must be done carefully, otherwise the adjoint sources may be added ! 
twice if (ibool_read_adj_arrays) then - if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use ASCII instead' - ! ASCII format - !!! read ascii adjoint sources - do irec_local = 1, nadj_rec_local - irec = number_adjsources_global(irec_local) - ! compute source arrays - ! reads in **net**.**sta**.**BH**.adj files - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) - call compute_arrays_adjoint_source(adj_source_file,irec_local) - enddo + ! reads adjoint source files + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ELASTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + else + ! default ASCII format + if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' + !!! read ascii adjoint sources + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + endif endif ! if (ibool_read_adj_arrays) if (it < NSTEP) then @@ -277,6 +294,7 @@ subroutine compute_add_sources_poroelastic() ! adjoint/backward simulations if (SIMULATION_TYPE == 3 .and. nsources_local > 0) then + ! 
ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return diff --git a/src/specfem3D/compute_add_sources_viscoelastic.F90 b/src/specfem3D/compute_add_sources_viscoelastic.F90 index ac1c15bf4..745fff9b2 100644 --- a/src/specfem3D/compute_add_sources_viscoelastic.F90 +++ b/src/specfem3D/compute_add_sources_viscoelastic.F90 @@ -75,23 +75,9 @@ subroutine compute_add_sources_viscoelastic(accel) character(len=MAX_STRING_LEN) :: adj_source_file - ! sets current initial time - if (USE_LDDRK) then - ! LDDRK - ! note: the LDDRK scheme updates displacement after the stiffness computations and - ! after adding boundary/coupling/source terms. - ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme - ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. - time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 - else if (LTS_MODE) then - ! current local time - time_t = current_lts_time - else - time_t = dble(it-1)*DT - t0 - endif - ! forward simulations if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -99,6 +85,21 @@ subroutine compute_add_sources_viscoelastic(accel) ! because the source is precisely the wavefield coming from the DSM traction file if (COUPLE_WITH_INJECTION_TECHNIQUE) return + ! sets current initial time + if (USE_LDDRK) then + ! LDDRK + ! note: the LDDRK scheme updates displacement after the stiffness computations and + ! after adding boundary/coupling/source terms. + ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme + ! when entering this routine. we therefore add an additional -DT to have the corresponding timing for the source. + time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 + else if (LTS_MODE) then + ! 
current local time + time_t = current_lts_time + else + time_t = dble(it-1)*DT - t0 + endif + ! openmp solver !$OMP PARALLEL if (NSOURCES > 100) & !$OMP DEFAULT(SHARED) & @@ -201,29 +202,30 @@ subroutine compute_add_sources_viscoelastic(accel) ! this must be done carefully, otherwise the adjoint sources may be added twice if (ibool_read_adj_arrays .and. .not. INVERSE_FWI_FULL_PROBLEM) then ! reads adjoint source files - if (.not. (SU_FORMAT .or. READ_ADJSRC_ASDF)) then - ! ASCII formant - if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' - !!! read ascii adjoint sources + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ELASTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format do irec_local = 1, nadj_rec_local ! reads in **net**.**sta**.**BH**.adj files irec = number_adjsources_global(irec_local) - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - else if (READ_ADJSRC_ASDF) then - ! ASDF format + else + ! default ASCII format + if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' + !!! read ascii adjoint sources do irec_local = 1, nadj_rec_local ! reads in **net**.**sta**.**BH**.adj files irec = number_adjsources_global(irec_local) - adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - call compute_arrays_adjoint_source(adj_source_file, irec_local) - else - ! SU format - call compute_arrays_adjoint_source_SU() - endif !if (.not. SU_FORMAT) + endif endif ! 
if (ibool_read_adj_arrays) ! adds source term @@ -284,6 +286,7 @@ subroutine compute_add_sources_viscoelastic(accel) ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density ! therefore, we must add it here, before applying the inverse of mass matrix endif + ! note: NOISE_TOMOGRAPHY == 3 step is done in backward routine endif end subroutine compute_add_sources_viscoelastic @@ -340,28 +343,6 @@ subroutine compute_add_sources_viscoelastic_backward(b_accel) ! because the source is precisely the wavefield coming from the DSM traction file if (COUPLE_WITH_INJECTION_TECHNIQUE) return - ! iteration step - if (UNDO_ATTENUATION_AND_OR_PML) then - ! example: NSTEP is a multiple of NT_DUMP_ATTENUATION - ! NT_DUMP_ATTENUATION = 301, NSTEP = 1204, NSUBSET_ITERATIONS = 4, iteration_on_subset = 1 -> 4, - ! 1. subset, it_temp goes from 301 down to 1 - ! 2. subset, it_temp goes from 602 down to 302 - ! 3. subset, it_temp goes from 903 down to 603 - ! 4. subset, it_temp goes from 1204 down to 904 - !valid for multiples only: - !it_tmp = iteration_on_subset * NT_DUMP_ATTENUATION - it_of_this_subset + 1 - ! - ! example: NSTEP is **NOT** a multiple of NT_DUMP_ATTENUATION - ! NT_DUMP_ATTENUATION = 301, NSTEP = 900, NSUBSET_ITERATIONS = 3, iteration_on_subset = 1 -> 3 - ! 1. subset, it_temp goes from (900 - 602) = 298 down to 1 - ! 2. subset, it_temp goes from (900 - 301) = 599 down to 299 - ! 3. subset, it_temp goes from (900 - 0) = 900 down to 600 - !works always: - it_tmp = NSTEP - (NSUBSET_ITERATIONS - iteration_on_subset)*NT_DUMP_ATTENUATION - it_of_this_subset + 1 - else - it_tmp = it - endif - ! NOTE: adjoint sources and backward wavefield timing: ! idea is to start with the backward field b_displ,.. at time (T) ! and convolve with the adjoint field at time (T-t) @@ -386,25 +367,51 @@ subroutine compute_add_sources_viscoelastic_backward(b_accel) ! adjoint source traces which start at -t0 and end at time (NSTEP-1)*DT - t0 ! 
for step it=1: (NSTEP -it + 1)*DT - t0 for backward wavefields corresponds to time T - ! sets current initial time - if (USE_LDDRK) then - ! LDDRK - ! note: the LDDRK scheme updates displacement after the stiffness computations and - ! after adding boundary/coupling/source terms. - ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme - ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. + ! adjoint simulations + if (NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + + ! iteration step if (UNDO_ATTENUATION_AND_OR_PML) then - ! stepping moves forward from snapshot position - time_t = dble(NSTEP-it_tmp-1)*DT + dble(C_LDDRK(istage))*DT - t0 + ! example: NSTEP is a multiple of NT_DUMP_ATTENUATION + ! NT_DUMP_ATTENUATION = 301, NSTEP = 1204, NSUBSET_ITERATIONS = 4, iteration_on_subset = 1 -> 4, + ! 1. subset, it_temp goes from 301 down to 1 + ! 2. subset, it_temp goes from 602 down to 302 + ! 3. subset, it_temp goes from 903 down to 603 + ! 4. subset, it_temp goes from 1204 down to 904 + !valid for multiples only: + !it_tmp = iteration_on_subset * NT_DUMP_ATTENUATION - it_of_this_subset + 1 + ! + ! example: NSTEP is **NOT** a multiple of NT_DUMP_ATTENUATION + ! NT_DUMP_ATTENUATION = 301, NSTEP = 900, NSUBSET_ITERATIONS = 3, iteration_on_subset = 1 -> 3 + ! 1. subset, it_temp goes from (900 - 602) = 298 down to 1 + ! 2. subset, it_temp goes from (900 - 301) = 599 down to 299 + ! 3. subset, it_temp goes from (900 - 0) = 900 down to 600 + !works always: + it_tmp = NSTEP - (NSUBSET_ITERATIONS - iteration_on_subset)*NT_DUMP_ATTENUATION - it_of_this_subset + 1 else - time_t = dble(NSTEP-it_tmp-1)*DT - dble(C_LDDRK(istage))*DT - t0 + it_tmp = it endif - else - time_t = dble(NSTEP-it_tmp)*DT - t0 - endif -! adjoint simulations - if (NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + ! sets current initial time + if (USE_LDDRK) then + ! LDDRK + ! 
note: the LDDRK scheme updates displacement after the stiffness computations and + ! after adding boundary/coupling/source terms. + ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme + ! when entering this routine. we therefore add an additional -DT to have the corresponding timing for the source. + if (UNDO_ATTENUATION_AND_OR_PML) then + ! stepping moves forward from snapshot position + time_t = dble(NSTEP-it_tmp-1)*DT + dble(C_LDDRK(istage))*DT - t0 + else + time_t = dble(NSTEP-it_tmp-1)*DT - dble(C_LDDRK(istage))*DT - t0 + endif + else + ! Newmark + ! note: b_displ() is read in after Newmark time scheme, thus + ! b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT. + ! thus indexing is NSTEP - it , instead of NSTEP - it - 1 + time_t = dble(NSTEP-it_tmp)*DT - t0 + endif ! backward source reconstruction do isource = 1,NSOURCES @@ -429,7 +436,9 @@ subroutine compute_add_sources_viscoelastic_backward(b_accel) do j = 1,NGLLY do i = 1,NGLLX iglob = ibool(i,j,k,ispec) - b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k) * stf_used + b_accel(1,iglob) = b_accel(1,iglob) + sourcearrays(isource,1,i,j,k) * stf_used + b_accel(2,iglob) = b_accel(2,iglob) + sourcearrays(isource,2,i,j,k) * stf_used + b_accel(3,iglob) = b_accel(3,iglob) + sourcearrays(isource,3,i,j,k) * stf_used enddo enddo enddo @@ -472,8 +481,8 @@ subroutine compute_add_sources_viscoelastic_GPU() use shared_parameters, only: DT, & SIMULATION_TYPE,NOISE_TOMOGRAPHY,INVERSE_FWI_FULL_PROBLEM, & - USE_LDDRK,LTS_MODE,GPU_MODE,UNDO_ATTENUATION_AND_OR_PML, & - SU_FORMAT,USE_BINARY_FOR_SEISMOGRAMS, & + USE_LDDRK,LTS_MODE, & + SU_FORMAT,READ_ADJSRC_ASDF,USE_BINARY_FOR_SEISMOGRAMS, & NSTEP,NTSTEP_BETWEEN_READ_ADJSRC use specfem_par, only: station_name,network_name, & @@ -510,11 +519,20 @@ subroutine compute_add_sources_viscoelastic_GPU() character(len=MAX_STRING_LEN) :: adj_source_file - ! checks if anything to do - if (.not. GPU_MODE) return + ! 
note: this routine will only take care of adding contributions to accel() wavefield array. + ! it mimicks exactly what the routine compute_add_sources_viscoelastic() is doing. + ! + ! thus, it deals with the CMT/force/noise source contributions for forward simulations, + ! and the adjoint source contributions for pure adjoint or kernel simulations. + ! + ! we will not consider the backward b_accel() wavefield contributions. + ! that is, the re-injection of the CMT/force/noise source contribution into + ! the backward wavefield b_accel() is not done here. + ! those will be done in the routine compute_add_sources_viscoelastic_backward_GPU(). ! forward simulations if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -600,6 +618,7 @@ subroutine compute_add_sources_viscoelastic_GPU() ! adjoint simulations if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then + ! adds adjoint source in this partitions if (nadj_rec_local > 0) then @@ -619,24 +638,34 @@ subroutine compute_add_sources_viscoelastic_GPU() ! with other partitions while calculate for the inner part ! this must be done carefully, otherwise the adjoint sources may be added twice if (ibool_read_adj_arrays .and. .not. INVERSE_FWI_FULL_PROBLEM) then - - if (.not. SU_FORMAT) then - ! ASCII format + ! reads adjoint source files + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ELASTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + else + ! 
default ASCII format if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' !!! read ascii adjoint sources do irec_local = 1, nadj_rec_local ! reads in **net**.**sta**.**BH**.adj files irec = number_adjsources_global(irec_local) - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - else - ! SU format - call compute_arrays_adjoint_source_SU() - endif !if (.not. SU_FORMAT) - + endif endif ! if (ibool_read_adj_arrays) + ! adds source term if (it < NSTEP) then call add_sources_el_sim_type_2_or_3(Mesh_pointer, & source_adjoint, & @@ -648,50 +677,6 @@ subroutine compute_add_sources_viscoelastic_GPU() endif ! nadj_rec_local endif !adjoint -! note: b_displ() is read in after Newmark time scheme, thus -! b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT. -! thus indexing is NSTEP - it , instead of NSTEP - it - 1 - - ! adjoint/backward wavefield - if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then - ! ignore CMT sources for fault rupture simulations - if (FAULT_SIMULATION) return - - ! no source inside the mesh if we are coupling with DSM - ! nothing left to do, can exit routine... - if (COUPLE_WITH_INJECTION_TECHNIQUE) return - - if (NSOURCES > 0) then - do isource = 1,NSOURCES - ! current time - if (USE_LDDRK) then - ! LDDRK - ! note: the LDDRK scheme updates displacement after the stiffness computations and - ! after adding boundary/coupling/source terms. - ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme - ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. - if (UNDO_ATTENUATION_AND_OR_PML) then - ! 
stepping moves forward from snapshot position - time_source_dble = dble(NSTEP-it-1)*DT + dble(C_LDDRK(istage))*DT - t0 - tshift_src(isource) - else - time_source_dble = dble(NSTEP-it-1)*DT - dble(C_LDDRK(istage))*DT - t0 - tshift_src(isource) - endif - else - time_source_dble = dble(NSTEP-it)*DT - t0 - tshift_src(isource) - endif - - ! determines source time function value - stf = get_stf_viscoelastic(time_source_dble,isource,NSTEP-it+1) - - ! stores precomputed source time function factor - stf_pre_compute(isource) = stf - enddo - - ! only implements SIMTYPE=3 - call compute_add_sources_el_s3_cuda(Mesh_pointer,stf_pre_compute,NSOURCES) - endif - endif ! adjoint - ! for noise simulations if (NOISE_TOMOGRAPHY > 0) then ! we have two loops indicated by iphase ("inner elements/points" or "boundary elements/points") @@ -713,14 +698,8 @@ subroutine compute_add_sources_viscoelastic_GPU() ! note the ensemble forward sources are generally distributed on the surface of the earth ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density ! therefore, we must add it here, before applying the inverse of mass matrix - else if (NOISE_TOMOGRAPHY == 3) then - ! third step of noise tomography, i.e., read the surface movie saved at every timestep - ! use the movie to reconstruct the ensemble forward wavefield - ! the ensemble adjoint wavefield is done as usual - ! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal - call noise_read_add_surface_movie_GPU(noise_surface_movie,it,num_free_surface_faces, & - Mesh_pointer,NOISE_TOMOGRAPHY) endif + ! 
note: NOISE_TOMOGRAPHY == 3 step is done in backward routine endif end subroutine compute_add_sources_viscoelastic_GPU @@ -731,10 +710,11 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() use constants use specfem_par, only: nsources_local,tshift_src,dt,t0, & - USE_LDDRK,istage, & - NSOURCES,it,SIMULATION_TYPE,NSTEP, & - NOISE_TOMOGRAPHY, & - Mesh_pointer,GPU_MODE + num_free_surface_faces, & + USE_LDDRK,istage, & + NSOURCES,it,SIMULATION_TYPE,NSTEP, & + NOISE_TOMOGRAPHY, & + Mesh_pointer ! coupling use shared_parameters, only: COUPLE_WITH_INJECTION_TECHNIQUE @@ -743,6 +723,8 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() use specfem_par, only: UNDO_ATTENUATION_AND_OR_PML,NSUBSET_ITERATIONS,NT_DUMP_ATTENUATION, & iteration_on_subset,it_of_this_subset + use specfem_par_noise, only: noise_surface_movie + ! faults use specfem_par, only: FAULT_SIMULATION @@ -758,7 +740,6 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() ! checks if anything to do if (SIMULATION_TYPE /= 3) return - if (.not. GPU_MODE) return ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -767,30 +748,31 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() ! because the source is precisely the wavefield coming from the DSM traction file if (COUPLE_WITH_INJECTION_TECHNIQUE) return - ! iteration step - if (UNDO_ATTENUATION_AND_OR_PML) then - ! example: NSTEP is a multiple of NT_DUMP_ATTENUATION - ! NT_DUMP_ATTENUATION = 301, NSTEP = 1204, NSUBSET_ITERATIONS = 4, iteration_on_subset = 1 -> 4, - ! 1. subset, it_temp goes from 301 down to 1 - ! 2. subset, it_temp goes from 602 down to 302 - ! 3. subset, it_temp goes from 903 down to 603 - ! 4. subset, it_temp goes from 1204 down to 904 - !valid for multiples only: - !it_tmp = iteration_on_subset * NT_DUMP_ATTENUATION - it_of_this_subset + 1 - ! - ! example: NSTEP is **NOT** a multiple of NT_DUMP_ATTENUATION - ! 
NT_DUMP_ATTENUATION = 301, NSTEP = 900, NSUBSET_ITERATIONS = 3, iteration_on_subset = 1 -> 3 - ! 1. subset, it_temp goes from (900 - 602) = 298 down to 1 - ! 2. subset, it_temp goes from (900 - 301) = 599 down to 299 - ! 3. subset, it_temp goes from (900 - 0) = 900 down to 600 - !works always: - it_tmp = NSTEP - (NSUBSET_ITERATIONS - iteration_on_subset)*NT_DUMP_ATTENUATION - it_of_this_subset + 1 - else - it_tmp = it - endif - ! forward simulations if (NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + + ! iteration step + if (UNDO_ATTENUATION_AND_OR_PML) then + ! example: NSTEP is a multiple of NT_DUMP_ATTENUATION + ! NT_DUMP_ATTENUATION = 301, NSTEP = 1204, NSUBSET_ITERATIONS = 4, iteration_on_subset = 1 -> 4, + ! 1. subset, it_temp goes from 301 down to 1 + ! 2. subset, it_temp goes from 602 down to 302 + ! 3. subset, it_temp goes from 903 down to 603 + ! 4. subset, it_temp goes from 1204 down to 904 + !valid for multiples only: + !it_tmp = iteration_on_subset * NT_DUMP_ATTENUATION - it_of_this_subset + 1 + ! + ! example: NSTEP is **NOT** a multiple of NT_DUMP_ATTENUATION + ! NT_DUMP_ATTENUATION = 301, NSTEP = 900, NSUBSET_ITERATIONS = 3, iteration_on_subset = 1 -> 3 + ! 1. subset, it_temp goes from (900 - 602) = 298 down to 1 + ! 2. subset, it_temp goes from (900 - 301) = 599 down to 299 + ! 3. subset, it_temp goes from (900 - 0) = 900 down to 600 + !works always: + it_tmp = NSTEP - (NSUBSET_ITERATIONS - iteration_on_subset)*NT_DUMP_ATTENUATION - it_of_this_subset + 1 + else + it_tmp = it + endif + ! sets current initial time if (USE_LDDRK) then ! LDDRK @@ -805,6 +787,10 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() time_t = dble(NSTEP-it_tmp-1)*DT - dble(C_LDDRK(istage))*DT - t0 endif else + ! Newmark + ! note: b_displ() is read in after Newmark time scheme, thus + ! b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT. + ! 
thus indexing is NSTEP - it , instead of NSTEP - it - 1 time_t = dble(NSTEP-it_tmp)*DT - t0 endif @@ -818,13 +804,24 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() ! stores precomputed source time function factor stf_pre_compute(isource) = stf enddo + ! only implements SIMTYPE=3 call compute_add_sources_el_s3_cuda(Mesh_pointer,stf_pre_compute,NSOURCES) endif ! for noise simulations if (NOISE_TOMOGRAPHY > 0) then - stop 'for NOISE simulations, backward GPU routine is not implemented yet' + ! we have two loops indicated by iphase ("inner elements/points" or "boundary elements/points") + ! here, we add all noise sources once, when we are calculating for boundary points (iphase==1), + ! because boundary points are calculated first! + if (NOISE_TOMOGRAPHY == 3) then + ! third step of noise tomography, i.e., read the surface movie saved at every timestep + ! use the movie to reconstruct the ensemble forward wavefield + ! the ensemble adjoint wavefield is done as usual + ! note instead of "NSTEP-it+1", now we use "it", since reconstruction is a reversal of reversal + call noise_read_add_surface_movie_GPU(noise_surface_movie,it,num_free_surface_faces, & + Mesh_pointer,NOISE_TOMOGRAPHY) + endif endif end subroutine compute_add_sources_viscoelastic_backward_GPU @@ -895,13 +892,13 @@ double precision function get_stf_viscoelastic(time_source_dble,isource,it_tmp_e ! Brune source time function ! hdur is the source duration or the rise time ! Frequency parameter: - f0=1.d0/hdur(isource) + f0 = 1.d0/hdur(isource) stf = comp_source_time_function_brune(time_source_dble,f0) case (6) ! Smoothed Brune source time function ! hdur is the source duration or the rise time ! Frequency parameter: - f0=1.d0/hdur(isource) + f0 = 1.d0/hdur(isource) stf = comp_source_time_function_smooth_brune(time_source_dble,f0) case default stop 'unsupported force_stf value!' 
diff --git a/src/specfem3D/compute_arrays_source.f90 b/src/specfem3D/compute_arrays_source.f90 index 3d835744a..e8bc4d3a3 100644 --- a/src/specfem3D/compute_arrays_source.f90 +++ b/src/specfem3D/compute_arrays_source.f90 @@ -214,9 +214,8 @@ subroutine compute_arrays_adjoint_source(adj_source_file,irec_local) ! local integer :: icomp, itime, ier, it_start, it_end, it_sub_adj - real(kind=CUSTOM_REAL), dimension(NDIM,NTSTEP_BETWEEN_READ_ADJSRC) :: adj_src - real(kind=CUSTOM_REAL), dimension(NSTEP) :: adj_source_asdf - double precision :: junk + real(kind=CUSTOM_REAL), dimension(NTSTEP_BETWEEN_READ_ADJSRC) :: adj_source_asdf + real(kind=CUSTOM_REAL) :: val,junk ! note: should have same order as orientation in write_seismograms_to_file() character(len=3),dimension(NDIM) :: comp character(len=MAX_STRING_LEN) :: filename @@ -231,31 +230,30 @@ subroutine compute_arrays_adjoint_source(adj_source_file,irec_local) it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1 it_end = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1 - adj_src(:,:) = 0._CUSTOM_REAL - itime = 0 - if (READ_ADJSRC_ASDF) then ! ASDF format do icomp = 1,NDIM ! 3 components + ! format: "net_sta_comp" filename = trim(adj_source_file) // '_' // comp(icomp) - ! would skip read and set source artificially to zero if out of bounds, - ! see comments above - if (it_start == 0 .and. itime == 0) then - adj_src(icomp,1) = 0._CUSTOM_REAL - cycle - endif - + ! reads full trace (NSTEP) call read_adjoint_sources_ASDF(filename, adj_source_asdf, it_start, it_end) - ! stores source array - adj_src(icomp,:) = adj_source_asdf(:) + ! debug - check whether we read the correct block + !if (icomp == 1) print *, junk, adj_source_asdf(itime-it_start+1,icomp) + + ! store the block we need + do itime = it_start, it_end + ! store adjoint trace + source_adjoint(icomp,irec_local,itime-it_start+1) = adj_source_asdf(itime-it_start+1) + enddo enddo else ! ASCII format ! loops over components do icomp = 1, NDIM + ! 
format: "SEM/net.sta.comp.adj" filename = OUTPUT_FILES(1:len_trim(OUTPUT_FILES))//'/../SEM/'//trim(adj_source_file)//'.'//comp(icomp)//'.adj' open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ier) @@ -268,19 +266,20 @@ subroutine compute_arrays_adjoint_source(adj_source_file,irec_local) !! skip unused blocks do itime = 1, it_start-1 read(IIN,*,iostat=ier) junk, junk - if (ier /= 0) & - call exit_MPI(myrank, & - 'file '//trim(filename)//' has wrong length, please check with your simulation duration (1111)') + if (ier /= 0) then + call exit_MPI(myrank,'file '//trim(filename)//' has wrong length, please check with your simulation duration (1)') + endif enddo !! read the block we need do itime = it_start, it_end - read(IIN,*,iostat=ier) junk, source_adjoint(icomp,irec_local,itime-it_start+1) - !!! used to check whether we read the correct block - ! if (icomp==1) print *, junk, adj_src(itime-it_start+1,icomp) - if (ier /= 0) & - call exit_MPI(myrank, & - 'file '//trim(filename)//' has wrong length, please check with your simulation duration (2222)') + read(IIN,*,iostat=ier) junk, val + if (ier /= 0) then + call exit_MPI(myrank,'file '//trim(filename)//' has wrong length, please check with your simulation duration (2)') + endif + + ! store adjoint trace + source_adjoint(icomp,irec_local,itime-it_start+1) = val enddo close(IIN) @@ -294,18 +293,23 @@ end subroutine compute_arrays_adjoint_source !------------------------------------------------------------------------------------------------- ! - subroutine compute_arrays_adjoint_source_SU() + subroutine compute_arrays_adjoint_source_SU(idomain) use specfem_par, only: myrank,source_adjoint,it,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC,nrec_local - use shared_parameters, only: ACOUSTIC_SIMULATION,ELASTIC_SIMULATION use constants implicit none + integer, intent(in) :: idomain + ! 
local parameters real(kind=CUSTOM_REAL), dimension(NTSTEP_BETWEEN_READ_ADJSRC) :: adj_temp integer :: ier, irec_local, it_start, it_sub_adj + logical :: found_adjoint_files ! note: should have same order as orientation in write_seismograms_to_file() - character(len=MAX_STRING_LEN) :: procname, filename + character(len=MAX_STRING_LEN) :: procname, filename_p, filename_x, filename_y, filename_z + + ! check if anything to read for this slice + if (nrec_local < 1) return ! range of the block we need to read it_sub_adj = ceiling( dble(it)/dble(NTSTEP_BETWEEN_READ_ADJSRC) ) @@ -313,53 +317,120 @@ subroutine compute_arrays_adjoint_source_SU() write(procname,"(i4)") myrank - if (ACOUSTIC_SIMULATION) then + ! check if we have adjoint traces + found_adjoint_files = .false. + + select case(idomain) + case (IDOMAIN_ACOUSTIC) + ! get acoustic adjoint traces + ! SU adjoint file name + filename_p = trim(OUTPUT_FILES)//'../SEM/'//trim(adjustl(procname))//'_p_SU.adj' + + ! check if file exists + inquire(file=trim(filename_p),exist=found_adjoint_files) + + ! read file + if (found_adjoint_files) then + ! 
user output + if (myrank == 0) then + write(IMAIN,*) 'reading acoustic adjoint traces:' + write(IMAIN,*) ' ',trim(filename_p) + write(IMAIN,*) ' using SU_FORMAT' + write(IMAIN,*) + write(IMAIN,*) ' start index = ',it_start + write(IMAIN,*) ' trace length = ',NTSTEP_BETWEEN_READ_ADJSRC + write(IMAIN,*) + call flush_IMAIN() + endif - filename = trim(OUTPUT_FILES)//'../SEM/'//trim(adjustl(procname))//'_p_SU.adj' - open(unit=IIN_SU1,file=filename,status='old',access='stream',iostat = ier) - if (ier /= 0) call exit_MPI(myrank,'file '//trim(filename)//' does not exist') - do irec_local = 1,nrec_local - read(IIN_SU1,pos=4*((irec_local-1)*(60+NSTEP) + 60 + it_start)+1 ) adj_temp - source_adjoint(1,irec_local,:) = adj_temp(:) - source_adjoint(2,irec_local,:) = 0.0 !TRIVIAL - source_adjoint(3,irec_local,:) = 0.0 !TRIVIAL - enddo - close(IIN_SU1) + open(unit=IIN_SU1,file=trim(filename_p),status='old',access='stream',iostat = ier) + if (ier /= 0) call exit_MPI(myrank,'file '//trim(filename_p)//' does not exist') + do irec_local = 1,nrec_local + read(IIN_SU1,pos=4*((irec_local-1)*(60+NSTEP) + 60 + it_start)+1 ) adj_temp + source_adjoint(1,irec_local,:) = adj_temp(:) + source_adjoint(2,irec_local,:) = 0.0_CUSTOM_REAL !TRIVIAL + source_adjoint(3,irec_local,:) = 0.0_CUSTOM_REAL !TRIVIAL + enddo + close(IIN_SU1) + endif + + case (IDOMAIN_ELASTIC) + ! get elastic adjoint traces + ! SU adjoint file names + ! x-direction traces + filename_x = trim(OUTPUT_FILES)//'../SEM/'//trim(adjustl(procname))//'_dx_SU.adj' + ! y-direction traces + filename_y = trim(OUTPUT_FILES)//'../SEM/'//trim(adjustl(procname))//'_dy_SU.adj' + ! z-direction traces + filename_z = trim(OUTPUT_FILES)//'../SEM/'//trim(adjustl(procname))//'_dz_SU.adj' + + ! check if x-file exists + inquire(file=trim(filename_x),exist=found_adjoint_files) + if (found_adjoint_files) then + ! check if y-file exists + inquire(file=trim(filename_y),exist=found_adjoint_files) + if (found_adjoint_files) then + ! 
check if z-file exists + inquire(file=trim(filename_z),exist=found_adjoint_files) + endif + endif + + if (found_adjoint_files) then + ! user output + if (myrank == 0) then + write(IMAIN,*) 'reading elastic adjoint traces:' + write(IMAIN,*) ' ',trim(filename_x) + write(IMAIN,*) ' ',trim(filename_y) + write(IMAIN,*) ' ',trim(filename_z) + write(IMAIN,*) ' using SU_FORMAT' + write(IMAIN,*) + write(IMAIN,*) ' start index = ',it_start + write(IMAIN,*) ' trace length = ',NTSTEP_BETWEEN_READ_ADJSRC + write(IMAIN,*) + call flush_IMAIN() + endif - else if (ELASTIC_SIMULATION) then + ! opens files + open(unit=IIN_SU1,file=trim(filename_x),status='old',access='stream',iostat = ier) + if (ier /= 0) call exit_MPI(myrank,'file '//trim(filename_x)//' does not exist') - filename = trim(OUTPUT_FILES)//'../SEM/'//trim(adjustl(procname))//'_dx_SU.adj' - open(unit=IIN_SU1,file=filename,status='old',access='stream',iostat = ier) - if (ier /= 0) call exit_MPI(myrank,'file '//trim(filename)//' does not exist') + open(unit=IIN_SU2,file=trim(filename_y),status='old',access='stream',iostat = ier) + if (ier /= 0) call exit_MPI(myrank,'file '//trim(filename_y)//' does not exist') - filename = trim(OUTPUT_FILES)//'../SEM/'//trim(adjustl(procname))//'_dy_SU.adj' - open(unit=IIN_SU2,file=filename,status='old',access='stream',iostat = ier) - if (ier /= 0) call exit_MPI(myrank,'file '//trim(filename)//' does not exist') + open(unit=IIN_SU3,file=trim(filename_z),status='old',access='stream',iostat = ier) + if (ier /= 0) call exit_MPI(myrank,'file '//trim(filename_z)//' does not exist') - filename = trim(OUTPUT_FILES)//'../SEM/'//trim(adjustl(procname))//'_dz_SU.adj' - open(unit=IIN_SU3,file=filename,status='old',access='stream',iostat = ier) - if (ier /= 0) call exit_MPI(myrank,'file '//trim(filename)//' does not exist') + ! 
reads traces + do irec_local = 1,nrec_local + read(IIN_SU1,pos=4*((irec_local-1)*(60+NSTEP) + 60 + it_start)+1 ) adj_temp + source_adjoint(1,irec_local,:) = adj_temp(:) - do irec_local = 1,nrec_local - read(IIN_SU1,pos=4*((irec_local-1)*(60+NSTEP) + 60 + it_start)+1 ) adj_temp - source_adjoint(1,irec_local,:) = adj_temp(:) + read(IIN_SU2,pos=4*((irec_local-1)*(60+NSTEP) + 60 + it_start)+1 ) adj_temp + source_adjoint(2,irec_local,:) = adj_temp(:) - read(IIN_SU2,pos=4*((irec_local-1)*(60+NSTEP) + 60 + it_start)+1 ) adj_temp - source_adjoint(2,irec_local,:) = adj_temp(:) + read(IIN_SU3,pos=4*((irec_local-1)*(60+NSTEP) + 60 + it_start)+1 ) adj_temp + source_adjoint(3,irec_local,:) = adj_temp(:) + enddo - read(IIN_SU3,pos=4*((irec_local-1)*(60+NSTEP) + 60 + it_start)+1 ) adj_temp - source_adjoint(3,irec_local,:) = adj_temp(:) - enddo + close(IIN_SU1) + close(IIN_SU2) + close(IIN_SU3) + endif - close(IIN_SU1) - close(IIN_SU2) - close(IIN_SU3) + case (IDOMAIN_POROELASTIC) + ! not implemented yet + call exit_MPI(myrank,'SU_FORMAT not implemented for adjoint sources in poroelastic domains yet') - else + case default + ! domain not recognized + call exit_MPI(myrank,'Invalid domain for SU_FORMAT adjoint sources') - call exit_MPI(myrank,'SU_FORMAT not implemented for adjoint poroelastic simulations yet') + end select - endif + ! debug - check if file found + !if (.not. found_adjoint_files) then + ! call exit_MPI(myrank,'Found no adjoint traces in SU_FORMAT') + !endif end subroutine compute_arrays_adjoint_source_SU diff --git a/src/specfem3D/compute_forces_acoustic_calling_routine.f90 b/src/specfem3D/compute_forces_acoustic_calling_routine.f90 index 9cf371905..a51cc2f9f 100644 --- a/src/specfem3D/compute_forces_acoustic_calling_routine.f90 +++ b/src/specfem3D/compute_forces_acoustic_calling_routine.f90 @@ -606,14 +606,25 @@ subroutine compute_forces_acoustic_GPU_calling() ! local parameters integer:: iphase - ! safety check + ! 
runs with the additionally optimized GPU routine + ! (combines forward/backward fields in main compute_kernel_acoustic) if (.not. GPU_MODE) return - ! check + + ! safety check + if (SIMULATION_TYPE /= 3) & + call exit_MPI(myrank,'routine compute_forces_acoustic_GPU_calling() works only for SIMULATION_TYPE == 3') + + ! checks if for kernel simulation with both, forward & backward fields + if (UNDO_ATTENUATION_AND_OR_PML) return ! pure elastic / acoustic simulation w/out attenuation + if (ELASTIC_SIMULATION .and. ACOUSTIC_SIMULATION) return ! single domain only - coupling requires switching ordering + + ! safety check if (PML_CONDITIONS) call exit_MPI(myrank,'PML conditions for acoustic domains not yet implemented on GPUs') ! enforces free surface (zeroes potentials at free surface) - call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,1) - if (SIMULATION_TYPE == 3) call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,3) + ! assumes SIMULATION_TYPE == 3 + call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,1) ! 1 == forward + call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,3) ! 3 == backward ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions do iphase = 1,2 @@ -634,8 +645,9 @@ subroutine compute_forces_acoustic_GPU_calling() ! elastic coupling if (ELASTIC_SIMULATION) then - call compute_coupling_ac_el_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,1) ! 1 == forward - if (SIMULATION_TYPE == 3) call compute_coupling_ac_el_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,3) + ! assumes SIMULATION_TYPE == 3 + call compute_coupling_ac_el_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,1) ! 1 == forward + call compute_coupling_ac_el_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,3) ! 3 == backward endif ! poroelastic coupling @@ -660,7 +672,7 @@ subroutine compute_forces_acoustic_GPU_calling() ! 
adds sources ! note: we will add all source contributions in the first pass, when iphase == 1 ! to avoid calling the same routine twice and to check if the source element is an inner/outer element - ! + ! assumes SIMULATION_TYPE == 3 call compute_add_sources_acoustic_GPU() ! forward/adjoint sources call compute_add_sources_acoustic_backward_GPU() ! backward sources endif ! iphase @@ -680,43 +692,37 @@ subroutine compute_forces_acoustic_GPU_calling() request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh) ! adjoint simulations - if (SIMULATION_TYPE == 3) then - call transfer_boun_pot_from_device(Mesh_pointer, & - b_buffer_send_scalar_ext_mesh, & - 3) ! -- 3 == adjoint b_accel - - call assemble_MPI_scalar_send_cuda(NPROC, & - b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, & - num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & - nibool_interfaces_ext_mesh, & - my_neighbors_ext_mesh, & - b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh) + ! assumes SIMULATION_TYPE == 3 + call transfer_boun_pot_from_device(Mesh_pointer, & + b_buffer_send_scalar_ext_mesh, & + 3) ! -- 3 == adjoint b_accel - endif + call assemble_MPI_scalar_send_cuda(NPROC, & + b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, & + num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & + nibool_interfaces_ext_mesh, & + my_neighbors_ext_mesh, & + b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh) else - ! waits for send/receive requests to be completed and assembles values call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, & Mesh_pointer, & buffer_recv_scalar_ext_mesh, & num_interfaces_ext_mesh, & max_nibool_interfaces_ext_mesh, & - ! nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, & - 1) + 1) ! 1 == forward ! 
adjoint simulations - if (SIMULATION_TYPE == 3) then - call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, & - Mesh_pointer, & - b_buffer_recv_scalar_ext_mesh, & - num_interfaces_ext_mesh, & - max_nibool_interfaces_ext_mesh, & - ! nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & - b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh, & - 3) - endif + ! assumes SIMULATION_TYPE == 3 + call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, & + Mesh_pointer, & + b_buffer_recv_scalar_ext_mesh, & + num_interfaces_ext_mesh, & + max_nibool_interfaces_ext_mesh, & + b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh, & + 3) ! 3 == backward endif enddo @@ -741,9 +747,10 @@ subroutine compute_forces_acoustic_GPU_calling() call kernel_3_acoustic_cuda(Mesh_pointer,deltatover2,b_deltatover2,0) ! 0 == both -! enforces free surface (zeroes potentials at free surface) - call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,1) - if (SIMULATION_TYPE == 3) call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,3) + ! enforces free surface (zeroes potentials at free surface) + ! assumes SIMULATION_TYPE == 3 + call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,1) ! 1 == forward + call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,3) ! 3 == backward end subroutine compute_forces_acoustic_GPU_calling diff --git a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 index 873cb01fb..54bac90a7 100644 --- a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 +++ b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 @@ -66,7 +66,7 @@ subroutine compute_forces_viscoelastic_calling() .and. .not. UNDO_ATTENUATION_AND_OR_PML & .and. .not. (ELASTIC_SIMULATION .and. ACOUSTIC_SIMULATION)) then ! 
runs with the additionally optimized GPU routine - ! (combines forward/backward fields in main compute_kernel_acoustic) + ! (combines forward/backward fields in main compute kernels) call compute_forces_viscoelastic_GPU_calling() ! all done return @@ -484,7 +484,7 @@ subroutine compute_forces_viscoelastic_backward_calling() .and. .not. (ELASTIC_SIMULATION .and. ACOUSTIC_SIMULATION)) then ! runs with the additionally optimized GPU routine ! (combines forward/backward fields in main compute_kernel_acoustic) - ! all done in compute_forces_acoustic_GPU_calling() + ! all done in compute_forces_viscoelastic_GPU_calling() return endif endif @@ -726,10 +726,18 @@ subroutine compute_forces_viscoelastic_GPU_calling() integer:: iphase + ! runs with the additionally optimized GPU routine + ! (combines forward/backward fields in main compute kernels) + if (.not. GPU_MODE) return + ! safety check if (SIMULATION_TYPE /= 3) & call exit_MPI(myrank,'routine compute_forces_viscoelastic_GPU_calling() works only for SIMULATION_TYPE == 3') + ! checks if for kernel simulation with both, forward & backward fields + if (UNDO_ATTENUATION_AND_OR_PML) return ! pure elastic / acoustic simulation w/out attenuation + if (ELASTIC_SIMULATION .and. ACOUSTIC_SIMULATION) return ! single domain only - coupling requires switching ordering + ! distinguishes two runs: for elements in contact with MPI interfaces, and elements within the partitions do iphase = 1,2 @@ -738,7 +746,7 @@ subroutine compute_forces_viscoelastic_GPU_calling() call compute_forces_viscoelastic_cuda(Mesh_pointer, iphase, deltat, & nspec_outer_elastic, & nspec_inner_elastic, & - COMPUTE_AND_STORE_STRAIN,ATTENUATION,0) ! 0 == both + COMPUTE_AND_STORE_STRAIN,ATTENUATION,0) ! 0 == both combined ! while inner elements compute "Kernel_2", we wait for MPI to ! 
finish and transfer the boundary terms to the device asynchronously @@ -768,15 +776,15 @@ subroutine compute_forces_viscoelastic_GPU_calling() call compute_stacey_viscoelastic_GPU(iphase,num_abs_boundary_faces, & NSTEP,it, & b_num_abs_boundary_faces,b_reclen_field,b_absorb_field, & - Mesh_pointer,0) + Mesh_pointer,0) ! 0 == both combined endif ! acoustic coupling if (ACOUSTIC_SIMULATION) then if (num_coupling_ac_el_faces > 0) then + ! assumes SIMULATION_TYPE == 3 call compute_coupling_el_ac_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,1) ! 1 == forward - if (SIMULATION_TYPE == 3) & - call compute_coupling_el_ac_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,3) ! 3 == backward + call compute_coupling_el_ac_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,3) ! 3 == backward endif endif @@ -799,8 +807,9 @@ subroutine compute_forces_viscoelastic_GPU_calling() ! adds source term (single-force/moment-tensor solution) ! note: we will add all source contributions in the first pass, when iphase == 1 ! to avoid calling the same routine twice and to check if the source element is an inner/outer element - call compute_add_sources_viscoelastic_GPU() - + ! assumes SIMULATION_TYPE == 3 + call compute_add_sources_viscoelastic_GPU() ! forward/adjoint sources (into accel) + call compute_add_sources_viscoelastic_backward_GPU() ! backward sources (into b_accel) endif ! iphase ! assemble all the contributions between slices using MPI @@ -814,36 +823,36 @@ subroutine compute_forces_viscoelastic_GPU_calling() call transfer_boundary_from_device_a(Mesh_pointer,nspec_outer_elastic) ! adjoint simulations - if (SIMULATION_TYPE == 3) then - call transfer_boun_accel_from_device(Mesh_pointer, & - b_buffer_send_vector_ext_mesh, & - 3) ! 3 == adjoint b_accel + ! assumes SIMULATION_TYPE == 3 + call transfer_boun_accel_from_device(Mesh_pointer, & + b_buffer_send_vector_ext_mesh, & + 3) ! 
3 == adjoint b_accel - call assemble_MPI_vector_send_cuda(NPROC, & - b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, & - num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & - nibool_interfaces_ext_mesh, & - my_neighbors_ext_mesh, & - b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh) - endif !adjoint + call assemble_MPI_vector_send_cuda(NPROC, & + b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, & + num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & + nibool_interfaces_ext_mesh, & + my_neighbors_ext_mesh, & + b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh) else ! waits for send/receive requests to be completed and assembles values - call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer, & + call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, & + Mesh_pointer, & buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh, & max_nibool_interfaces_ext_mesh, & nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & request_send_vector_ext_mesh,request_recv_vector_ext_mesh, & - 1) + 1) ! 1 == forward ! adjoint simulations - if (SIMULATION_TYPE == 3) then - call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer, & - b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh, & - max_nibool_interfaces_ext_mesh, & - nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & - b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh, & - 3) - endif !adjoint + ! assumes SIMULATION_TYPE == 3 + call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, & + Mesh_pointer, & + b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh, & + max_nibool_interfaces_ext_mesh, & + nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & + b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh, & + 3) ! 3 == backward endif enddo @@ -858,13 +867,15 @@ subroutine compute_forces_viscoelastic_GPU_calling() endif ! multiplies with inverse of mass matrix (note: rmass has been inverted already) + ! 
assumes SIMULATION_TYPE == 3 call kernel_3_a_cuda(Mesh_pointer,deltatover2,b_deltatover2,APPROXIMATE_OCEAN_LOAD,1) ! 1 == forward - if (SIMULATION_TYPE == 3) call kernel_3_a_cuda(Mesh_pointer,deltatover2,b_deltatover2,APPROXIMATE_OCEAN_LOAD,3) ! 3 == backward + call kernel_3_a_cuda(Mesh_pointer,deltatover2,b_deltatover2,APPROXIMATE_OCEAN_LOAD,3) ! 3 == backward ! updates acceleration with ocean load term if (APPROXIMATE_OCEAN_LOAD) then + ! assumes SIMULATION_TYPE == 3 call compute_coupling_ocean_cuda(Mesh_pointer,1) ! 1 == forward - if (SIMULATION_TYPE == 3) call compute_coupling_ocean_cuda(Mesh_pointer,3) ! 3 == backward + call compute_coupling_ocean_cuda(Mesh_pointer,3) ! 3 == backward ! updates velocities ! Newmark finite-difference time scheme with elastic domains: @@ -883,8 +894,9 @@ subroutine compute_forces_viscoelastic_GPU_calling() ! corrector: ! updates the velocity term which requires a(t+delta) ! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass + ! assumes SIMULATION_TYPE == 3 call kernel_3_b_cuda(Mesh_pointer,deltatover2,b_deltatover2,1) ! 1 == forward - if (SIMULATION_TYPE == 3) call kernel_3_b_cuda(Mesh_pointer,deltatover2,b_deltatover2,3) ! 3 == backward + call kernel_3_b_cuda(Mesh_pointer,deltatover2,b_deltatover2,3) ! 3 == backward endif end subroutine compute_forces_viscoelastic_GPU_calling diff --git a/src/specfem3D/compute_kernels.f90 b/src/specfem3D/compute_kernels.f90 index 41365d52d..18f780084 100644 --- a/src/specfem3D/compute_kernels.f90 +++ b/src/specfem3D/compute_kernels.f90 @@ -464,7 +464,7 @@ subroutine compute_kernels_Hessian() real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: b_accel_elm,accel_elm,b_veloc_elm real(kind=CUSTOM_REAL), dimension(5) :: b_epsilondev_loc real(kind=CUSTOM_REAL) :: b_eps_trace_l - real(kind=CUSTOM_REAL) :: kappal,rhol + real(kind=CUSTOM_REAL) :: kappal_inv,rhol integer :: i,j,k,ispec,iglob ! 
updates Hessian kernels @@ -500,9 +500,9 @@ subroutine compute_kernels_Hessian() + b_veloc_elm(2,i,j,k) * b_veloc_elm(2,i,j,k) & + b_veloc_elm(3,i,j,k) * b_veloc_elm(3,i,j,k)) - kappal = 1._CUSTOM_REAL / kappastore(i,j,k,ispec) + kappal_inv = 1._CUSTOM_REAL / kappastore(i,j,k,ispec) hess_kappa_ac_kl(i,j,k,ispec) = hess_kappa_ac_kl(i,j,k,ispec) & - + deltat * kappal & + + deltat * kappal_inv & * b_potential_dot_acoustic(iglob) & * b_potential_dot_acoustic(iglob) diff --git a/src/specfem3D/noise_tomography.f90 b/src/specfem3D/noise_tomography.f90 index 97a595560..d6fc7d2ac 100644 --- a/src/specfem3D/noise_tomography.f90 +++ b/src/specfem3D/noise_tomography.f90 @@ -894,7 +894,7 @@ end subroutine noise_read_add_surface_movie ! step 2/3: calculate/reconstruct the "ensemble forward wavefield" ! read surface movie (displacement) at every time steps, injected as the source of "ensemble forward wavefield" ! in step 2, call noise_read_add_surface_movie_GPU(..., NSTEP-it+1 ,...) -! in step 3, call noise_read_add_surface_movie(..., it ,...) +! in step 3, call noise_read_add_surface_movie_GPU(..., it ,...) subroutine noise_read_add_surface_movie_GPU(noise_surface_movie,it,num_free_surface_faces, & Mesh_pointer,NOISE_TOMOGRAPHY) @@ -913,12 +913,11 @@ subroutine noise_read_add_surface_movie_GPU(noise_surface_movie,it,num_free_surf ! reads in ensemble noise sources at surface if (num_free_surface_faces > 0) then - ! read surface movie call read_abs(2,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLSQUARE*num_free_surface_faces,it) + ! adds noise movie field on GPU call noise_read_add_surface_movie_cu(Mesh_pointer,noise_surface_movie,NOISE_TOMOGRAPHY) - endif end subroutine noise_read_add_surface_movie_GPU