From df79650a05df98552ab18ebf2125bf23365eeba6 Mon Sep 17 00:00:00 2001
From: Jannis Teunissen
Date: Tue, 4 Jun 2024 00:05:33 +0800
Subject: [PATCH] Print information on computational cost at exit

Accumulate wall-clock time (measured with omp_get_wtime) in the
module-level counters wc_time_flux, wc_time_source, wc_time_copy_state,
wc_time_field, wc_time_output, wc_time_refine and wc_time_photoi, and
print each of them as a percentage of wc_time when the program exits.

Note: this patch also changes the default of current_update_per_steps
from 10 to 1000*1000.
---
 src/m_fluid.f90    | 10 ++++++++++
 src/m_streamer.f90 | 11 ++++++++++-
 src/streamer.f90   | 29 ++++++++++++++++++++++++++++-
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/src/m_fluid.f90 b/src/m_fluid.f90
index ece75e7f..0eebf3f8 100644
--- a/src/m_fluid.f90
+++ b/src/m_fluid.f90
@@ -25,6 +25,7 @@ subroutine forward_euler(tree, dt, dt_stiff, dt_lim, time, s_deriv, n_prev, &
     use m_dt
     use m_transport_data
     use m_dielectric
+    use omp_lib
     type(af_t), intent(inout) :: tree
     real(dp), intent(in) :: dt !< Time step
     real(dp), intent(in) :: dt_stiff !< Time step for stiff terms (IMEX)
@@ -38,6 +39,7 @@ subroutine forward_euler(tree, dt, dt_stiff, dt_lim, time, s_deriv, n_prev, &
     integer, intent(in) :: i_step !< Step of the integrator
     integer, intent(in) :: n_steps !< Total number of steps
     integer :: ix, id_out
+    real(dp) :: t1, t2, t3, t4
 
     ! Set current rates to zero; they are summed below
     ST_current_rates = 0
@@ -47,11 +49,16 @@ subroutine forward_euler(tree, dt, dt_stiff, dt_lim, time, s_deriv, n_prev, &
 
     ! Since field_compute is called after performing time integration, we don't
     ! have to call it again for the first sub-step of the next iteration
+    t1 = omp_get_wtime()
     if (i_step > 1) call field_compute(tree, mg, s_deriv, time, .true.)
+    t2 = omp_get_wtime()
+    wc_time_field = wc_time_field + t2 - t1
 
     call flux_upwind_tree(tree, flux_num_species, flux_species, s_deriv, &
          flux_variables, 2, dt_limits(1:2), flux_upwind, flux_direction, &
          flux_dummy_line_modify, af_limiter_koren_t)
+    t3 = omp_get_wtime()
+    wc_time_flux = wc_time_flux + t3 - t2
 
     if (transport_data_ions%n_mobile_ions > 0 .and. &
          ion_se_yield > 0.0_dp) then
@@ -59,10 +66,13 @@ subroutine forward_euler(tree, dt, dt_stiff, dt_lim, time, s_deriv, n_prev, &
        call af_loop_box(tree, handle_ion_se_flux, .true.)
     end if
 
+    t1 = omp_get_wtime()
     call flux_update_densities(tree, dt, size(all_densities), &
          all_densities, flux_num_species, &
          flux_species, flux_variables, s_deriv, n_prev, s_prev, &
          w_prev, s_out, add_source_terms, 2, dt_limits(3:4), set_box_mask)
+    t4 = omp_get_wtime()
+    wc_time_source = wc_time_source + t4 - t1
 
     if (ST_use_dielectric) then
        ! Update surface charge and handle photon emission
diff --git a/src/m_streamer.f90 b/src/m_streamer.f90
index a28bcd99..fdc8f91c 100644
--- a/src/m_streamer.f90
+++ b/src/m_streamer.f90
@@ -166,7 +166,7 @@ module m_streamer
   real(dp), public, allocatable :: ST_current_JdotE(:, :)
 
   !> Per how many iterations the electric current is computed
-  integer, public, protected :: current_update_per_steps = 10
+  integer, public, protected :: current_update_per_steps = 1000*1000
 
   !> Electric current through electrodes due to J.E
   real(dp), public :: ST_global_JdotE_current
@@ -177,6 +177,15 @@ module m_streamer
   !> Global sum of J.E
   real(dp), public :: ST_global_JdotE
 
+  ! To keep track of the computational cost of different parts
+  real(dp), public :: wc_time_flux = 0.0_dp
+  real(dp), public :: wc_time_source = 0.0_dp
+  real(dp), public :: wc_time_copy_state = 0.0_dp
+  real(dp), public :: wc_time_field = 0.0_dp
+  real(dp), public :: wc_time_output = 0.0_dp
+  real(dp), public :: wc_time_refine = 0.0_dp
+  real(dp), public :: wc_time_photoi = 0.0_dp
+
   !> Method used to prolong (interpolate) densities
   procedure(af_subr_prolong), pointer, public, protected :: &
        ST_prolongation_method => null()
diff --git a/src/streamer.f90 b/src/streamer.f90
index 9c992acf..84edfcc3 100644
--- a/src/streamer.f90
+++ b/src/streamer.f90
@@ -19,6 +19,7 @@ program streamer
   use m_dielectric
   use m_units_constants
   use m_model
+  use omp_lib
 
   implicit none
 
@@ -26,7 +27,7 @@ program streamer
   integer, parameter :: max_attemps_per_time_step = 10
   integer, parameter :: datfile_version = 30
   integer(int8) :: t_start, t_current, count_rate
-  real(dp) :: wc_time, inv_count_rate
+  real(dp) :: wc_time = 0.0_dp, inv_count_rate
   real(dp) :: time_last_print, time_last_output
   integer :: i, it, n, coord_type, box_bytes
   integer :: n_steps_rejected
@@ -49,6 +50,9 @@ program streamer
   real(dp) :: tmp, field_energy_prev_time
   logical :: step_accepted, start_of_new_pulse
 
+  ! To keep track of the computational cost of different parts
+  real(dp) :: t1, t2, t3
+
   !> The configuration for the simulation
   type(CFG_t) :: cfg
   !> This contains the full grid information
@@ -230,7 +234,10 @@ program streamer
      end if
 
      if (photoi_enabled .and. mod(it, photoi_per_steps) == 0) then
+        t1 = omp_get_wtime()
         call photoi_set_src(tree, time - photoi_prev_time)
+        t2 = omp_get_wtime()
+        wc_time_photoi = wc_time_photoi + t2 - t1
         photoi_prev_time = time
      end if
 
@@ -242,7 +249,10 @@ program streamer
      dt_lim = huge_real
      step_accepted = .false.
      do n = 1, max_attemps_per_time_step
+        t1 = omp_get_wtime()
         call copy_current_state()
+        t2 = omp_get_wtime()
+        wc_time_copy_state = wc_time_copy_state + t2 - t1
 
         call af_advance(tree, dt, dt_lim_step, time, all_densities, &
              time_integrator, forward_euler)
@@ -307,7 +317,10 @@ program streamer
      end if
 
      ! Make sure field is available for latest time state
+     t1 = omp_get_wtime()
      call field_compute(tree, mg, 0, time, .true.)
+     t2 = omp_get_wtime()
+     wc_time_field = wc_time_field + t2 - t1
 
      if (gas_dynamics) then
         call coupling_add_fluid_source(tree, dt)
@@ -349,6 +362,7 @@ program streamer
         write_out = .true.
      end if
 
+     t1 = omp_get_wtime()
      if (write_out) then
         output_cnt = output_cnt + 1
         time_last_output = global_time
@@ -358,6 +372,8 @@ program streamer
                 ["photon_flux", "surf_dens  "], output_name, output_cnt)
         end if
      end if
+     t2 = omp_get_wtime()
+     wc_time_output = wc_time_output + t2 - t1
 
      if (global_dt < dt_min) error stop "dt too small"
 
@@ -393,10 +409,21 @@ program streamer
            end if
         end if
      end if
+
+     t3 = omp_get_wtime()
+     wc_time_refine = wc_time_refine + t3 - t2
   end do
 
   call output_status(tree, time, wc_time, it, dt)
 
+  write(*, "(A)") "Computational cost breakdown (%)"
+  write(*, "(7(A10))") "flux", "source", "copy", "field", "output", &
+       "refine", "photoi"
+  write(*, "(7(F10.2))") 1e2_dp * [wc_time_flux, wc_time_source, &
+       wc_time_copy_state, wc_time_field, wc_time_output, &
+       wc_time_refine, wc_time_photoi] / &
+       max(wc_time, epsilon(1.0_dp)) ! Avoid division by zero if wc_time is still 0
+
 contains
 
   subroutine initialize_modules(cfg, tree, mg, restart)