Skip to content

qlibs/prof

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

1 Commit
 
 
 
 
 
 

Repository files navigation

// Overview / Examples / API / FAQ

PROF: C++20 Profiling library

MIT Licence Version Build Try it online

https://en.wikipedia.org/wiki/Profiling_(computer_programming)

Requirements

Optional (docker setup - see FAQ)

Overview

// Usage example: checks at compile time that every backend models the
// `prof::profiler` concept, then profiles a region with the linux-perf backend.
int main() {
  // Each backend class (other than `none`) is declared only when its header is
  // found via `__has_include` (see API), so these asserts compile only where
  // all of those headers are available.
  static_assert(prof::profiler<prof::none>);
  static_assert(prof::profiler<prof::callgrind>);
  static_assert(prof::profiler<prof::xray>);
  static_assert(prof::profiler<prof::linux_perf>);
  static_assert(prof::profiler<prof::gperf>);
  static_assert(prof::profiler<prof::intel_vtune>);

  // example
  {
    // Same path as the FIFO created with `mkfifo` and passed to
    // `perf ... --control=fifo:/dev/shm/perf` in the linux-perf commands.
    prof::linux_perf profiler{"/dev/shm/perf"};

    profiler.start();
    // ...
    profiler.stop();
  }
}
# callgrind
$CXX -O3 callgrind.cpp -o callgrind
valgrind --tool=callgrind \
         --instr-atstart=no \
         --branch-sim=yes \
         --dump-instr=yes \
         --collect-jumps=yes \
         ./callgrind
kcachegrind callgrind.*
# llvm-xray
clang++ -O3 -fxray-instrument -fxray-instruction-threshold=1 xray.cpp -o xray
llvm-xray account xray-log.* --top=10 --sort=sum --sortorder=dsc -instr_map=./xray
# linux-perf
$CXX -O3 -g -fno-omit-frame-pointer linux_perf.cpp -o linux_perf
mkfifo /dev/shm/perf
perf stat --control=fifo:/dev/shm/perf --delay=-1 ./linux_perf
perf record --control=fifo:/dev/shm/perf --delay=-1 ./linux_perf
# gperf
$CXX -O3 -g -fno-omit-frame-pointer gperf.cpp -o gperf -lprofiler
CPUPROFILE_FREQUENCY=1000 ./gperf
google-pprof gperf profile.prof
# intel-vtune
VTUNE=/opt/intel/oneapi/vtune/latest
$CXX -I $VTUNE/sdk/include -O3 vtune.cpp -o vtune -L$VTUNE/lib64 -littnotify
$VTUNE/bin64/vtune -collect hotspots -start-paused -result-dir vtune-result -- ./vtune
$VTUNE/bin64/vtune-gui vtune-result

API

namespace prof::inline v1_0_0 {
// Satisfied by any type `T` for which `t.start()` and `t.stop()` are valid
// expressions on an object `t` of that type. Return types are not constrained
// and `flush()` is not required.
template<class T>
concept profiler = requires(T t) {
  t.start();
  t.stop();
};

// No-op backend: every member function has an empty body. It is the only
// backend declared without an `__has_include` guard.
struct none {
  constexpr auto start() { }
  constexpr auto stop() { }
  constexpr auto flush() { } // optional
};

// Callgrind backend; declared only when <valgrind/callgrind.h> is available.
#if __has_include(<valgrind/callgrind.h>)
class callgrind {
 public:
  // `profile` is the profile name; the FAQ example describes `flush()` as
  // dumping the profile under that name.
  constexpr explicit callgrind(const char* profile);
  // Move-constructible, not copy-constructible.
  constexpr callgrind(callgrind&&) = default;
  constexpr callgrind(const callgrind&) = delete;
  constexpr ~callgrind() noexcept;
  // Per the FAQ example: `start()` resets the profile, `flush()` dumps it.
  // Return types are deduced from definitions not shown in this synopsis.
  constexpr auto start();
  constexpr auto stop();
  constexpr auto flush();
};
#endif

// LLVM XRay backend; declared only when both <xray/xray_interface.h> and
// <xray/xray_log_interface.h> are available.
#if __has_include(<xray/xray_interface.h>) and \
    __has_include(<xray/xray_log_interface.h>)
class xray {
 public:
  // Both arguments are optional. NOTE(review): `mode` presumably selects the
  // XRay log implementation and `cfg` its flag string - confirm against the
  // implementation. The default log file base `xray-log.` matches the
  // `xray-log.*` files read by the `llvm-xray account` command above.
  constexpr explicit xray(const char* mode = "xray-fdr",
                          const char* cfg = "xray_logfile_base=xray-log.%");
  constexpr ~xray() noexcept;
  // Move-constructible, not copy-constructible.
  constexpr xray(xray&&) = default;
  constexpr xray(const xray&) = delete;
  // `start()` and `stop()` are excluded from XRay instrumentation themselves.
  [[clang::xray_never_instrument]] constexpr auto start();
  [[clang::xray_never_instrument]] constexpr auto stop();
  constexpr auto flush();
};
#endif

// linux-perf backend; declared only when <fcntl.h> and <unistd.h> are
// available.
#if __has_include(<fcntl.h>) and __has_include(<unistd.h>)
class linux_perf {
 public:
  // `control` is a path; the usage example passes the FIFO that is also given
  // to `perf ... --control=fifo:<path>`.
  constexpr explicit linux_perf(const char* control);
  // Move-constructible, not copy-constructible.
  constexpr linux_perf(linux_perf&&) = default;
  constexpr linux_perf(const linux_perf&) = delete;
  constexpr ~linux_perf() noexcept;
  // Only `start()`/`stop()` are declared; this backend has no `flush()`.
  constexpr auto start();
  constexpr auto stop();
};
#endif

// gperftools backend; declared only when <gperftools/profiler.h> is available.
#if __has_include(<gperftools/profiler.h>)
class gperf {
 public:
  // NOTE(review): `fname` is presumably the output profile file (the gperf
  // commands above read `profile.prof`) - confirm against the implementation.
  constexpr explicit gperf(const char* fname);
  // Move-constructible, not copy-constructible.
  constexpr gperf(gperf&&) = default;
  constexpr gperf(const gperf&) = delete;
  constexpr ~gperf() noexcept;
  // Return types are deduced from definitions not shown in this synopsis.
  constexpr auto start();
  constexpr auto stop();
  constexpr auto flush();
};
#endif

// Intel VTune backend; declared only when <ittnotify.h> is available.
#if __has_include(<ittnotify.h>)
class intel_vtune {
 public:
  // NOTE(review): `domain` and `task` are presumably the ITT domain and task
  // names reported to VTune - confirm against the implementation.
  constexpr explicit intel_vtune(const char* domain, const char* task);
  // Move-constructible, not copy-constructible.
  constexpr intel_vtune(intel_vtune&&) = default;
  constexpr intel_vtune(const intel_vtune&) = delete;
  constexpr ~intel_vtune() noexcept;
  // Only `start()`/`stop()` are declared; this backend has no `flush()`.
  constexpr auto start();
  constexpr auto stop();
};
#endif
} // namespace prof

FAQ

  • Setup docker (Dockerfile)

    docker build -t prof .
    docker run \
      -it \
      --privileged \
      --network=host \
      -e DISPLAY=${DISPLAY} \
      -v ${PWD}:${PWD} \
      -w ${PWD} \
      prof
  • Setup linux-perf

    sudo mount -o remount,mode=755 /sys/kernel/debug
    sudo mount -o remount,mode=755 /sys/kernel/debug/tracing
    sudo chown `whoami` /sys/kernel/debug/tracing/uprobe_events
    sudo chmod a+rw /sys/kernel/debug/tracing/uprobe_events
    echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
    echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
    echo 1000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
  • Instrumentation with llvm-xray

    [[clang::xray_always_instrument]]
    void always_profile();
    
    [[clang::xray_always_instrument, clang::xray_log_args(1)]]
    void always_profile_and_log_i(int i);
    
    [[clang::xray_never_instrument]]
    void never_profile();
    # profiling threshold
    -fxray-instruction-threshold=1 # default 200 instructions
    # instrumentation info
    llvm-xray extract ./a.out --symbolize

    https://godbolt.org/z/WhsEYf9cc

  • Conditional profiling with callgrind

    // Conditional profiling: keep restarting the profile and only dump it for
    // the iteration in which the event of interest fired.
    prof::callgrind profiler{"example"};
    
    while (true) {
      profiler.start(); // resets profile
    
      if (should_trigger()) {
        trigger();
        profiler.stop();
        profiler.flush(); // dumps `example` profile
      }
    }
    kcachegrind callgrind.* # opens all profiles combined
  • How to integrate with CMake.FetchContent?

    include(FetchContent)
    
    # Declare where to fetch the library from and which tag to check out.
    FetchContent_Declare(
      qlibs.prof
      GIT_REPOSITORY https://github.com/qlibs/prof
      GIT_TAG v1.0.0
    )
    
    FetchContent_MakeAvailable(qlibs.prof)
    
    # Link the fetched `qlibs.prof` target into the project's own target.
    target_link_libraries(${PROJECT_NAME} PUBLIC qlibs.prof)
    
  • Acknowledgments

    https://valgrind.org/docs/manual/cl-manual.html https://llvm.org/docs/XRay.html https://perf.wiki.kernel.org https://github.com/gperftools/gperftools https://www.intel.com/content/www/us/en/docs/vtune-profiler