// Overview / Examples / API / FAQ
https://en.wikipedia.org/wiki/Profiling_(computer_programming)
- C++20 (gcc-11+, clang-12+)
- callgrind (simulation) - https://valgrind.org/docs/manual/cl-manual.html
- llvm-xray (instrumentation) - https://llvm.org/docs/XRay.html
- linux-perf (sampling) - https://perf.wiki.kernel.org
- gperftools (sampling) - https://github.com/gperftools/gperftools
- intel-vtune (sampling) - https://www.intel.com/content/www/us/en/docs/vtune-profiler
int main() {
static_assert(prof::profiler<prof::none>);
static_assert(prof::profiler<prof::callgrind>);
static_assert(prof::profiler<prof::xray>);
static_assert(prof::profiler<prof::linux_perf>);
static_assert(prof::profiler<prof::gperf>);
static_assert(prof::profiler<prof::intel_vtune>);
// example
{
prof::linux_perf profiler{"/dev/shm/perf"};
profiler.start();
// ...
profiler.stop();
}
}
# callgrind: build the example, run it under valgrind's callgrind tool, then
# inspect the result with kcachegrind.
$CXX -O3 callgrind.cpp -o callgrind
# --instr-atstart=no: instrumentation is off when the program starts (see the
# Callgrind manual); presumably prof::callgrind switches it on at runtime - confirm.
valgrind --tool=callgrind \
--instr-atstart=no \
--branch-sim=yes \
--dump-instr=yes \
--collect-jumps=yes \
./callgrind
# The glob passes every callgrind.* output file in the current directory.
kcachegrind callgrind.*
# llvm-xray: compile with XRay instrumentation (clang++ is invoked directly),
# using an instruction threshold of 1 instead of the default.
clang++ -O3 -fxray-instrument -fxray-instruction-threshold=1 xray.cpp -o xray
# Summarise the recorded xray-log.* files: top 10 entries sorted by `sum`,
# descending, using ./xray as the instrumentation map.
llvm-xray account xray-log.* --top=10 --sort=sum --sortorder=dsc -instr_map=./xray
# linux-perf: build with debug info and frame pointers, create the control
# FIFO, then run the binary under perf.
$CXX -O3 -g -fno-omit-frame-pointer linux_perf.cpp -o linux_perf
# Same path that the example passes to the prof::linux_perf constructor.
mkfifo /dev/shm/perf
# --control=fifo:... lets perf be driven through the FIFO; --delay=-1 should
# start perf with events disabled (NOTE(review): confirm against the perf man page).
# The two invocations below are separate runs: `perf stat` and `perf record`.
perf stat --control=fifo:/dev/shm/perf --delay=-1 ./linux_perf
perf record --control=fifo:/dev/shm/perf --delay=-1 ./linux_perf
# gperf (gperftools): build with debug info and frame pointers and link
# against libprofiler.
$CXX -O3 -g -fno-omit-frame-pointer gperf.cpp -o gperf -lprofiler
# CPUPROFILE_FREQUENCY sets the sampling frequency
# (NOTE(review): presumably samples per second - confirm in the gperftools docs).
CPUPROFILE_FREQUENCY=1000 ./gperf
# Analyse the profile; assumes the program wrote it to `profile.prof`
# (i.e. that name was passed to prof::gperf) - confirm.
google-pprof gperf profile.prof
# intel-vtune: VTUNE points at the oneAPI VTune installation used below.
VTUNE=/opt/intel/oneapi/vtune/latest
# Build against the SDK headers in $VTUNE/sdk/include and link libittnotify
# from $VTUNE/lib64.
$CXX -I $VTUNE/sdk/include -O3 vtune.cpp -o vtune -L$VTUNE/lib64 -littnotify
# Collect hotspots into ./vtune-result; -start-paused begins with collection
# paused (presumably resumed by prof::intel_vtune::start() - confirm).
$VTUNE/bin64/vtune -collect hotspots -start-paused -result-dir vtune-result -- ./vtune
# Open the collected result in the VTune GUI.
$VTUNE/bin64/vtune-gui vtune-result
// API synopsis of the profiler wrappers. `v1_0_0` is an inline namespace, so
// every name below is reachable directly as `prof::<name>`.
namespace prof::inline v1_0_0 {
// Satisfied by any type T for which `t.start()` and `t.stop()` are valid
// expressions on an object `t` of type T. `flush()` is not required.
template<class T>
concept profiler = requires(T t) {
t.start();
t.stop();
};
// No-op profiler: all three members have empty bodies.
struct none {
constexpr auto start() { }
constexpr auto stop() { }
constexpr auto flush() { } // optional: not part of the `profiler` concept
};
// callgrind wrapper; only declared when <valgrind/callgrind.h> can be included.
#if __has_include(<valgrind/callgrind.h>)
class callgrind {
public:
// `profile`: name of the profile (the FAQ example passes "example").
constexpr explicit callgrind(const char* profile);
// Move-constructible, not copy-constructible.
constexpr callgrind(callgrind&&) = default;
constexpr callgrind(const callgrind&) = delete;
constexpr ~callgrind() noexcept;
// Per the FAQ example: start() resets the profile and flush() dumps it.
constexpr auto start();
constexpr auto stop();
constexpr auto flush();
};
#endif
// llvm-xray wrapper; only declared when both XRay interface headers can be
// included.
#if __has_include(<xray/xray_interface.h>) and \
__has_include(<xray/xray_log_interface.h>)
class xray {
public:
// `mode` defaults to "xray-fdr"; `cfg` defaults to a configuration string
// that sets `xray_logfile_base=xray-log.%`.
constexpr explicit xray(const char* mode = "xray-fdr",
const char* cfg = "xray_logfile_base=xray-log.%");
constexpr ~xray() noexcept;
// Move-constructible, not copy-constructible.
constexpr xray(xray&&) = default;
constexpr xray(const xray&) = delete;
// start()/stop() carry `xray_never_instrument`, i.e. they are themselves
// excluded from XRay instrumentation.
[[clang::xray_never_instrument]] constexpr auto start();
[[clang::xray_never_instrument]] constexpr auto stop();
constexpr auto flush();
};
#endif
// linux-perf wrapper; only declared when <fcntl.h> and <unistd.h> can be
// included. Declares no flush().
#if __has_include(<fcntl.h>) and __has_include(<unistd.h>)
class linux_perf {
public:
// `control`: the usage example passes "/dev/shm/perf", the same path given to
// `perf --control=fifo:...`.
constexpr explicit linux_perf(const char* control);
// Move-constructible, not copy-constructible.
constexpr linux_perf(linux_perf&&) = default;
constexpr linux_perf(const linux_perf&) = delete;
constexpr ~linux_perf() noexcept;
constexpr auto start();
constexpr auto stop();
};
#endif
// gperftools wrapper; only declared when <gperftools/profiler.h> can be
// included.
#if __has_include(<gperftools/profiler.h>)
class gperf {
public:
// `fname`: presumably the output profile file name - TODO confirm.
constexpr explicit gperf(const char* fname);
// Move-constructible, not copy-constructible.
constexpr gperf(gperf&&) = default;
constexpr gperf(const gperf&) = delete;
constexpr ~gperf() noexcept;
constexpr auto start();
constexpr auto stop();
constexpr auto flush();
};
#endif
// Intel VTune wrapper; only declared when <ittnotify.h> can be included.
// Declares no flush().
#if __has_include(<ittnotify.h>)
class intel_vtune {
public:
// `domain`, `task`: presumably the ITT domain and task names - TODO confirm.
constexpr explicit intel_vtune(const char* domain, const char* task);
// Move-constructible, not copy-constructible.
constexpr intel_vtune(intel_vtune&&) = default;
constexpr intel_vtune(const intel_vtune&) = delete;
constexpr ~intel_vtune() noexcept;
constexpr auto start();
constexpr auto stop();
};
#endif
} // namespace prof
-
Setup docker (Dockerfile)
docker build -t prof .
# Start an interactive container from the `prof` image. The current directory
# is mounted at the same path and used as the working directory; DISPLAY is
# forwarded from the host environment.
# NOTE(review): --privileged / --network=host are presumably needed for perf
# access and GUI tools - confirm.
docker run \
  -it \
  --privileged \
  --network=host \
  -e DISPLAY=${DISPLAY} \
  -v ${PWD}:${PWD} \
  -w ${PWD} \
  prof
-
Setup linux-perf
# Remount debugfs and its tracing directory with mode 755.
sudo mount -o remount,mode=755 /sys/kernel/debug
sudo mount -o remount,mode=755 /sys/kernel/debug/tracing

# Give the current user ownership of uprobe_events and make it
# readable/writable by everyone.
sudo chown `whoami` /sys/kernel/debug/tracing/uprobe_events
sudo chmod a+rw /sys/kernel/debug/tracing/uprobe_events

# Kernel settings for perf: kptr_restrict=0, perf_event_paranoid=-1,
# perf_event_max_sample_rate=1000.
echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
echo 1000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
-
Instrumentation with llvm-xray
// Always instrumented.
[[clang::xray_always_instrument]]
void always_profile();

// Always instrumented, with `xray_log_args(1)` additionally applied.
[[clang::xray_always_instrument]] [[clang::xray_log_args(1)]]
void always_profile_and_log_i(int i);

// Never instrumented.
[[clang::xray_never_instrument]]
void never_profile();
# profiling threshold
-fxray-instruction-threshold=1 # default 200 instructions
# instrumentation info
llvm-xray extract ./a.out --symbolize
-
Conditional profiling with callgrind
// Keep a profile only for the iterations in which the trigger fires:
// start() discards what was collected so far, and flush() writes the profile
// out once the interesting work has run.
prof::callgrind profiler{"example"};

while (true) {
  profiler.start(); // resets profile

  if (should_trigger()) {
    trigger();
    profiler.stop();
    profiler.flush(); // dumps `example` profile
  }
}
kcachegrind callgrind.* # opens all profiles combined
-
How to integrate with CMake.FetchContent?
# Fetch qlibs.prof at tag v1.0.0 and make it available to the build.
include(FetchContent)

FetchContent_Declare(
  qlibs.prof
  GIT_REPOSITORY https://github.com/qlibs/prof
  GIT_TAG v1.0.0
)

FetchContent_MakeAvailable(qlibs.prof)

# Link the project against the fetched library.
target_link_libraries(${PROJECT_NAME} PUBLIC qlibs.prof)
-
Acknowledgments
https://valgrind.org/docs/manual/cl-manual.html https://llvm.org/docs/XRay.html https://perf.wiki.kernel.org https://github.com/gperftools/gperftools https://www.intel.com/content/www/us/en/docs/vtune-profiler