Skip to content

Commit

Permalink
Merge pull request #4145 from eggrobin/clang-nanobenchmarks
Browse files (browse the repository at this point in the history)
More fixing of nanobenchmarks on clang
  • Loading branch information
eggrobin authored Dec 30, 2024
2 parents 2a4c4ac + 10a1625 commit ae7f647
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions nanobenchmarks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,23 @@
#include <utility>
#include <vector>

#include <intrin.h>

#include "absl/flags/flag.h"
#include "absl/flags/parse.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "base/macros.hpp" // 🧙 For PRINCIPIA_COMPILER_CLANG.
#include "mathematica/logger.hpp"
#include "mathematica/mathematica.hpp"
#include "nanobenchmarks/function_registry.hpp"
#include "nanobenchmarks/microarchitectures.hpp"
#include "nanobenchmarks/performance_settings_controller.hpp"
#include "testing_utilities/statistics.hpp"


#if PRINCIPIA_COMPILER_MSVC
#include <intrin.h>
#endif

ABSL_FLAG(std::size_t,
loop_iterations,
100,
Expand Down Expand Up @@ -149,7 +153,11 @@ __declspec(noinline) LatencyDistributionTable
double const input = absl::GetFlag(FLAGS_input);
double x = input;
// The CPUID barriers prevent out-of-order execution; see [Pao10].
#if PRINCIPIA_COMPILER_MSVC
__cpuid(registers, leaf);
#else
asm volatile("cpuid");
#endif
auto const tsc_start = __rdtsc();
for (int i = 0; i < loop_iterations; ++i) {
x = f(x);
Expand All @@ -164,7 +172,11 @@ __declspec(noinline) LatencyDistributionTable
// globally visible, and subsequent instructions may begin execution before
// the read operation is performed.
auto const tsc_stop = __rdtscp(&tsc_aux);
#if PRINCIPIA_COMPILER_MSVC
__cpuid(registers, leaf);
#else
asm volatile("cpuid");
#endif
double const δtsc = tsc_stop - tsc_start;
samples[j] = δtsc / loop_iterations;
}
Expand Down

0 comments on commit ae7f647

Please sign in to comment.