Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nanobenchmarks #4143

Merged
merged 22 commits into from
Dec 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions Principia.sln
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testing_utilities", "shared
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "functions", "functions\functions.vcxproj", "{7CCA653C-2E8F-4FFD-9E9F-BEE590F3EFAB}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nanobenchmarks", "nanobenchmarks\nanobenchmarks.vcxproj", "{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -693,6 +695,30 @@ Global
{7CCA653C-2E8F-4FFD-9E9F-BEE590F3EFAB}.Release|x64.Build.0 = Release|x64
{7CCA653C-2E8F-4FFD-9E9F-BEE590F3EFAB}.Release|x86.ActiveCfg = Release|Win32
{7CCA653C-2E8F-4FFD-9E9F-BEE590F3EFAB}.Release|x86.Build.0 = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|Any CPU.ActiveCfg = Debug|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|Any CPU.Build.0 = Debug|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|x64.ActiveCfg = Debug|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|x64.Build.0 = Debug|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|x86.ActiveCfg = Debug|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|x86.Build.0 = Debug|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|Any CPU.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|Any CPU.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|x64.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|x64.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|x86.ActiveCfg = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|x86.Build.0 = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|Any CPU.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|Any CPU.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|x64.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|x64.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|x86.ActiveCfg = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|x86.Build.0 = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|Any CPU.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|Any CPU.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|x64.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|x64.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|x86.ActiveCfg = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -715,6 +741,8 @@ Global
EndGlobalSection
GlobalSection(SharedMSBuildProjectFiles) = preSolution
shared\base.vcxitems*{0fd08cdf-228c-48c6-8690-cf0a72cf6c69}*SharedItemsImports = 4
shared\base.vcxitems*{1c6654c0-14e2-4a9e-b0e6-508b84fa8a0e}*SharedItemsImports = 4
shared\numerics.vcxitems*{1c6654c0-14e2-4a9e-b0e6-508b84fa8a0e}*SharedItemsImports = 4
shared\base.vcxitems*{273987f9-5e73-43e6-868e-e9d3c137f01a}*SharedItemsImports = 4
shared\numerics.vcxitems*{273987f9-5e73-43e6-868e-e9d3c137f01a}*SharedItemsImports = 4
shared\base.vcxitems*{2e28828e-8364-4962-a9ff-c20a72eb884c}*SharedItemsImports = 4
Expand Down
67 changes: 38 additions & 29 deletions documentation/bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,7 @@ @article{Kutta1901
url = {https://archive.org/details/zeitschriftfrma12runggoog/page/435},
date = {1901-11},
journaltitle = {Zeitschrift für Mathematik und Physik},
pages = {435453},
pages = {435--453},
title = {Beitrag zur näherungsweisen Integration totaler Differentialgleichungen},
volume = {46},
}
Expand Down Expand Up @@ -1170,8 +1170,8 @@ @article{Newhall1989
volume = {45},
}

@article{NguyễnStehlé2009,
author = {Nguyễn, Phong Q. and Stehlé, Damien},
@article{NguyễnStehlé2009,
author = {Nguyễn, Phong Q. and Stehlé, Damien},
url = {https://doi.org/10.1137/070705702},
date = {2009},
doi = {10.1137/070705702},
Expand Down Expand Up @@ -1728,7 +1728,7 @@ @book{Meeus1998
}

@book{MullerBrisebarreDeDinechinJeannerodLefevreMelquiondRevolStehleTorres2010,
author = {Muller, Jean-Michel and Brisebarre, Nicolas and De Dinechin, Florent and Jeannerod, Claude-Pierre and Lefèvre, Vincent and Melquiond, Guillaume and Revol, Nathalie and Stehlé, Damien and Torres, Serge},
author = {Muller, Jean-Michel and Brisebarre, Nicolas and De Dinechin, Florent and Jeannerod, Claude-Pierre and Lefèvre, Vincent and Melquiond, Guillaume and Revol, Nathalie and Stehlé, Damien and Torres, Serge},
publisher = {Birkhäuser},
date = {2010},
isbn = {978-0-8176-4704-9},
Expand All @@ -1752,7 +1752,7 @@ @book{NistHMF2010
}

@book{NocedalWright2006,
author = {Nocedal, Jorge, and Wright, Stephen J.},
author = {Nocedal, Jorge and Wright, Stephen J.},
publisher = {Springer},
date = {2006},
isbn = {978-0387-30303-1},
Expand Down Expand Up @@ -1869,7 +1869,7 @@ @inbook{ZatloukalJohnsonLadner2002
booktitle = {Near Neighbor Searches, and Methodology: Fifth and Sixth DIMACS Implementation Challenges},
date = {2002},
isbn = {0821828924},
pages = {69-86},
pages = {69--86},
title = {Nearest Neighbor Search for Data Compression},
}

Expand Down Expand Up @@ -2074,32 +2074,32 @@ @inproceedings{SofroniouSpaletta2002
venue = {Amsterdam, The Netherlands},
}

@inproceedings{StehléZimmermann2005,
author = {Stehlé, Damien and Zimmermann, Paul},
editor = {Montuschi, Paolo and Schwarz, Eric},
publisher = {IEEE Computer Society},
booktitle = {17th IEEE Symposium on Computer Arithmetic (ARITH'05)},
date = {2005-06},
doi = {10.1109/ARITH.2005.24},
eventdate = {2005-06-27/2005-06-29},
isbn = {0-7695-2366-8},
pages = {257--264},
title = {Gal's accurate tables method revisited},
venue = {Cape Cod, MA, USA},
@inproceedings{StehléZimmermann2005,
author = {Stehlé, Damien and Zimmermann, Paul},
editor = {Montuschi, Paolo and Schwarz, Eric},
publisher = {IEEE Computer Society},
booktitle = {17th IEEE Symposium on Computer Arithmetic (ARITH'05)},
date = {2005-06},
doi = {10.1109/ARITH.2005.24},
eventdate = {2005-06-27/2005-06-29},
isbn = {0-7695-2366-8},
pages = {257--264},
title = {Gal's accurate tables method revisited},
venue = {Cape Cod, MA, USA},
}

@inproceedings{WuZhang1991,
author = {Wu, Xiaolin and Zhang, Kaizhong},
editor = {Storer, James A. and Reif, John H.},
publisher = {IEEE Computer Society},
booktitle = {1991 Data Compression Conference},
date = {1991-04},
doi = {10.1109/DCC.1991.213341},
eventdate = {1991-04-08/1991-04-11},
isbn = {0-8186-9202-2},
pages = {392-401},
title = {A better tree-structured vector quantizer},
venue = {Snowbird, UT, USA},
author = {Wu, Xiaolin and Zhang, Kaizhong},
editor = {Storer, James A. and Reif, John H.},
publisher = {IEEE Computer Society},
booktitle = {1991 Data Compression Conference},
date = {1991-04},
doi = {10.1109/DCC.1991.213341},
eventdate = {1991-04-08/1991-04-11},
isbn = {0-8186-9202-2},
pages = {392--401},
title = {A better tree-structured vector quantizer},
venue = {Snowbird, UT, USA},
}

@mvbook{Fontenelle1758,
Expand Down Expand Up @@ -2224,6 +2224,15 @@ @report{LongCappellariVelezFuchs
type = {techreport},
}

@report{Paoloni2010,
author = {Paoloni, Gabriele},
institution = {Intel Corporation},
date = {2010-09},
number = {324264-001},
title = {How to Benchmark Code Execution Times on Intel® IA-32 and IA-64 Instruction Set Architectures},
type = {White Paper},
}

@report{RiesBettadpurEanesKangKoMcCulloughNagelPiePooleRichterSaveTapley2016,
author = {Ries, J. and Bettadpur, S. and Eanes, R. and Kang, Z. and Ko, U. and McCullough, C. and Nagel, P. and Pie, N. and Poole, S. and Save, H. and Tapley, B.},
institution = {Center for Space Research at the University of Texas at Austin},
Expand Down
Binary file modified documentation/bibliography.pdf
Binary file not shown.
58 changes: 58 additions & 0 deletions nanobenchmarks/examples.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include <emmintrin.h>

#include "nanobenchmarks/function_registry.hpp"
#include "numerics/cbrt.hpp"

// Example nanobenchmarks.  Each |BENCHMARKED_FUNCTION(f)| below declares
// |double f(double x)|, registers it under the name "f", and is followed by the
// body of |f|.
// NOTE(review): presumably the expression written in each body is exactly what
// is being timed, so the bodies should not be rearranged—confirm before
// refactoring.
namespace principia {
namespace nanobenchmarks {
namespace _examples {

using namespace principia::numerics::_cbrt;

// A single multiplication by the constant 2.
BENCHMARKED_FUNCTION(twice) {
return 2 * x;
}

// A single multiplication by the constant 3.
BENCHMARKED_FUNCTION(thrice) {
return 3 * x;
}

// A single addition of the constant 1.
BENCHMARKED_FUNCTION(inc) {
return x + 1;
}

// A chain of four dependent multiplications, ((((x * x) * x) * x) * x).
// NOTE(review): the registered name says "add" but the body multiplies, unlike
// |add_16_times| below; either the name or the operator looks like a slip.
// Confirm which one is intended before relying on the name of this benchmark.
BENCHMARKED_FUNCTION(add_4_times) {
return x * x * x * x * x;
}

// A chain of sixteen dependent additions of |x| to itself, evaluated left to
// right.
BENCHMARKED_FUNCTION(add_16_times) {
return x + x + x + x + x + x + x + x + x + x + x + x + x + x + x + x + x;
}

// The square root of |x|, computed with the SSE2 scalar intrinsic
// |_mm_sqrt_sd|: |x| is placed in the low lane by |_mm_set_sd| and the low lane
// of the result is extracted by |_mm_cvtsd_f64|.
BENCHMARKED_FUNCTION(square_root) {
__m128d x_0 = _mm_set_sd(x);
return _mm_cvtsd_f64(_mm_sqrt_sd(x_0, x_0));
}

// Two dependent scalar square roots, i.e., the square root of the square root
// of |x|.
BENCHMARKED_FUNCTION(sqrt_sqrt) {
__m128d x_0 = _mm_set_sd(x);
x_0 = _mm_sqrt_sd(x_0, x_0);
return _mm_cvtsd_f64(_mm_sqrt_sd(x_0, x_0));
}

// |x| divided by its own square root: a scalar square root whose result feeds
// a scalar division.
BENCHMARKED_FUNCTION(square_root_division) {
__m128d x_0 = _mm_set_sd(x);
return _mm_cvtsd_f64(_mm_div_sd(x_0, _mm_sqrt_sd(x_0, x_0)));
}
// Registers the |Cbrt| found through the using-directive above (presumably the
// one declared in numerics/cbrt.hpp) under the name "Cbrt".
BENCHMARK_FUNCTION(Cbrt);

using namespace principia::numerics::_cbrt::internal;

// Registers specific cube root methods from the |internal| namespace.  The
// registered name is the stringized macro argument, template arguments
// included.
BENCHMARK_FUNCTION(method_3²ᴄZ5¹::Cbrt<Rounding::Faithful>);
BENCHMARK_FUNCTION(method_3²ᴄZ5¹::Cbrt<Rounding::Correct>);
BENCHMARK_FUNCTION(method_5²Z4¹FMA::Cbrt<Rounding::Faithful>);
BENCHMARK_FUNCTION(method_5²Z4¹FMA::Cbrt<Rounding::Correct>);

} // namespace _examples
} // namespace nanobenchmarks
} // namespace principia
42 changes: 42 additions & 0 deletions nanobenchmarks/function_registry.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "nanobenchmarks/function_registry.hpp"

#include <functional>
#include <map>
#include <string>

#include "glog/logging.h"

namespace principia {
namespace nanobenchmarks {
namespace _function_registry {
namespace internal {

// Records |function| under |name| in both maps of the registry.  Fails a CHECK
// (aborting the process) if |function| has already been registered or if |name|
// is already taken.  Always returns true, so that the call can be used as the
// initializer of a namespace-scope |bool|.
bool FunctionRegistry::Register(std::string_view name,
                                BenchmarkedFunction function) {
  // A function pointer does not convert implicitly to |void const*|; streamed
  // as is, it would select the |bool| overload of |operator<<| and print "1"
  // instead of an address.  Convert explicitly for logging.
  auto const address = [](BenchmarkedFunction const f) {
    return reinterpret_cast<void const*>(f);
  };

  // When the insertion does not take place, |emplace| returns an iterator to
  // the entry that prevented it; use that for the error message instead of
  // looking the key up a second time.
  auto const [function_entry, function_inserted] =
      singleton_.names_by_function_.emplace(function, name);
  CHECK(function_inserted)
      << " Registering function " << address(function) << " as " << name
      << ": "
      << "function already registered as " << function_entry->second;

  auto const [name_entry, name_inserted] =
      singleton_.functions_by_name_.emplace(name, function);
  CHECK(name_inserted)
      << " Registering function " << address(function) << " as " << name
      << ": "
      << "name already taken by " << address(name_entry->second);
  return true;
}

// The registry instance used by the static member functions of
// |FunctionRegistry|.  It is heap-allocated and bound to a reference; no
// matching |delete| is visible in this file.
// NOTE(review): this is a dynamic initializer, whereas |Register| is called
// from the initializers of namespace-scope variables generated by
// |BENCHMARK_FUNCTION_WITH_NAME_INTERNAL2| in other translation units.  The
// order of dynamic initialization across translation units is not specified by
// the language, so |Register| could run before this reference is bound.
// Confirm that the build guarantees the order, or consider constructing the
// instance on first use.
FunctionRegistry& FunctionRegistry::singleton_ = *new FunctionRegistry();

// Returns the map from registered names to the corresponding functions.  The
// map is an ordered |std::map| with the transparent comparator |std::less<>|,
// which allows lookups by |std::string_view|.
std::map<std::string, BenchmarkedFunction, std::less<>> const&
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use absl maps everywhere.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are ordered, so that the tests show up in alphabetical(ish) order; we don’t use absl ordered maps unless we have a good reason to.

FunctionRegistry::functions_by_name() {
return singleton_.functions_by_name_;
}

std::map<BenchmarkedFunction, std::string> const&
FunctionRegistry::names_by_function() {
return singleton_.names_by_function_;
}

} // namespace internal
} // namespace _function_registry
} // namespace nanobenchmarks
} // namespace principia
61 changes: 61 additions & 0 deletions nanobenchmarks/function_registry.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#pragma once

#include <functional>
#include <map>
#include <string>
#include <string_view>

#include "base/macros.hpp"

namespace principia {
namespace nanobenchmarks {
namespace _function_registry {
namespace internal {

// The type of the functions that can be registered: a pointer to a function
// taking a |double| and returning a |double|.
using BenchmarkedFunction = double (*)(double);

// A registry that associates names with benchmarked functions, in both
// directions.  All access goes through static member functions operating on a
// single instance.
class FunctionRegistry {
public:
// Registers |function| under |name|.  Fails a CHECK if either |function| or
// |name| is already registered.  Always returns true, so that it can be used
// to initialize a namespace-scope variable.
static bool Register(std::string_view name, BenchmarkedFunction function);
// The registered functions, keyed by name and ordered by name.  The
// transparent comparator makes it possible to look up a |std::string_view|.
static std::map<std::string, BenchmarkedFunction, std::less<>> const&
functions_by_name();
// The registered names, keyed by function pointer.
static std::map<BenchmarkedFunction, std::string> const& names_by_function();

private:
// Only the class itself can create an instance.
FunctionRegistry() = default;
// The instance on which the static member functions operate; defined in the
// implementation file.
static FunctionRegistry& singleton_;
std::map<std::string, BenchmarkedFunction, std::less<>> functions_by_name_;
std::map<BenchmarkedFunction, std::string> names_by_function_;
};

// Registers the function designated by the variadic arguments under |name|.
// The arguments are variadic so that expressions containing top-level commas
// (e.g., template argument lists) can be passed.  The registration happens in
// the initializer of a namespace-scope variable whose name is derived from the
// line number, so at most one registration may appear on a given line of a
// translation unit.
#define BENCHMARK_FUNCTION_WITH_NAME(name, ...) \
  BENCHMARK_FUNCTION_WITH_NAME_INTERNAL(__LINE__, name, __VA_ARGS__)
// An extra level of expansion so that |__LINE__| is replaced by its value
// before being pasted with |##| below.
#define BENCHMARK_FUNCTION_WITH_NAME_INTERNAL(line, name, ...) \
  BENCHMARK_FUNCTION_WITH_NAME_INTERNAL2(line, name, __VA_ARGS__)
// The variable only exists for the side effect of its initializer and is never
// read; |[[maybe_unused]]| keeps unused-variable diagnostics quiet.
#define BENCHMARK_FUNCTION_WITH_NAME_INTERNAL2(line, name, ...)             \
  namespace {                                                               \
  [[maybe_unused]] static bool registered##line =                           \
      ::principia::nanobenchmarks::_function_registry::FunctionRegistry::   \
          Register(name, &(__VA_ARGS__));                                   \
  }


// Registers the function designated by the macro arguments, using the
// stringized arguments as its name.  Variadic so that arguments containing
// top-level commas (e.g., template argument lists) are accepted.
#define BENCHMARK_FUNCTION(...) \
BENCHMARK_FUNCTION_WITH_NAME(#__VA_ARGS__, __VA_ARGS__)

// Declares |double f(double x)|, registers it under the name "f", and then
// starts its definition: an invocation of this macro must be followed by the
// body of the function in braces, where the parameter is named |x|.
#define BENCHMARKED_FUNCTION(f) \
double f(double x); \
BENCHMARK_FUNCTION(f); \
double f(double x)

// Declares (without defining) |f| as a function |double f(double)| with C
// language linkage, and registers it under the name "f".  The definition of
// |f| must be provided elsewhere.
#define BENCHMARK_EXTERN_C_FUNCTION(f) \
extern "C" double f(double); \
BENCHMARK_FUNCTION(f)

} // namespace internal

using internal::BenchmarkedFunction;
using internal::FunctionRegistry;

} // namespace _function_registry
} // namespace nanobenchmarks
} // namespace principia
Loading
Loading