diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..eafaca4f --- /dev/null +++ b/.clang-format @@ -0,0 +1,160 @@ +--- +# General options +Language: Cpp +Standard: c++17 +DisableFormat: false + +AccessModifierOffset: -4 +AlignAfterOpenBracket: AlwaysBreak +AlignArrayOfStructures: None +AlignConsecutiveAssignments: false +AlignConsecutiveBitFields: false +AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: false +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: + Kind: Never +AllowAllArgumentsOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: false +BinPackParameters: false +BitFieldColonSpacing: Both +BreakAfterAttributes: Never +BreakBeforeBinaryOperators: All +BreakBeforeBraces: Allman +BreakBeforeConceptDeclarations: Always +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeComma +BreakInheritanceList: BeforeComma +BreakStringLiterals: true +ColumnLimit: 119 +CommentPragmas: '^ COMMENT pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: Always +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +IncludeBlocks: Regroup +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: true +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: AfterHash +IndentRequiresClause: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: true +IntegerLiteralSeparator: + Binary: 4 + Decimal: 3 + DecimalMinDigits: 7 + Hex: 4 +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: All +PackConstructorInitializers: CurrentLine +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 # default made explicit here +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 # default made explicit here +PenaltyReturnTypeOnItsOwnLine: 1000 +PointerAlignment: Left +PPIndentWidth: -1 # follow IndentWidth +QualifierAlignment: Custom +QualifierOrder: ['friend', 'static', 'inline', 'constexpr', 'type', 'const', 'volatile', 'restrict'] +ReferenceAlignment: Pointer # follow PointerAlignment +ReflowComments: true +RemoveBracesLLVM: false +RemoveSemicolon: false +RequiresClausePosition: WithPreceding +RequiresExpressionIndentation: OuterScope +ShortNamespaceLines: 0 +SortIncludes: true +SortUsingDeclarations: Lexicographic +SeparateDefinitionBlocks: Always +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceAroundPointerQualifiers: Default # follow PointerAlignment +SpaceBeforeAssignmentOperators: true 
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: Never
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInLineCommentPrefix:
+  Minimum: 1
+  Maximum: -1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+TabWidth: 4
+UseCRLF: false
+UseTab: Never
+
+# Project specific options -- uncomment and modify as needed
+#AttributeMacros: []
+#ForEachMacros: []
+#IfMacros: []
+IncludeCategories:
+  # Local headers (in "") above all else
+  - Regex: '"([A-Za-z0-9.\/-_])+"'
+    Priority: 1
+  # "redGrapes/foo.hpp" after local headers (occur inside redGrapes)
+  - Regex: '"redGrapes/([A-Za-z0-9.\/-_])+"'
+    Priority: 2
+  # after local headers (occur outside redGrapes in examples and test)
+  - Regex: ''
+    Priority: 3
+  # C++ standard library headers are the last group to be included
+  - Regex: '<([A-Za-z0-9\/-_])+>'
+    Priority: 5
+  # Includes that made it this far are third-party headers and will be placed
+  # below redGrapes's includes
+  - Regex: '<([A-Za-z0-9.\/-_])+>'
+    Priority: 4
+# Macros: []
+#NamespaceMacros: []
+#StatementAttributeLikeMacros: []
+#StatementMacros: []
+#TypenameMacros: []
+#WhitespaceSensitiveMacros: []
+
+...
diff --git a/.gitignore b/.gitignore
index e14f2b3b..38b4644f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,14 +11,49 @@ compile_commands.json
 DartConfiguration.tcl
 Testing
-
-docs/build
-docs/doxyxml
-
 build/
 *.swp
-*.orig
 *.png
 *.dot
+
+# tmp files
 *~
+
+# netbeans project files
+/nbproject/
+
+# Code::Blocks project files
+/*.cbp
+/*.layout
+
+# Visual Studio Code configuration files
+.vscode
+
+# JetBrains project files and tmp's
+.idea/
+/venv/
+
+# python byte code
+*.pyc
+
+# original backup files
+*.orig
+
+# doxygen output
+docs/xml/
+docs/doxyxml
+docs/doxygen_sqlite3.db
+docs/html/
+# sphinx & breathe output
+docs/build/
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Visual Studio configuration and output files
+out
+/.vs
\ No newline at end of file
diff --git a/README.md b/README.md
index 32d3f9c9..b59a0e7d 100644
--- a/README.md
+++ b/README.md
@@ -77,6 +77,8 @@ int main()
         [] ( auto a ) { std::cout << a << std::endl; },
         a.read()
     );
+
+    rg::finalize();
     return 0;
 }
@@ -88,6 +90,8 @@ RedGrapes is documented using in-code doxygen comments and reStructured-text files
 * [Getting Started](docs/source/tutorial/index.rst)
 * [Components](docs/source/components.rst)
+* [Contributing](docs/source/contributing.rst)
+
 ## Comparison with Similar Projects
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
new file mode 100644
index 00000000..4106bbbb
--- /dev/null
+++ b/docs/source/contributing.rst
@@ -0,0 +1,30 @@
+############
+Contributing
+############
+
+Formatting
+==========
+
+Please format your code before opening pull requests, using clang-format 16 and the .clang-format file placed in the repository root.
+
+Visual Studio and CLion
+-----------------------
+
+Support for clang-format has been built in since Visual Studio 2017 15.7 and CLion 2019.1.
+The .clang-format file in the repository is detected automatically, and formatting is applied as you type or when you press the format hotkey.
+
+Bash
+----
+
+First install clang-format-16; instructions for your platform can be found on the web.
+To format your changes since branching off develop, you can run this command in bash:
+
+.. code-block:: bash
+
+   git clang-format-16 develop
+
+To format all code in your working copy, you can run this command in bash:
+
+.. code-block:: bash
+
+   find -iname '*.cpp' -o -iname '*.hpp' | xargs clang-format-16 -i
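+
+To verify formatting without modifying any files (for example in CI), the same file list can be combined with clang-format's dry-run mode; this is a minimal sketch, assuming clang-format-16 is on your PATH:
+
+.. code-block:: bash
+
+   find -iname '*.cpp' -o -iname '*.hpp' | xargs clang-format-16 --dry-run --Werror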
diff --git a/examples/#cholesky.cpp# b/examples/#cholesky.cpp#
index 883de4c8..16428e05 100644
--- a/examples/#cholesky.cpp#
+++ b/examples/#cholesky.cpp#
@@ -1,9 +1,11 @@
-#include
-#include
-#include
 #include
 #include
+#include
+#include
+
+#include
+
 #define REDGRAPES_TASK_PROPERTIES redGrapes::LabelProperty
 #include
 #include
@@ -81,8 +83,21 @@ int main(int argc, char* argv[])
                     [blksz](auto a, auto b, auto c)
                     {
                         spdlog::info("dgemm");
-                        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
-                                    blksz, blksz, blksz, -1.0, *a, blksz, *b, blksz, 1.0, *c, blksz);
+                        cblas_dgemm(
+                            CblasColMajor,
+                            CblasNoTrans,
+                            CblasTrans,
+                            blksz,
+                            blksz,
+                            blksz,
+                            -1.0,
+                            *a,
+                            blksz,
+                            *b,
+                            blksz,
+                            1.0,
+                            *c,
+                            blksz);
                     },
                     rg::TaskProperties::Builder().label("gemm"),
                     A[k * nblks + i].read(),
@@ -98,8 +113,18 @@
                     [blksz, nblks](auto a, auto c)
                     {
                         spdlog::info("dsyrk");
-                        cblas_dsyrk(CblasColMajor, CblasLower, CblasNoTrans,
-                                    blksz, blksz, -1.0, *a, blksz, 1.0, *c, blksz);
+                        cblas_dsyrk(
+                            CblasColMajor,
+                            CblasLower,
+                            CblasNoTrans,
+                            blksz,
+                            blksz,
+                            -1.0,
+                            *a,
+                            blksz,
+                            1.0,
+                            *c,
+                            blksz);
                     },
                     A[i * nblks + j].read(),
                     A[j * nblks + j].write());
@@ -121,9 +146,19 @@
                     [blksz, nblks](auto a, auto b)
                     {
                         spdlog::info("dtrsm");
-                        cblas_dtrsm(CblasColMajor,
-                                    CblasRight, CblasLower, CblasTrans, CblasNonUnit,
-                                    blksz, blksz, 1.0, *a, blksz, *b, blksz);
+                        cblas_dtrsm(
+                            CblasColMajor,
+                            CblasRight,
+                            CblasLower,
+                            CblasTrans,
+                            CblasNonUnit,
+                            blksz,
+                            blksz,
+                            1.0,
+                            *a,
+                            blksz,
+                            *b,
+                            blksz);
                     },
                     A[j * nblks + j].read(),
                     A[j * nblks + i].write());
diff --git a/examples/1_resources.cpp b/examples/1_resources.cpp
index 49845f88..f4980c15 100644
--- a/examples/1_resources.cpp
+++ b/examples/1_resources.cpp
@@ -6,38 +6,31 @@
  */
 
 #include
-#include
 #include
+#include
 #include
 
 int main(int, char*[])
 {
     redGrapes::init(1);
 
-    redGrapes::FieldResource< std::vector > a;
+    redGrapes::FieldResource> a;
     redGrapes::IOResource b;
     redGrapes::IOResource c;
 
-    redGrapes::ResourceUser user1({
-        a.read(), // complete resource
-        a.write().area( {0}, {10} ), // write only indices 0 to 10
-        b.write()
-    });
+    redGrapes::ResourceUser user1(
+        {a.read(), // complete resource
+         a.write().area({0}, {10}), // write only indices 0 to 10
+         b.write()});
 
-    redGrapes::ResourceUser user2({
-        b.read()
-    });
+    redGrapes::ResourceUser user2({b.read()});
 
-    redGrapes::ResourceUser user3({
-        b.read(),
-        c.write()
-    });
+    redGrapes::ResourceUser user3({b.read(), c.write()});
 
-    std::cout << "is_serial(user1,user1) = " << redGrapes::ResourceUser::is_serial(user1,user1) << std::endl;
-    std::cout << "is_serial(user1,user2) = " << redGrapes::ResourceUser::is_serial(user1,user2) << std::endl;
-    std::cout << "is_serial(user1,user3) = " << redGrapes::ResourceUser::is_serial(user1,user3) << std::endl;
-    std::cout << "is_serial(user2,user3) = " << redGrapes::ResourceUser::is_serial(user2,user3) << std::endl;
+    std::cout << "is_serial(user1,user1) = " << redGrapes::ResourceUser::is_serial(user1, user1) << std::endl;
+    std::cout << "is_serial(user1,user2) = " << redGrapes::ResourceUser::is_serial(user1, user2) << std::endl;
+    std::cout <<
"is_serial(user1,user3) = " << redGrapes::ResourceUser::is_serial(user1, user3) << std::endl; + std::cout << "is_serial(user2,user3) = " << redGrapes::ResourceUser::is_serial(user2, user3) << std::endl; redGrapes::finalize(); return 0; } - diff --git a/examples/2_functors.cpp b/examples/2_functors.cpp index f0f3be6f..b5e9b0e4 100644 --- a/examples/2_functors.cpp +++ b/examples/2_functors.cpp @@ -5,28 +5,24 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include - #include #include #include -int square (int x) +#include + +int square(int x) { - return x*x; + return x * x; } int main() { spdlog::set_level(spdlog::level::trace); redGrapes::init(1); - - fmt::print( - "square(2) = {}\n", - redGrapes::emplace_task(square, 2).get() - ); - redGrapes::finalize(); + fmt::print("square(2) = {}\n", redGrapes::emplace_task(square, 2).get()); + + redGrapes::finalize(); return 0; } - diff --git a/examples/3_functors_with_resources.cpp b/examples/3_functors_with_resources.cpp index 674a242d..20364fa4 100644 --- a/examples/3_functors_with_resources.cpp +++ b/examples/3_functors_with_resources.cpp @@ -5,24 +5,24 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include - #include #include +#include +#include +#include + int main(void) { - spdlog::set_level( spdlog::level::trace ); + spdlog::set_level(spdlog::level::trace); redGrapes::init(); - redGrapes::IOResource< int > a, b; + redGrapes::IOResource a, b; for(int i = 0; i < 1; ++i) { redGrapes::emplace_task( - []( auto a ) + [](auto a) { std::cout << "Write to A" << std::endl; std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -32,7 +32,7 @@ int main(void) a.write()); redGrapes::emplace_task( - []( auto a ) + [](auto a) { std::cout << "Read A: " << *a << std::endl; std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -40,18 +40,17 @@ int main(void) a.read()); redGrapes::emplace_task( - []( auto b ) + [](auto b) { std::cout << "Write to B" << std::endl; std::this_thread::sleep_for(std::chrono::seconds(2)); *b = 7; std::cout << "Write B done" << std::endl; }, - b.write() - ); + b.write()); redGrapes::emplace_task( - []( auto a, auto b ) + [](auto a, auto b) { std::cout << "Read A & B: " << *a << ", " << *b << std::endl; std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -61,7 +60,6 @@ int main(void) } redGrapes::finalize(); - + return 0; } - diff --git a/examples/4_refinements.cpp b/examples/4_refinements.cpp index 1a13afc9..e30bf56e 100644 --- a/examples/4_refinements.cpp +++ b/examples/4_refinements.cpp @@ -5,13 +5,13 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include +#include + #include #include +#include -#include - -int main( int, char*[] ) +int main(int, char*[]) { spdlog::set_level(spdlog::level::trace); spdlog::set_pattern("[thread %t] %^[%l]%$ %v"); @@ -21,17 +21,18 @@ int main( int, char*[] ) redGrapes::emplace_task( [] { - std::cout << "f1" << "..." << std::endl; + std::cout << "f1" + << "..." 
<< std::endl; int i = 0; - for( auto t : redGrapes::backtrace() ) + for(auto t : redGrapes::backtrace()) fmt::print("refinement 1 backtrace [{}]: {}\n", i++, t.get().label); redGrapes::emplace_task( [] { fmt::print("Refinement 1\n"); - std::this_thread::sleep_for( std::chrono::seconds(1) ); + std::this_thread::sleep_for(std::chrono::seconds(1)); }); SPDLOG_TRACE("EX: create next task task"); @@ -40,18 +41,18 @@ int main( int, char*[] ) [] { fmt::print("Refinement 2\n"); - std::this_thread::sleep_for( std::chrono::seconds(1) ); + std::this_thread::sleep_for(std::chrono::seconds(1)); int i = 0; - for( auto t : redGrapes::backtrace() ) - fmt::print("refinement 2 backtrace [{}]: {}\n", i++, (redGrapes::TaskProperties const&)t); - } - ).label("Child Task 2"); - } - ).label("Parent Task").submit(); + for(auto t : redGrapes::backtrace()) + fmt::print("refinement 2 backtrace [{}]: {}\n", i++, (redGrapes::TaskProperties const&) t); + }) + .label("Child Task 2"); + }) + .label("Parent Task") + .submit(); redGrapes::finalize(); return 0; } - diff --git a/examples/5_access_demotion.cpp b/examples/5_access_demotion.cpp index 9650fb07..03e3e7c3 100644 --- a/examples/5_access_demotion.cpp +++ b/examples/5_access_demotion.cpp @@ -5,52 +5,46 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include - #include #include #include +#include +#include +#include + namespace rg = redGrapes; -int main( int, char*[] ) +int main(int, char*[]) { spdlog::set_level(spdlog::level::trace); rg::init(); - rg::IOResource< int > a; + rg::IOResource a; rg::emplace_task( - []( auto a ) + [](auto a) { std::cout << "f1 writes A" << std::endl; - std::this_thread::sleep_for( std::chrono::seconds(1) ); + std::this_thread::sleep_for(std::chrono::seconds(1)); std::cout << "f1 now only reads A" << std::endl; rg::update_properties( - rg::TaskProperties::Patch::Builder() - .remove_resources({ a.write() }) - .add_resources({ a.read() }) - ); - std::this_thread::sleep_for( std::chrono::seconds(1) ); + rg::TaskProperties::Patch::Builder().remove_resources({a.write()}).add_resources({a.read()})); + std::this_thread::sleep_for(std::chrono::seconds(1)); - std::cout << "f1 done" << std::endl; + std::cout << "f1 done" << std::endl; }, - a.write() - ); + a.write()); rg::emplace_task( - []( auto a ) + [](auto a) { std::cout << "f2 reads A" << std::endl; std::cout << "f2 done" << std::endl; }, - a.read() - ); + a.read()); rg::finalize(); - + return 0; } - diff --git a/examples/6_resource_scope.cpp b/examples/6_resource_scope.cpp index b6a2bbaf..5d267117 100644 --- a/examples/6_resource_scope.cpp +++ b/examples/6_resource_scope.cpp @@ -6,37 +6,36 @@ */ #include - #include -#include #include +#include namespace rg = redGrapes; int main() { rg::init(1); - rg::IOResource< int > a; // scope-level=0 + rg::IOResource a; // scope-level=0 rg::emplace_task( - []( auto a ) + [](auto a) { std::cout << "scope = " << rg::scope_depth() << std::endl; rg::IOResource b; // scope-level=1 rg::emplace_task( - []( auto b ) + [](auto b) { *b = 1; std::cout << "scope = " << rg::scope_depth() << std::endl; }, - b.write() - ).get(); + b.write()) + .get(); std::cout << "scope = " << rg::scope_depth() << std::endl; }, - a.read() - ).enable_stack_switching(); + a.read()) + .enable_stack_switching(); rg::finalize(); } diff --git a/examples/7_event.cpp b/examples/7_event.cpp index df7551e3..96f12203 100644 --- a/examples/7_event.cpp +++ b/examples/7_event.cpp @@ -7,47 +7,45 @@ #define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_OFF -#include -#include 
-#include - #include +#include #include #include -#include + +#include +#include +#include int main() { - spdlog::set_level( spdlog::level::trace ); + spdlog::set_level(spdlog::level::trace); spdlog::set_pattern("[thread %t] %^[%l]%$ %v"); redGrapes::init(1); - redGrapes::Resource< redGrapes::access::IOAccess > r1; + redGrapes::Resource r1; auto event_f = redGrapes::emplace_task( - [] { - std::cout << "Task 1" << std::endl; - return redGrapes::create_event(); - } - ).resources({ r1.make_access(redGrapes::access::IOAccess::write) }).submit(); - - redGrapes::emplace_task( - [] { - std::cout << "Task 2" << std::endl; - } - ).resources({ r1.make_access(redGrapes::access::IOAccess::write) }); + [] + { + std::cout << "Task 1" << std::endl; + return redGrapes::create_event(); + }) + .resources({r1.make_access(redGrapes::access::IOAccess::write)}) + .submit(); + + redGrapes::emplace_task([] { std::cout << "Task 2" << std::endl; }) + .resources({r1.make_access(redGrapes::access::IOAccess::write)}); auto event = event_f.get(); std::cout << "Task 1 finished" << std::endl; - std::this_thread::sleep_for( std::chrono::seconds(1) ); + std::this_thread::sleep_for(std::chrono::seconds(1)); std::cout << "notify event" << std::endl; event->notify(); redGrapes::finalize(); - + return 0; } - diff --git a/examples/cholesky.cpp b/examples/cholesky.cpp index 6af49f26..65853130 100644 --- a/examples/cholesky.cpp +++ b/examples/cholesky.cpp @@ -1,5 +1,6 @@ -#include #include + +#include // work-around, see // https://github.com/xianyi/OpenBLAS/issues/1992#issuecomment-459474791 // https://github.com/xianyi/OpenBLAS/pull/1998 @@ -8,10 +9,11 @@ #define lapack_complex_double std::complex // end work-around -#include #include #include +#include + #define REDGRAPES_TASK_PROPERTIES redGrapes::LabelProperty #include @@ -90,8 +92,21 @@ int main(int argc, char* argv[]) [blksz](auto a, auto b, auto c) { spdlog::info("dgemm"); - cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, - blksz, blksz, blksz, -1.0, *a, blksz, *b, blksz, 1.0, *c, blksz); + cblas_dgemm( + CblasColMajor, + CblasNoTrans, + CblasTrans, + blksz, + blksz, + blksz, + -1.0, + *a, + blksz, + *b, + blksz, + 1.0, + *c, + blksz); }, A[k * nblks + i].read(), A[k * nblks + j].read(), @@ -106,8 +121,18 @@ int main(int argc, char* argv[]) [blksz, nblks](auto a, auto c) { spdlog::info("dsyrk"); - cblas_dsyrk(CblasColMajor, CblasLower, CblasNoTrans, - blksz, blksz, -1.0, *a, blksz, 1.0, *c, blksz); + cblas_dsyrk( + CblasColMajor, + CblasLower, + CblasNoTrans, + blksz, + blksz, + -1.0, + *a, + blksz, + 1.0, + *c, + blksz); }, A[i * nblks + j].read(), A[j * nblks + j].write()); @@ -129,9 +154,19 @@ int main(int argc, char* argv[]) [blksz, nblks](auto a, auto b) { spdlog::info("dtrsm"); - cblas_dtrsm(CblasColMajor, - CblasRight, CblasLower, CblasTrans, CblasNonUnit, - blksz, blksz, 1.0, *a, blksz, *b, blksz); + cblas_dtrsm( + CblasColMajor, + CblasRight, + CblasLower, + CblasTrans, + CblasNonUnit, + blksz, + blksz, + 1.0, + *a, + blksz, + *b, + blksz); }, A[j * nblks + j].read(), A[j * nblks + i].write()); diff --git a/examples/config/redGrapes_config.hpp b/examples/config/redGrapes_config.hpp index 9ed168a8..3aff6572 100644 --- a/examples/config/redGrapes_config.hpp +++ b/examples/config/redGrapes_config.hpp @@ -1,8 +1,8 @@ #pragma once -#include #include +#include enum SchedulerTags { @@ -10,33 +10,30 @@ enum SchedulerTags SCHED_CUDA }; -#define REDGRAPES_TASK_PROPERTIES \ - redGrapes::LabelProperty, \ - redGrapes::scheduler::SchedulingTagProperties< SchedulerTags > 
+#define REDGRAPES_TASK_PROPERTIES \ + redGrapes::LabelProperty, redGrapes::scheduler::SchedulingTagProperties -#define REDGRAPES_ALLOC_CHUNKSIZE ( 64 * 1024 ) +#define REDGRAPES_ALLOC_CHUNKSIZE (64 * 1024) -template <> -struct fmt::formatter< SchedulerTags > +template<> +struct fmt::formatter { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - SchedulerTags const & tag, - FormatContext & ctx - ) + template + auto format(SchedulerTags const& tag, FormatContext& ctx) { switch(tag) { - case SCHED_MPI: return fmt::format_to(ctx.out(), "\"MPI\""); - case SCHED_CUDA: return fmt::format_to(ctx.out(), "\"CUDA\""); - default: return fmt::format_to(ctx.out(), "\"undefined\""); + case SCHED_MPI: + return fmt::format_to(ctx.out(), "\"MPI\""); + case SCHED_CUDA: + return fmt::format_to(ctx.out(), "\"CUDA\""); + default: + return fmt::format_to(ctx.out(), "\"undefined\""); } } }; - - diff --git a/examples/cuda_mandelbrot.cu b/examples/cuda_mandelbrot.cu index 8feaee0a..627b4c9e 100644 --- a/examples/cuda_mandelbrot.cu +++ b/examples/cuda_mandelbrot.cu @@ -7,24 +7,26 @@ #include #include + +#include +#include #include #include -#include -#include -enum SchedulerTag { SCHED_CUDA }; +enum SchedulerTag +{ + SCHED_CUDA +}; -#define REDGRAPES_TASK_PROPERTIES \ - dispatch::cuda::CudaTaskProperties, \ - scheduler::SchedulingTagProperties< SchedulerTag > +#define REDGRAPES_TASK_PROPERTIES dispatch::cuda::CudaTaskProperties, scheduler::SchedulingTagProperties #include -#include -#include +#include #include #include +#include +#include #include -#include namespace rg = redGrapes; @@ -33,8 +35,18 @@ struct Color float r, g, b; }; -__global__ void hello_world() {} -__global__ void mandelbrot(double begin_x, double end_x, double begin_y, double end_y, int buffer_width, int buffer_height, Color * out) +__global__ void hello_world() +{ +} + +__global__ void mandelbrot( + double begin_x, + double end_x, + double begin_y, + double end_y, + int buffer_width, + int buffer_height, + Color* out) { int xi = blockIdx.x * blockDim.x + threadIdx.x; int yi = blockIdx.y * blockDim.y + threadIdx.y; @@ -60,29 +72,16 @@ __global__ void mandelbrot(double begin_x, double end_x, double begin_y, double int main() { - auto default_scheduler = - std::make_shared( 4 /* number of CPU workers */); + auto default_scheduler = std::make_shared(4 /* number of CPU workers */); - auto cuda_scheduler = - std::make_shared( - [](rg::Task const & t) - { - return t.required_scheduler_tags.test(SCHED_CUDA); - }, - 4 /* number of cuda streams */ - ); + auto cuda_scheduler = std::make_shared( + [](rg::Task const& t) { return t.required_scheduler_tags.test(SCHED_CUDA); }, + 4 /* number of cuda streams */ + ); - rg::idle = - [cuda_scheduler] - { - cuda_scheduler->poll(); - }; + rg::idle = [cuda_scheduler] { cuda_scheduler->poll(); }; - rg::init( - rg::scheduler::make_tag_match_scheduler() - .add({}, default_scheduler) - .add({SCHED_CUDA}, cuda_scheduler) - ); + rg::init(rg::scheduler::make_tag_match_scheduler().add({}, default_scheduler).add({SCHED_CUDA}, cuda_scheduler)); double mid_x = 0.41820187155955555; double mid_y = 0.32743154895555555; @@ -91,28 +90,30 @@ int main() size_t height = 4096; size_t area = width * height; - rg::IOResource host_buffer; - rg::IOResource device_buffer; + rg::IOResource host_buffer; + rg::IOResource device_buffer; rg::emplace_task( - [area](auto host_buffer) { - void * ptr; + [area](auto 
host_buffer) + { + void* ptr; cudaMallocHost(&ptr, area * sizeof(Color)); - *host_buffer = (Color *)ptr; + *host_buffer = (Color*) ptr; }, host_buffer.write()); rg::emplace_task( - [area](auto device_buffer) { - void * ptr; + [area](auto device_buffer) + { + void* ptr; cudaMalloc(&ptr, area * sizeof(Color)); - *device_buffer = (Color *)ptr; + *device_buffer = (Color*) ptr; }, device_buffer.write()); // warmup cuda - //hello_world<<< 1, 1, 0, 0 >>>(); - //cudaMemcpy(*host_buffer, *device_buffer, sizeof(Color), cudaMemcpyDeviceToHost); + // hello_world<<< 1, 1, 0, 0 >>>(); + // cudaMemcpy(*host_buffer, *device_buffer, sizeof(Color), cudaMemcpyDeviceToHost); auto t1 = std::chrono::high_resolution_clock::now(); @@ -124,40 +125,44 @@ int main() * calculate picture */ rg::emplace_task( - [width, height, area, i, mid_x, mid_y, w]( auto device_buffer ) { + [width, height, area, i, mid_x, mid_y, w](auto device_buffer) + { double begin_x = mid_x - w; - double end_x = mid_x + w; + double end_x = mid_x + w; double begin_y = mid_y - w; - double end_y = mid_y + w; + double end_y = mid_y + w; dim3 threadsPerBlock(8, 8); dim3 numBlocks(width / threadsPerBlock.x, height / threadsPerBlock.y); - mandelbrot<<< - numBlocks, - threadsPerBlock, - 0, - rg::dispatch::cuda::current_stream - >>>( - begin_x, end_x, - begin_y, end_y, - width, height, - *device_buffer - ); - std::cout << "launched kernel to stream " << rg::dispatch::cuda::current_stream << std::endl; + mandelbrot<<>>( + begin_x, + end_x, + begin_y, + end_y, + width, + height, + *device_buffer); + std::cout << "launched kernel to stream " << rg::dispatch::cuda::current_stream << std::endl; }, - rg::TaskProperties::Builder().scheduling_tags({ SCHED_CUDA }), + rg::TaskProperties::Builder().scheduling_tags({SCHED_CUDA}), device_buffer.write()); /* * copy data */ rg::emplace_task( - [area]( auto host_buffer, auto device_buffer ) { - cudaMemcpyAsync(*host_buffer, *device_buffer, area * sizeof(Color), cudaMemcpyDeviceToHost, rg::dispatch::cuda::current_stream); + [area](auto host_buffer, auto device_buffer) + { + cudaMemcpyAsync( + *host_buffer, + *device_buffer, + area * sizeof(Color), + cudaMemcpyDeviceToHost, + rg::dispatch::cuda::current_stream); std::cout << "launched memcpy to stream " << rg::dispatch::cuda::current_stream << std::endl; }, - rg::TaskProperties::Builder().scheduling_tags({ SCHED_CUDA }), + rg::TaskProperties::Builder().scheduling_tags({SCHED_CUDA}), host_buffer.write(), device_buffer.read()); @@ -165,7 +170,8 @@ int main() * write png */ rg::emplace_task( - [width, height, i]( auto host_buffer ) { + [width, height, i](auto host_buffer) + { std::stringstream step; step << std::setw(6) << std::setfill('0') << i; @@ -177,34 +183,27 @@ int main() { for(int x = 0; x < width; ++x) { - auto & color = (*host_buffer)[x + y * width]; + auto& color = (*host_buffer)[x + y * width]; png.plot(x + 1, height - y, color.r, color.g, color.b); } } png.close(); - std::cout << "wrote png" << std::endl; + std::cout << "wrote png" << std::endl; }, host_buffer.read()); } - rg::emplace_task([](auto b){}, host_buffer.write()).get(); + rg::emplace_task([](auto b) {}, host_buffer.write()).get(); auto t2 = std::chrono::high_resolution_clock::now(); - std::cout << "runtime: " << std::chrono::duration_cast( t2 - t1 ).count() << " μs" << std::endl; + std::cout << "runtime: " << std::chrono::duration_cast(t2 - t1).count() << " μs" + << std::endl; /* * cleanup */ - rg::emplace_task( - []( auto host_buffer ) { - cudaFreeHost(*host_buffer); - }, - host_buffer.write()); + 
rg::emplace_task([](auto host_buffer) { cudaFreeHost(*host_buffer); }, host_buffer.write()); - rg::emplace_task( - []( auto device_buffer ) { - cudaFree(*device_buffer); - }, - device_buffer.write()); + rg::emplace_task([](auto device_buffer) { cudaFree(*device_buffer); }, device_buffer.write()); } diff --git a/examples/game_of_life.cpp b/examples/game_of_life.cpp index 0943ca8e..592bb0b0 100644 --- a/examples/game_of_life.cpp +++ b/examples/game_of_life.cpp @@ -9,125 +9,125 @@ * @file examples/game_of_life.cpp */ +#include +#include +#include +#include +#include + #include #include #include #include #include -#include -#include -#include -#include -#include +struct Vec2 +{ + int x, y; +}; -struct Vec2 { int x, y; }; -enum Cell { DEAD, ALIVE }; -static constexpr Vec2 size { 32, 32 }; -static constexpr Vec2 chunk_size { 4, 4 }; +enum Cell +{ + DEAD, + ALIVE +}; + +static constexpr Vec2 size{32, 32}; +static constexpr Vec2 chunk_size{4, 4}; -Cell next_state( Cell const neighbours [][size.x+2] ) +Cell next_state(Cell const neighbours[][size.x + 2]) { - int count = neighbours[-1][-1] + neighbours[-1][0] + neighbours[-1][1] + - neighbours[0][-1] + neighbours[0][1] + neighbours[1][-1] + - neighbours[1][0] + neighbours[1][1]; - if ( count < 2 || count > 3 ) + int count = neighbours[-1][-1] + neighbours[-1][0] + neighbours[-1][1] + neighbours[0][-1] + neighbours[0][1] + + neighbours[1][-1] + neighbours[1][0] + neighbours[1][1]; + if(count < 2 || count > 3) return DEAD; - else if ( count == 3 ) + else if(count == 3) return ALIVE; else return neighbours[0][0]; } -int main( int, char * [] ) +int main(int, char*[]) { - spdlog::set_level( spdlog::level::trace ); + spdlog::set_level(spdlog::level::trace); spdlog::set_pattern("[thread %t] %^[%l]%$ %v"); redGrapes::init(4); - using Buffer = - std::array< - std::array< - Cell, - size.x+2 - >, - size.y+2 - >; + using Buffer = std::array, size.y + 2>; - std::vector< redGrapes::FieldResource > buffers; + std::vector> buffers; for(size_t i = 0; i < 4; ++i) - buffers.emplace_back( new Buffer() ); + buffers.emplace_back(new Buffer()); int current = 0; // initialization redGrapes::emplace_task( - []( auto buf ) + [](auto buf) { std::default_random_engine generator; std::bernoulli_distribution distribution{0.35}; - for ( size_t x = 0; x < size.x+2; ++x ) - for ( size_t y = 0; y < size.y+2; ++y ) - buf[{x, y}] = distribution( generator ) ? ALIVE : DEAD; + for(size_t x = 0; x < size.x + 2; ++x) + for(size_t y = 0; y < size.y + 2; ++y) + buf[{x, y}] = distribution(generator) ? 
ALIVE : DEAD; }, - buffers[current].write() - ); + buffers[current].write()); - for ( int generation = 0; generation < 500; ++generation ) + for(int generation = 0; generation < 500; ++generation) { - int next = ( current + 1 ) % buffers.size(); + int next = (current + 1) % buffers.size(); // copy borders redGrapes::emplace_task( - []( auto buf ) + [](auto buf) { - for ( size_t x = 0; x < size.x+2; ++x ) + for(size_t x = 0; x < size.x + 2; ++x) { - buf[{x, 0}] = buf[{x, size.y}];; - buf[{x, size.y+1}] = buf[{x, 1}]; + buf[{x, 0}] = buf[{x, size.y}]; + ; + buf[{x, size.y + 1}] = buf[{x, 1}]; } - for ( size_t y = 0; y < size.y+2; ++y ) + for(size_t y = 0; y < size.y + 2; ++y) { buf[{0, y}] = buf[{size.x, y}]; - buf[{size.x+1, y}] = buf[{1, y}]; + buf[{size.x + 1, y}] = buf[{1, y}]; } }, - buffers[current].write() - ); + buffers[current].write()); // print buffer redGrapes::emplace_task( - []( auto buf ) + [](auto buf) { - for ( size_t x = 1; x < size.x; ++x ) + for(size_t x = 1; x < size.x; ++x) { - for ( size_t y = 1; y < size.y; ++y ) + for(size_t y = 1; y < size.y; ++y) { - std::cout << ( ( buf[{x,y}] == ALIVE ) ? "" : "" ) << " "; + std::cout << ((buf[{x, y}] == ALIVE) ? "" : "") << " "; } std::cout << "" << std::endl; } std::cout << std::endl; }, - buffers[current].read() - ).get(); + buffers[current].read()) + .get(); // calculate next step - for ( size_t x = 1; x <= size.x; x += chunk_size.x ) - for ( size_t y = 1; y <= size.y; y += chunk_size.y ) + for(size_t x = 1; x <= size.x; x += chunk_size.x) + for(size_t y = 1; y <= size.y; y += chunk_size.y) redGrapes::emplace_task( - [x, y]( auto dst, auto src ) + [x, y](auto dst, auto src) { - for ( int xi = 0; xi < chunk_size.x; ++xi ) - for ( int yi = 0; yi < chunk_size.y; ++yi ) - dst[{x+xi, y+yi}] = next_state( (Cell const (*)[size.x+2]) &(src[{x+xi, y+yi}]) ); + for(int xi = 0; xi < chunk_size.x; ++xi) + for(int yi = 0; yi < chunk_size.y; ++yi) + dst[{x + xi, y + yi}] + = next_state((Cell const(*)[size.x + 2]) & (src[{x + xi, y + yi}])); }, buffers[next].write().area({x, y}, {x + chunk_size.x, y + chunk_size.y}), - buffers[current].read().area({x-1, y-1}, {x+chunk_size.x+2, y + chunk_size.y+2}) - ); + buffers[current].read().area({x - 1, y - 1}, {x + chunk_size.x + 2, y + chunk_size.y + 2})); current = next; } @@ -135,7 +135,6 @@ int main( int, char * [] ) redGrapes::finalize(); SPDLOG_DEBUG("END!!!!"); - + return 0; } - diff --git a/examples/mpi.cpp b/examples/mpi.cpp index db37a237..94867e2c 100644 --- a/examples/mpi.cpp +++ b/examples/mpi.cpp @@ -1,12 +1,10 @@ -#include -#include - -#include - +#include #include -#include +#include #include -#include +#include +#include +#include namespace rg = redGrapes; @@ -39,7 +37,7 @@ struct MPIConfig int main() { spdlog::set_pattern("[thread %t] %^[%l]%$ %v"); - spdlog::set_level( spdlog::level::trace ); + spdlog::set_level(spdlog::level::trace); /* int prov; @@ -47,51 +45,53 @@ int main() assert( prov == MPI_THREAD_MULTIPLE ); */ - MPI_Init( nullptr, nullptr ); + MPI_Init(nullptr, nullptr); auto default_scheduler = std::make_shared(); auto mpi_request_pool = std::make_shared(); - hwloc_obj_t obj = hwloc_get_obj_by_type( redGrapes::SingletonContext::get().hwloc_ctx.topology, HWLOC_OBJ_PU, 1 ); - rg::memory::ChunkedBumpAlloc< rg::memory::HwlocAlloc > mpi_alloc( rg::memory::HwlocAlloc( redGrapes::SingletonContext::get().hwloc_ctx, obj ) ); - auto mpi_worker = std::make_shared( mpi_alloc, redGrapes::SingletonContext::get().hwloc_ctx, obj, 4 ); + hwloc_obj_t obj = 
hwloc_get_obj_by_type(redGrapes::SingletonContext::get().hwloc_ctx.topology, HWLOC_OBJ_PU, 1); + rg::memory::ChunkedBumpAlloc mpi_alloc( + rg::memory::HwlocAlloc(redGrapes::SingletonContext::get().hwloc_ctx, obj)); + auto mpi_worker = std::make_shared( + mpi_alloc, + redGrapes::SingletonContext::get().hwloc_ctx, + obj, + 4); // initialize main thread to execute tasks from the mpi-queue and poll - rg::SingletonContext::get().idle = - [mpi_worker, mpi_request_pool] - { - mpi_request_pool->poll(); - - redGrapes::Task * task; - - if( task = mpi_worker->ready_queue.pop() ) - redGrapes::SingletonContext::get().execute_task( *task ); - - while( mpi_worker->init_dependencies( task, true ) ) - if( task ) - { - redGrapes::SingletonContext::get().execute_task( *task ); - break; - } - }; - - rg::init(4, - rg::scheduler::make_tag_match_scheduler() - .add({}, default_scheduler) - .add({ SCHED_MPI }, mpi_worker)); - + rg::SingletonContext::get().idle = [mpi_worker, mpi_request_pool] + { + mpi_request_pool->poll(); + + redGrapes::Task* task; + + if(task = mpi_worker->ready_queue.pop()) + redGrapes::SingletonContext::get().execute_task(*task); + + while(mpi_worker->init_dependencies(task, true)) + if(task) + { + redGrapes::SingletonContext::get().execute_task(*task); + break; + } + }; + + rg::init(4, rg::scheduler::make_tag_match_scheduler().add({}, default_scheduler).add({SCHED_MPI}, mpi_worker)); + // initialize MPI config - rg::IOResource< MPIConfig > mpi_config; + rg::IOResource mpi_config; rg::emplace_task( - []( auto config ) { + [](auto config) + { MPI_Comm_rank(MPI_COMM_WORLD, &config->world_rank); MPI_Comm_size(MPI_COMM_WORLD, &config->world_size); }, - mpi_config.write() - ).scheduling_tags( std::bitset<64>().set(SCHED_MPI) ); + mpi_config.write()) + .scheduling_tags(std::bitset<64>().set(SCHED_MPI)); // main loop - rg::FieldResource< std::array > field[2] = { + rg::FieldResource> field[2] = { rg::FieldResource>(new std::array()), rg::FieldResource>(new std::array()), }; @@ -100,15 +100,14 @@ int main() // initialize rg::emplace_task( - []( auto buf, auto mpi_config ) + [](auto buf, auto mpi_config) { int offset = 3 * mpi_config->world_rank; - for( size_t i = 0; i < buf->size(); ++i ) + for(size_t i = 0; i < buf->size(); ++i) buf[{i}] = offset + i; }, field[current].write(), - mpi_config.read() - ); + mpi_config.read()); for(size_t i = 0; i < 1; ++i) { @@ -120,78 +119,66 @@ int main() // Send rg::emplace_task( - [i, current, mpi_request_pool]( auto field, auto mpi_config ) + [i, current, mpi_request_pool](auto field, auto mpi_config) { - int dst = ( mpi_config->world_rank + 1 ) % mpi_config->world_size; + int dst = (mpi_config->world_rank + 1) % mpi_config->world_size; MPI_Request request; - MPI_Isend( &field[{3}], sizeof(int), MPI_CHAR, dst, current, MPI_COMM_WORLD, &request ); + MPI_Isend(&field[{3}], sizeof(int), MPI_CHAR, dst, current, MPI_COMM_WORLD, &request); - mpi_request_pool->get_status( request ); + mpi_request_pool->get_status(request); }, field[current].at({3}).read(), mpi_config.read()) - .scheduling_tags({ SCHED_MPI }) + .scheduling_tags({SCHED_MPI}) .enable_stack_switching(); // Receive rg::emplace_task( - [i, current, mpi_request_pool]( auto field, auto mpi_config ) + [i, current, mpi_request_pool](auto field, auto mpi_config) { - int src = ( mpi_config->world_rank - 1 ) % mpi_config->world_size; + int src = (mpi_config->world_rank - 1) % mpi_config->world_size; MPI_Request request; - MPI_Irecv( &field[{0}], sizeof(int), MPI_CHAR, src, current, MPI_COMM_WORLD, &request ); + 
MPI_Irecv(&field[{0}], sizeof(int), MPI_CHAR, src, current, MPI_COMM_WORLD, &request); - MPI_Status status = mpi_request_pool->get_status( request ); + MPI_Status status = mpi_request_pool->get_status(request); int recv_data_count; - MPI_Get_count( &status, MPI_CHAR, &recv_data_count ); + MPI_Get_count(&status, MPI_CHAR, &recv_data_count); }, field[current].at({0}).write(), mpi_config.read()) - .scheduling_tags({ SCHED_MPI }) + .scheduling_tags({SCHED_MPI}) .enable_stack_switching(); /* * Compute iteration */ - for( size_t i = 1; i < field[current]->size(); ++i ) + for(size_t i = 1; i < field[current]->size(); ++i) rg::emplace_task( - [i]( auto dst, auto src ) - { - dst[{i}] = src[{i - 1}]; - }, + [i](auto dst, auto src) { dst[{i}] = src[{i - 1}]; }, field[next].at({i}).write(), - field[current].at({i-1}).read() - ); + field[current].at({i - 1}).read()); /* * Write Output */ rg::emplace_task( - [i]( auto buf, auto mpi_config ) - { + [i](auto buf, auto mpi_config) + { std::cout << "Step[" << i << "], rank[" << mpi_config->world_rank << "] :: "; - for( size_t i = 0; i < buf->size(); ++i ) + for(size_t i = 0; i < buf->size(); ++i) std::cout << buf[{i}] << "; "; std::cout << std::endl; }, field[current].read(), - mpi_config.read() - ); + mpi_config.read()); current = next; } - rg::emplace_task( - []( auto m ) - { - MPI_Finalize(); - }, - mpi_config.write() - ).scheduling_tags({ SCHED_MPI }); + rg::emplace_task([](auto m) { MPI_Finalize(); }, mpi_config.write()).scheduling_tags({SCHED_MPI}); rg::finalize(); } - diff --git a/redGrapes/dispatch/cuda/event_pool.hpp b/redGrapes/dispatch/cuda/event_pool.hpp index a246cde7..a8123117 100644 --- a/redGrapes/dispatch/cuda/event_pool.hpp +++ b/redGrapes/dispatch/cuda/event_pool.hpp @@ -7,70 +7,70 @@ #pragma once -#include #include +#include namespace redGrapes { -namespace dispatch -{ -namespace cuda -{ - -//! Manages the recycling of cuda events -struct EventPool -{ -public: - EventPool(EventPool const &) = delete; - void operator=(EventPool const &) = delete; - - EventPool() {} - - static EventPool & get() - { - static EventPool singleton; - return singleton; - } - - ~EventPool() + namespace dispatch { - std::lock_guard< std::mutex > lock( mutex ); - for( auto e : unused_cuda_events ) - cudaEventDestroy( e ); - } - - cudaEvent_t alloc() - { - std::lock_guard< std::mutex > lock( mutex ); - - cudaEvent_t e; - - if( unused_cuda_events.empty() ) - cudaEventCreate( &e ); - else + namespace cuda { - e = unused_cuda_events.back(); - unused_cuda_events.pop_back(); - } - - return e; - } - void free( cudaEvent_t event ) - { - std::lock_guard< std::mutex > lock( mutex ); - unused_cuda_events.push_back( event ); - } - -private: - std::mutex mutex; - std::vector< cudaEvent_t > unused_cuda_events; - -}; + //! 
Manages the recycling of cuda events + struct EventPool + { + public: + EventPool(EventPool const&) = delete; + void operator=(EventPool const&) = delete; + + EventPool() + { + } + + static EventPool& get() + { + static EventPool singleton; + return singleton; + } + + ~EventPool() + { + std::lock_guard lock(mutex); + for(auto e : unused_cuda_events) + cudaEventDestroy(e); + } + + cudaEvent_t alloc() + { + std::lock_guard lock(mutex); + + cudaEvent_t e; + + if(unused_cuda_events.empty()) + cudaEventCreate(&e); + else + { + e = unused_cuda_events.back(); + unused_cuda_events.pop_back(); + } + + return e; + } + + void free(cudaEvent_t event) + { + std::lock_guard lock(mutex); + unused_cuda_events.push_back(event); + } + + private: + std::mutex mutex; + std::vector unused_cuda_events; + }; + + } // namespace cuda + + } // namespace dispatch -} // namespace cuda - -} // namespace dispatch - } // namespace redGrapes - diff --git a/redGrapes/dispatch/cuda/scheduler.hpp b/redGrapes/dispatch/cuda/scheduler.hpp index d26a77b3..20d8f4a4 100644 --- a/redGrapes/dispatch/cuda/scheduler.hpp +++ b/redGrapes/dispatch/cuda/scheduler.hpp @@ -7,199 +7,188 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include #include #include #include -#include -#include -#include #include +#include -namespace redGrapes -{ -namespace dispatch -{ -namespace cuda -{ - -thread_local cudaStream_t current_stream; +#include +#include +#include +#include +#include -// this class is not thread safe -template < - typename Task -> -struct CudaStreamDispatcher +namespace redGrapes { - cudaStream_t cuda_stream; - std::recursive_mutex mutex; - std::queue< - std::pair< - cudaEvent_t, - scheduler::EventPtr - > - > events; - - CudaStreamDispatcher() + namespace dispatch { - cudaStreamCreate( &cuda_stream ); - } - - CudaStreamDispatcher( CudaStreamDispatcher const & other ) - { - spdlog::warn("CudaStreamDispatcher copy constructor called!"); - } - - ~CudaStreamDispatcher() - { - cudaStreamDestroy( cuda_stream ); - } - - void poll() - { - std::lock_guard< std::recursive_mutex > lock( mutex ); - if( ! 
events.empty() ) + namespace cuda { - auto & cuda_event = events.front().first; - auto & event = events.front().second; - - if( cudaEventQuery( cuda_event ) == cudaSuccess ) - { - SPDLOG_TRACE("cuda event {} ready", cuda_event); - EventPool::get().free( cuda_event ); - event.notify(); - - events.pop(); - } - } - } - void dispatch_task( Task & task ) - { - std::lock_guard< std::recursive_mutex > lock( mutex ); - - for(auto predecessor : task.in_edges) - { - SPDLOG_TRACE("cudaDispatcher: consider predecessor \"{}\"", predecessor->label); + thread_local cudaStream_t current_stream; - if(auto cuda_event = predecessor->cuda_event) + // this class is not thread safe + template + struct CudaStreamDispatcher { - SPDLOG_TRACE("cudaDispatcher: task {} \"{}\" wait for {}", task.task_id, task.label, *cuda_event); - - cudaStreamWaitEvent( cuda_stream, *cuda_event, 0 ); - } - } - - SPDLOG_TRACE( - "CudaScheduler: start {}", - task_id - ); - - current_stream = cuda_stream; - - // run the code that calls the CUDA API and submits work to current_stream - task->run(); - - cudaEvent_t cuda_event = EventPool::get().alloc(); - cudaEventRecord( cuda_event, cuda_stream ); - task->cuda_event = cuda_event; - - task->get_pre_event().notify(); - - SPDLOG_TRACE( "CudaStreamDispatcher {}: recorded event {}", cuda_stream, cuda_event ); - events.push( std::make_pair( cuda_event, task->get_post_event() ) ); - } -}; - -struct CudaScheduler : redGrapes::scheduler::IScheduler -{ -private: - bool recording; - bool cuda_graph_enabled; - - std::recursive_mutex mutex; - unsigned int current_stream; - std::vector< CudaStreamDispatcher< Task > > streams; - - std::function< bool(Task const&) > is_cuda_task; - -public: - CudaScheduler( - std::function< bool(Task const&) > is_cuda_task, - size_t stream_count = 1, - bool cuda_graph_enabled = false - ) : - is_cuda_task( is_cuda_task ), - current_stream( 0 ), - cuda_graph_enabled( cuda_graph_enabled ) - { - // reserve to avoid copy constructor of CudaStreamDispatcher - streams.reserve( stream_count ); - - for( size_t i = 0; i < stream_count; ++i ) - streams.emplace_back(); - - SPDLOG_TRACE( "CudaScheduler: use {} streams", streams.size() ); - } - - //! submits the call to the cuda runtime - void activate_task( Task & task ) - { - unsigned int stream_id = current_stream; - current_stream = ( current_stream + 1 ) % streams.size(); - - SPDLOG_TRACE( "Dispatch Cuda task {} \"{}\" on stream {}", task.task_id, task.label, stream_id ); - streams[ stream_id ].dispatch_task( task ); - } - - //! checks if some cuda calls finished and notify the redGrapes manager - void poll() - { - for( size_t stream_id = 0; stream_id < streams.size(); ++stream_id ) - streams[ stream_id ].poll(); - } - - /*! whats the task dependency type for the edge a -> b (task a precedes task b) - * @return true if task b depends on the pre event of task a, false if task b depends on the post event of task b. 
- */
-    bool task_dependency_type( Task const & a, Task const & b )
-    {
-        assert( is_cuda_task( b ) );
-        return is_cuda_task( a );
-    }
-};
-
-} // namespace cuda
-
-} // namespace dispatch
+                cudaStream_t cuda_stream;
+                std::recursive_mutex mutex;
+                std::queue<std::pair<cudaEvent_t, scheduler::EventPtr>> events;
+
+                CudaStreamDispatcher()
+                {
+                    cudaStreamCreate(&cuda_stream);
+                }
+
+                CudaStreamDispatcher(CudaStreamDispatcher const& other)
+                {
+                    spdlog::warn("CudaStreamDispatcher copy constructor called!");
+                }
+
+                ~CudaStreamDispatcher()
+                {
+                    cudaStreamDestroy(cuda_stream);
+                }
+
+                void poll()
+                {
+                    std::lock_guard<std::recursive_mutex> lock(mutex);
+                    if(!events.empty())
+                    {
+                        auto& cuda_event = events.front().first;
+                        auto& event = events.front().second;
+
+                        if(cudaEventQuery(cuda_event) == cudaSuccess)
+                        {
+                            SPDLOG_TRACE("cuda event {} ready", cuda_event);
+                            EventPool::get().free(cuda_event);
+                            event.notify();
+
+                            events.pop();
+                        }
+                    }
+                }
+
+                void dispatch_task(Task& task)
+                {
+                    std::lock_guard<std::recursive_mutex> lock(mutex);
+
+                    for(auto predecessor : task.in_edges)
+                    {
+                        SPDLOG_TRACE("cudaDispatcher: consider predecessor \"{}\"", predecessor->label);
+
+                        if(auto cuda_event = predecessor->cuda_event)
+                        {
+                            SPDLOG_TRACE(
+                                "cudaDispatcher: task {} \"{}\" wait for {}",
+                                task.task_id,
+                                task.label,
+                                *cuda_event);
+
+                            cudaStreamWaitEvent(cuda_stream, *cuda_event, 0);
+                        }
+                    }
+
+                    SPDLOG_TRACE("CudaScheduler: start {}", task.task_id);
+
+                    current_stream = cuda_stream;
+
+                    // run the code that calls the CUDA API and submits work to current_stream
+                    task->run();
+
+                    cudaEvent_t cuda_event = EventPool::get().alloc();
+                    cudaEventRecord(cuda_event, cuda_stream);
+                    task->cuda_event = cuda_event;
+
+                    task->get_pre_event().notify();
+
+                    SPDLOG_TRACE("CudaStreamDispatcher {}: recorded event {}", cuda_stream, cuda_event);
+                    events.push(std::make_pair(cuda_event, task->get_post_event()));
+                }
+            };
+
+            struct CudaScheduler : redGrapes::scheduler::IScheduler
+            {
+            private:
+                bool recording;
+                bool cuda_graph_enabled;
+
+                std::recursive_mutex mutex;
+                unsigned int current_stream;
+                std::vector<CudaStreamDispatcher<Task>> streams;
+
+                std::function<bool(Task const&)> is_cuda_task;
+
+            public:
+                CudaScheduler(
+                    std::function<bool(Task const&)> is_cuda_task,
+                    size_t stream_count = 1,
+                    bool cuda_graph_enabled = false)
+                    : is_cuda_task(is_cuda_task)
+                    , current_stream(0)
+                    , cuda_graph_enabled(cuda_graph_enabled)
+                {
+                    // reserve to avoid copy constructor of CudaStreamDispatcher
+                    streams.reserve(stream_count);
+
+                    for(size_t i = 0; i < stream_count; ++i)
+                        streams.emplace_back();
+
+                    SPDLOG_TRACE("CudaScheduler: use {} streams", streams.size());
+                }
+
+                //! submits the call to the cuda runtime
+                void activate_task(Task& task)
+                {
+                    unsigned int stream_id = current_stream;
+                    current_stream = (current_stream + 1) % streams.size();
+
+                    SPDLOG_TRACE("Dispatch Cuda task {} \"{}\" on stream {}", task.task_id, task.label, stream_id);
+                    streams[stream_id].dispatch_task(task);
+                }
+
+                //! checks if some cuda calls finished and notifies the redGrapes manager
+                void poll()
+                {
+                    for(size_t stream_id = 0; stream_id < streams.size(); ++stream_id)
+                        streams[stream_id].poll();
+                }
+
+                /*! what's the task dependency type for the edge a -> b (task a precedes task b)
+                 * @return true if task b depends on the pre event of task a, false if task b depends on the post event
+                 * of task a.
+ */ + bool task_dependency_type(Task const& a, Task const& b) + { + assert(is_cuda_task(b)); + return is_cuda_task(a); + } + }; + + } // namespace cuda + + } // namespace dispatch } // namespace redGrapes - -template <> -struct fmt::formatter< redGrapes::dispatch::cuda::CudaTaskProperties > +template<> +struct fmt::formatter { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::dispatch::cuda::CudaTaskProperties const & prop, - FormatContext & ctx - ) + template + auto format(redGrapes::dispatch::cuda::CudaTaskProperties const& prop, FormatContext& ctx) { - if( auto e = prop.cuda_event ) - return fmt::format_to( ctx.out(), "\"cuda_event\" : {}", *e ); + if(auto e = prop.cuda_event) + return fmt::format_to(ctx.out(), "\"cuda_event\" : {}", *e); else - return fmt::format_to( ctx.out(), "\"cuda_event\" : null"); + return fmt::format_to(ctx.out(), "\"cuda_event\" : null"); } }; - - diff --git a/redGrapes/dispatch/cuda/task_properties.hpp b/redGrapes/dispatch/cuda/task_properties.hpp index d399f6aa..e8532fc0 100644 --- a/redGrapes/dispatch/cuda/task_properties.hpp +++ b/redGrapes/dispatch/cuda/task_properties.hpp @@ -9,40 +9,43 @@ namespace redGrapes { -namespace dispatch -{ -namespace cuda -{ - -struct CudaTaskProperties -{ - std::optional< cudaEvent_t > cuda_event; - - CudaTaskProperties() {} - - template < typename PropertiesBuilder > - struct Builder + namespace dispatch { - PropertiesBuilder & builder; - - Builder( PropertiesBuilder & b ) - : builder(b) - {} - }; - - struct Patch - { - template - struct Builder + namespace cuda { - Builder( PatchBuilder & ) {} - }; - }; - - void apply_patch( Patch const & ) {}; -}; - -} -} -} + struct CudaTaskProperties + { + std::optional cuda_event; + + CudaTaskProperties() + { + } + + template + struct Builder + { + PropertiesBuilder& builder; + + Builder(PropertiesBuilder& b) : builder(b) + { + } + }; + + struct Patch + { + template + struct Builder + { + Builder(PatchBuilder&) + { + } + }; + }; + + void apply_patch(Patch const&){}; + }; + + } // namespace cuda + } // namespace dispatch +} // namespace redGrapes diff --git a/redGrapes/dispatch/cupla/event_pool.hpp b/redGrapes/dispatch/cupla/event_pool.hpp index 8ce96c8c..fda016db 100644 --- a/redGrapes/dispatch/cupla/event_pool.hpp +++ b/redGrapes/dispatch/cupla/event_pool.hpp @@ -7,70 +7,70 @@ #pragma once -#include #include +#include namespace redGrapes { -namespace dispatch -{ -namespace cupla -{ - -//! Manages the recycling of cuda events -struct EventPool -{ -public: - EventPool(EventPool const &) = delete; - void operator=(EventPool const &) = delete; - - EventPool() {} - - static EventPool & get() - { - static EventPool singleton; - return singleton; - } - - ~EventPool() + namespace dispatch { - std::lock_guard< std::mutex > lock( mutex ); - for( auto e : unused_cupla_events ) - cuplaEventDestroy( e ); - } - - cuplaEvent_t alloc() - { - std::lock_guard< std::mutex > lock( mutex ); - - cuplaEvent_t e; - - if( unused_cupla_events.empty() ) - cuplaEventCreate( &e ); - else + namespace cupla { - e = unused_cupla_events.back(); - unused_cupla_events.pop_back(); - } - - return e; - } - void free( cuplaEvent_t event ) - { - std::lock_guard< std::mutex > lock( mutex ); - unused_cupla_events.push_back( event ); - } - -private: - std::mutex mutex; - std::vector< cuplaEvent_t > unused_cupla_events; - -}; + //! 
Manages the recycling of cuda events + struct EventPool + { + public: + EventPool(EventPool const&) = delete; + void operator=(EventPool const&) = delete; + + EventPool() + { + } + + static EventPool& get() + { + static EventPool singleton; + return singleton; + } + + ~EventPool() + { + std::lock_guard lock(mutex); + for(auto e : unused_cupla_events) + cuplaEventDestroy(e); + } + + cuplaEvent_t alloc() + { + std::lock_guard lock(mutex); + + cuplaEvent_t e; + + if(unused_cupla_events.empty()) + cuplaEventCreate(&e); + else + { + e = unused_cupla_events.back(); + unused_cupla_events.pop_back(); + } + + return e; + } + + void free(cuplaEvent_t event) + { + std::lock_guard lock(mutex); + unused_cupla_events.push_back(event); + } + + private: + std::mutex mutex; + std::vector unused_cupla_events; + }; + + } // namespace cupla + + } // namespace dispatch -} // namespace cupla - -} // namespace dispatch - } // namespace redGrapes - diff --git a/redGrapes/dispatch/cupla/scheduler.hpp b/redGrapes/dispatch/cupla/scheduler.hpp index 774c4e87..7491567c 100644 --- a/redGrapes/dispatch/cupla/scheduler.hpp +++ b/redGrapes/dispatch/cupla/scheduler.hpp @@ -7,199 +7,188 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include #include #include #include -#include -#include -#include #include +#include -namespace redGrapes -{ -namespace dispatch -{ -namespace cupla -{ - -thread_local cuplaStream_t current_stream; +#include +#include +#include +#include +#include -// this class is not thread safe -template < - typename Task -> -struct CuplaStreamDispatcher +namespace redGrapes { - cuplaStream_t cupla_stream; - std::recursive_mutex mutex; - std::queue< - std::pair< - cuplaEvent_t, - scheduler::EventPtr - > - > events; - - CuplaStreamDispatcher() + namespace dispatch { - cuplaStreamCreate( &cupla_stream ); - } - - CuplaStreamDispatcher( CuplaStreamDispatcher const & other ) - { - spdlog::warn("CuplaStreamDispatcher copy constructor called!"); - } - - ~CuplaStreamDispatcher() - { - cuplaStreamDestroy( cupla_stream ); - } - - void poll() - { - std::lock_guard< std::recursive_mutex > lock( mutex ); - if( ! 
events.empty() ) + namespace cupla { - auto & cupla_event = events.front().first; - auto & event = events.front().second; - - if( cuplaEventQuery( cupla_event ) == cuplaSuccess ) - { - SPDLOG_TRACE("cupla event {} ready", cupla_event); - EventPool::get().free( cupla_event ); - event.notify(); - - events.pop(); - } - } - } - void dispatch_task( Task & task ) - { - std::lock_guard< std::recursive_mutex > lock( mutex ); - - for(auto predecessor : task.in_edges) - { - SPDLOG_TRACE("cuplaDispatcher: consider predecessor \"{}\"", predecessor->label); + thread_local cuplaStream_t current_stream; - if(auto cupla_event = predecessor->cupla_event) + // this class is not thread safe + template + struct CuplaStreamDispatcher { - SPDLOG_TRACE("cuplaDispatcher: task {} \"{}\" wait for {}", task.task_id, task.label, *cupla_event); - - cuplaStreamWaitEvent( cupla_stream, *cupla_event, 0 ); - } - } - - SPDLOG_TRACE( - "CuplaScheduler: start {}", - task.task_id - ); - - current_stream = cupla_stream; - - // run the code that calls the CUDA API and submits work to current_stream - task->run(); - - cuplaEvent_t cupla_event = EventPool::get().alloc(); - cuplaEventRecord( cupla_event, cupla_stream ); - task->cupla_event = cupla_event; - - task->get_pre_event().notify(); - - SPDLOG_TRACE( "CuplaStreamDispatcher {}: recorded event {}", cupla_stream, cupla_event ); - events.push( std::make_pair( cupla_event, task->get_post_event() ) ); - } -}; - -struct CuplaScheduler : redGrapes::scheduler::IScheduler -{ -private: - bool recording; - bool cupla_graph_enabled; - - std::recursive_mutex mutex; - unsigned int current_stream; - std::vector< CuplaStreamDispatcher< Task > > streams; - - std::function< bool(Task const&) > is_cupla_task; - -public: - CuplaScheduler( - std::function< bool(Task const&) > is_cupla_task, - size_t stream_count = 1, - bool cupla_graph_enabled = false - ) : - is_cupla_task( is_cupla_task ), - current_stream( 0 ), - cupla_graph_enabled( cupla_graph_enabled ) - { - // reserve to avoid copy constructor of CuplaStreamDispatcher - streams.reserve( stream_count ); - - for( size_t i = 0; i < stream_count; ++i ) - streams.emplace_back(); - - SPDLOG_TRACE( "CuplaScheduler: use {} streams", streams.size() ); - } - - //! submits the call to the cupla runtime - void activate_task( Task & task ) - { - unsigned int stream_id = current_stream; - current_stream = ( current_stream + 1 ) % streams.size(); - - SPDLOG_TRACE( "Dispatch Cupla task {} \"{}\" on stream {}", task.task_id, task.label, stream_id ); - streams[ stream_id ].dispatch_task( task ); - } - - //! checks if some cupla calls finished and notify the redGrapes manager - void poll() - { - for( size_t stream_id = 0; stream_id < streams.size(); ++stream_id ) - streams[ stream_id ].poll(); - } - - /*! whats the task dependency type for the edge a -> b (task a precedes task b) - * @return true if task b depends on the pre event of task a, false if task b depends on the post event of task b. 
- */
-    bool task_dependency_type( Task const & a, Task const & b )
-    {
-        assert( is_cupla_task( b ) );
-        return is_cupla_task( a );
-    }
-};
-
-} // namespace cupla
-
-} // namespace dispatch
+                cuplaStream_t cupla_stream;
+                std::recursive_mutex mutex;
+                std::queue<std::pair<cuplaEvent_t, scheduler::EventPtr>> events;
+
+                CuplaStreamDispatcher()
+                {
+                    cuplaStreamCreate(&cupla_stream);
+                }
+
+                CuplaStreamDispatcher(CuplaStreamDispatcher const& other)
+                {
+                    spdlog::warn("CuplaStreamDispatcher copy constructor called!");
+                }
+
+                ~CuplaStreamDispatcher()
+                {
+                    cuplaStreamDestroy(cupla_stream);
+                }
+
+                void poll()
+                {
+                    std::lock_guard lock(mutex);
+                    if(!events.empty())
+                    {
+                        auto& cupla_event = events.front().first;
+                        auto& event = events.front().second;
+
+                        if(cuplaEventQuery(cupla_event) == cuplaSuccess)
+                        {
+                            SPDLOG_TRACE("cupla event {} ready", cupla_event);
+                            EventPool::get().free(cupla_event);
+                            event.notify();
+
+                            events.pop();
+                        }
+                    }
+                }
+
+                void dispatch_task(Task& task)
+                {
+                    std::lock_guard lock(mutex);
+
+                    for(auto predecessor : task.in_edges)
+                    {
+                        SPDLOG_TRACE("cuplaDispatcher: consider predecessor \"{}\"", predecessor->label);
+
+                        if(auto cupla_event = predecessor->cupla_event)
+                        {
+                            SPDLOG_TRACE(
+                                "cuplaDispatcher: task {} \"{}\" wait for {}",
+                                task.task_id,
+                                task.label,
+                                *cupla_event);
+
+                            cuplaStreamWaitEvent(cupla_stream, *cupla_event, 0);
+                        }
+                    }
+
+                    SPDLOG_TRACE("CuplaScheduler: start {}", task.task_id);
+
+                    current_stream = cupla_stream;
+
+                    // run the code that calls the CUDA API and submits work to current_stream
+                    task->run();
+
+                    cuplaEvent_t cupla_event = EventPool::get().alloc();
+                    cuplaEventRecord(cupla_event, cupla_stream);
+                    task->cupla_event = cupla_event;
+
+                    task->get_pre_event().notify();
+
+                    SPDLOG_TRACE("CuplaStreamDispatcher {}: recorded event {}", cupla_stream, cupla_event);
+                    events.push(std::make_pair(cupla_event, task->get_post_event()));
+                }
+            };
+
+            struct CuplaScheduler : redGrapes::scheduler::IScheduler
+            {
+            private:
+                bool recording;
+                bool cupla_graph_enabled;
+
+                std::recursive_mutex mutex;
+                unsigned int current_stream;
+                std::vector<CuplaStreamDispatcher<Task>> streams;
+
+                std::function<bool(Task const&)> is_cupla_task;
+
+            public:
+                CuplaScheduler(
+                    std::function<bool(Task const&)> is_cupla_task,
+                    size_t stream_count = 1,
+                    bool cupla_graph_enabled = false)
+                    : is_cupla_task(is_cupla_task)
+                    , current_stream(0)
+                    , cupla_graph_enabled(cupla_graph_enabled)
+                {
+                    // reserve to avoid copy constructor of CuplaStreamDispatcher
+                    streams.reserve(stream_count);
+
+                    for(size_t i = 0; i < stream_count; ++i)
+                        streams.emplace_back();
+
+                    SPDLOG_TRACE("CuplaScheduler: use {} streams", streams.size());
+                }
+
+                //! submits the call to the cupla runtime
+                void activate_task(Task& task)
+                {
+                    unsigned int stream_id = current_stream;
+                    current_stream = (current_stream + 1) % streams.size();
+
+                    SPDLOG_TRACE("Dispatch Cupla task {} \"{}\" on stream {}", task.task_id, task.label, stream_id);
+                    streams[stream_id].dispatch_task(task);
+                }
+
+                //! checks if some cupla calls finished and notifies the redGrapes manager
+                void poll()
+                {
+                    for(size_t stream_id = 0; stream_id < streams.size(); ++stream_id)
+                        streams[stream_id].poll();
+                }
+
+                /*! what's the task dependency type for the edge a -> b (task a precedes task b)
+                 * @return true if task b depends on the pre event of task a, false if task b depends on the post event
+                 * of task a. 
+                 */
+                bool task_dependency_type(Task const& a, Task const& b)
+                {
+                    assert(is_cupla_task(b));
+                    return is_cupla_task(a);
+                }
+            };
+
+        } // namespace cupla
+
+    } // namespace dispatch
 } // namespace redGrapes
-
-template <>
-struct fmt::formatter< redGrapes::dispatch::cupla::CuplaTaskProperties >
+template<>
+struct fmt::formatter<redGrapes::dispatch::cupla::CuplaTaskProperties>
 {
-    constexpr auto parse( format_parse_context& ctx )
+    constexpr auto parse(format_parse_context& ctx)
     {
         return ctx.begin();
     }
-    template < typename FormatContext >
-    auto format(
-        redGrapes::dispatch::cupla::CuplaTaskProperties const & prop,
-        FormatContext & ctx
-    )
+    template<typename FormatContext>
+    auto format(redGrapes::dispatch::cupla::CuplaTaskProperties const& prop, FormatContext& ctx)
     {
-        if( auto e = prop.cupla_event )
-            return fmt::format_to( ctx.out(), "\"cupla_event\" : {}", *e );
+        if(auto e = prop.cupla_event)
+            return fmt::format_to(ctx.out(), "\"cupla_event\" : {}", *e);
         else
-            return fmt::format_to( ctx.out(), "\"cupla_event\" : null");
+            return fmt::format_to(ctx.out(), "\"cupla_event\" : null");
     }
 };
-
-
diff --git a/redGrapes/dispatch/cupla/task_properties.hpp b/redGrapes/dispatch/cupla/task_properties.hpp
index 82dcf6cc..48ce89f4 100644
--- a/redGrapes/dispatch/cupla/task_properties.hpp
+++ b/redGrapes/dispatch/cupla/task_properties.hpp
@@ -9,40 +9,43 @@
 namespace redGrapes
 {
-namespace dispatch
-{
-namespace cupla
-{
-
-struct CuplaTaskProperties
-{
-    std::optional< cuplaEvent_t > cupla_event;
-
-    CuplaTaskProperties() {}
-
-    template < typename PropertiesBuilder >
-    struct Builder
+    namespace dispatch
     {
-        PropertiesBuilder & builder;
-
-        Builder( PropertiesBuilder & b )
-            : builder(b)
-        {}
-    };
-
-    struct Patch
-    {
-        template
-        struct Builder
+        namespace cupla
         {
-            Builder( PatchBuilder & ) {}
-        };
-    };
-
-    void apply_patch( Patch const & ) {};
-};
-
-}
-}
-}
+            struct CuplaTaskProperties
+            {
+                std::optional<cuplaEvent_t> cupla_event;
+
+                CuplaTaskProperties()
+                {
+                }
+
+                template<typename PropertiesBuilder>
+                struct Builder
+                {
+                    PropertiesBuilder& builder;
+
+                    Builder(PropertiesBuilder& b) : builder(b)
+                    {
+                    }
+                };
+
+                struct Patch
+                {
+                    template<typename PatchBuilder>
+                    struct Builder
+                    {
+                        Builder(PatchBuilder&)
+                        {
+                        }
+                    };
+                };
+
+                void apply_patch(Patch const&){};
+            };
+
+        } // namespace cupla
+    } // namespace dispatch
+} // namespace redGrapes
diff --git a/redGrapes/dispatch/dispatcher.hpp b/redGrapes/dispatch/dispatcher.hpp
index b07061bf..5896a741 100644
--- a/redGrapes/dispatch/dispatcher.hpp
+++ b/redGrapes/dispatch/dispatcher.hpp
@@ -7,4 +7,3 @@
 struct IDispatcher
     virtual void notify() = 0;
 };
 */
-
diff --git a/redGrapes/dispatch/mpi/request_pool.hpp b/redGrapes/dispatch/mpi/request_pool.hpp
index bfae6895..390ed7bc 100644
--- a/redGrapes/dispatch/mpi/request_pool.hpp
+++ b/redGrapes/dispatch/mpi/request_pool.hpp
@@ -7,102 +7,101 @@
 #pragma once
+#include
+
 #include
-#include
+
 #include
 #include
-
-#include
+#include
 namespace redGrapes
 {
-namespace dispatch
-{
-namespace mpi
-{
-
-struct RequestPool
-{
-    std::mutex mutex;
-
-    std::vector< MPI_Request > requests;
-    std::vector< scheduler::EventPtr > events;
-    std::vector< std::shared_ptr< MPI_Status > > statuses;
+    void yield(scheduler::EventPtr event);
+    std::optional<scheduler::EventPtr> create_event();
-    RequestPool( )
-    {}
-
-    /*!
-     * Tests all currently active MPI requests
-     * and notifies the corresponding events if the requests finished
-     */
-    void poll()
+    namespace dispatch
     {
-        std::lock_guard< std::mutex > lock( mutex );
-
-        if( !
requests.empty() )
-        {
-            int outcount;
-            std::vector< int > indices( requests.size() );
-            std::vector< MPI_Status > out_statuses( requests.size() );
-
-            MPI_Testsome(
-                requests.size(),
-                requests.data(),
-                &outcount,
-                indices.data(),
-                out_statuses.data());
-
-            for( int i = 0; i < outcount; ++i )
-            {
-                int idx = indices[ i ];
-
-                // write status
-                *(this->statuses[ idx ]) = out_statuses[ i ];
-
-                // finish task waiting for request
-                events[ idx ].notify();
-
-                requests.erase( requests.begin() + idx );
-                statuses.erase( statuses.begin() + idx );
-                events.erase( events.begin() + idx );
-
-                for( int j = i; j < outcount; ++j )
-                    if( indices[ j ] > idx )
-                        indices[ j ] --;
-            }
-        }
-    }
-
-    /*!
-     * Adds a new MPI request to the pool and
-     * yields until the request is done. While waiting
-     * for this request, other tasks will be executed.
-     *
-     * @param request The MPI request to wait for
-     * @return the resulting MPI status of the request
-     */
-    MPI_Status get_status( MPI_Request request )
-    {
-        auto status = memory::alloc_shared< MPI_Status >();
-        auto event = *create_event();
-
-        //SPDLOG_TRACE("MPI RequestPool: status event = {}", (void*)event.get());
-        {
-            std::lock_guard lock( mutex );
-            requests.push_back( request );
-            events.push_back( event );
-            statuses.push_back( status );
-        }
-
-        yield( event );
-
-        return *status;
-    }
-};
-
-} // namespace mpi
-} // namespace dispatch
+        namespace mpi
+        {
+            struct RequestPool
+            {
+                std::mutex mutex;
+
+                std::vector<MPI_Request> requests;
+                std::vector<scheduler::EventPtr> events;
+                std::vector<std::shared_ptr<MPI_Status>> statuses;
+
+                RequestPool()
+                {
+                }
+
+                /*!
+                 * Tests all currently active MPI requests
+                 * and notifies the corresponding events if the requests finished
+                 */
+                void poll()
+                {
+                    std::lock_guard lock(mutex);
+
+                    if(!requests.empty())
+                    {
+                        int outcount;
+                        std::vector<int> indices(requests.size());
+                        std::vector<MPI_Status> out_statuses(requests.size());
+
+                        MPI_Testsome(requests.size(), requests.data(), &outcount, indices.data(), out_statuses.data());
+
+                        for(int i = 0; i < outcount; ++i)
+                        {
+                            int idx = indices[i];
+
+                            // write status
+                            *(this->statuses[idx]) = out_statuses[i];
+
+                            // finish task waiting for request
+                            events[idx].notify();
+
+                            requests.erase(requests.begin() + idx);
+                            statuses.erase(statuses.begin() + idx);
+                            events.erase(events.begin() + idx);
+
+                            for(int j = i; j < outcount; ++j)
+                                if(indices[j] > idx)
+                                    indices[j]--;
+                        }
+                    }
+                }
+
+                /*!
+                 * Adds a new MPI request to the pool and
+                 * yields until the request is done. While waiting
+                 * for this request, other tasks will be executed.
+                 *
+                 * @param request The MPI request to wait for
+                 * @return the resulting MPI status of the request
+                 */
+                MPI_Status get_status(MPI_Request request)
+                {
+                    auto status = memory::alloc_shared<MPI_Status>();
+                    auto event = *create_event();
+
+                    // SPDLOG_TRACE("MPI RequestPool: status event = {}", (void*)event.get());
+
+                    {
+                        std::lock_guard lock(mutex);
+                        requests.push_back(request);
+                        events.push_back(event);
+                        statuses.push_back(status);
+                    }
+
+                    yield(event);
+
+                    return *status;
+                }
+            };
+
+        } // namespace mpi
+    } // namespace dispatch
 } // namespace redGrapes
-
diff --git a/redGrapes/dispatch/thread/cpuset.cpp b/redGrapes/dispatch/thread/cpuset.cpp
index fec09f74..a3d86116 100644
--- a/redGrapes/dispatch/thread/cpuset.cpp
+++ b/redGrapes/dispatch/thread/cpuset.cpp
@@ -4,40 +4,40 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include #include +#include + namespace redGrapes { -namespace dispatch -{ -namespace thread -{ + namespace dispatch + { + namespace thread + { -void pin_cpu( unsigned cpuidx ) -{ - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(cpuidx % CPU_SETSIZE, &cpuset); + void pin_cpu(unsigned cpuidx) + { + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpuidx % CPU_SETSIZE, &cpuset); - int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); - if( rc != 0 ) - spdlog::error("cannot set thread affinity ({})", rc); -} + int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + if(rc != 0) + spdlog::error("cannot set thread affinity ({})", rc); + } -void unpin_cpu() -{ - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - for(int j=0; j < 64; ++j) - CPU_SET(j, &cpuset); + void unpin_cpu() + { + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + for(int j = 0; j < 64; ++j) + CPU_SET(j, &cpuset); - int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); - if( rc != 0 ) - spdlog::error("cannot set thread affinity ({})", rc); -} + int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + if(rc != 0) + spdlog::error("cannot set thread affinity ({})", rc); + } -} // namespace thread -} // namespace dispatch + } // namespace thread + } // namespace dispatch } // namespace redGrapes - diff --git a/redGrapes/dispatch/thread/cpuset.hpp b/redGrapes/dispatch/thread/cpuset.hpp index 658fce80..29714f98 100644 --- a/redGrapes/dispatch/thread/cpuset.hpp +++ b/redGrapes/dispatch/thread/cpuset.hpp @@ -9,15 +9,14 @@ namespace redGrapes { -namespace dispatch -{ -namespace thread -{ - -void pin_cpu( unsigned ); -void unpin_cpu(); + namespace dispatch + { + namespace thread + { -} -} -} + void pin_cpu(unsigned); + void unpin_cpu(); + } // namespace thread + } // namespace dispatch +} // namespace redGrapes diff --git a/redGrapes/dispatch/thread/execute.cpp b/redGrapes/dispatch/thread/execute.cpp index 536e2548..2e2dc254 100644 --- a/redGrapes/dispatch/thread/execute.cpp +++ b/redGrapes/dispatch/thread/execute.cpp @@ -5,15 +5,16 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include -#include -#include - -#include +#include #include +#include #include #include -#include + +#include +#include + +#include namespace redGrapes { @@ -23,33 +24,32 @@ namespace dispatch namespace thread {*/ -void Context::execute_task( Task & task ) -{ - TRACE_EVENT("Worker", "dispatch task"); + void Context::execute_task(Task& task) + { + TRACE_EVENT("Worker", "dispatch task"); - SPDLOG_DEBUG("thread dispatch: execute task {}", task.task_id); - assert( task.is_ready() ); + SPDLOG_DEBUG("thread dispatch: execute task {}", task.task_id); + assert(task.is_ready()); - task.get_pre_event().notify(); - current_task = &task; + task.get_pre_event().notify(); + current_task = &task; - auto event = task(); - - if( event ) - { - event->get_event().waker_id = current_worker->get_waker_id(); - task.sg_pause( *event ); + auto event = task(); - task.pre_event.up(); - task.get_pre_event().notify(); - } - else - task.get_post_event().notify(); + if(event) + { + event->get_event().waker_id = current_worker->get_waker_id(); + task.sg_pause(*event); - current_task = nullptr; -} + task.pre_event.up(); + task.get_pre_event().notify(); + } + else + task.get_post_event().notify(); -//} // namespace thread -//} // namespace dispatch -} // namespace redGrapes + current_task = nullptr; + } + //} // namespace thread + //} // namespace dispatch +} // namespace redGrapes diff --git a/redGrapes/dispatch/thread/worker.cpp b/redGrapes/dispatch/thread/worker.cpp index 49ddb4aa..e25c5d20 100644 --- a/redGrapes/dispatch/thread/worker.cpp +++ b/redGrapes/dispatch/thread/worker.cpp @@ -5,209 +5,224 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include -#include -#include #include #include +#include +#include #include +#include +#include -namespace redGrapes -{ -namespace dispatch -{ -namespace thread -{ -WorkerThread::WorkerThread( memory::ChunkedBumpAlloc< memory::HwlocAlloc > & alloc, HwlocContext & hwloc_ctx, hwloc_obj_t const & obj, WorkerId worker_id ) - : Worker( alloc, hwloc_ctx, obj, worker_id ) -{ -} - -WorkerThread::~WorkerThread() -{ -} - -void WorkerThread::start() -{ - thread = std::thread([this]{ this->run(); }); -} - -Worker::Worker( memory::ChunkedBumpAlloc & alloc, HwlocContext & hwloc_ctx, hwloc_obj_t const & obj, WorkerId worker_id ) - : alloc( alloc ) - , hwloc_ctx( hwloc_ctx ) - , id( worker_id ) -{ -} - -Worker::~Worker() -{ -} - -void Worker::stop() -{ - SPDLOG_TRACE("Worker::stop()"); - m_stop.store(true, std::memory_order_release); - wake(); -} +#include -void WorkerThread::stop() -{ - Worker::stop(); - thread.join(); -} +#include -void WorkerThread::run() +namespace redGrapes { - /* setup membind- & cpubind policies using hwloc - */ - this->cpubind(); - this->membind(); - - /* since we are in a worker, there should always - * be a task running (we always have a parent task - * and therefore yield() guarantees to do - * a context-switch instead of idling - */ + namespace dispatch + { + namespace thread + { + WorkerThread::WorkerThread( + memory::ChunkedBumpAlloc& alloc, + HwlocContext& hwloc_ctx, + hwloc_obj_t const& obj, + WorkerId worker_id) + : Worker(alloc, hwloc_ctx, obj, worker_id) + { + } + + WorkerThread::~WorkerThread() + { + } + + void WorkerThread::start() + { + thread = std::thread([this] { this->run(); }); + } + + Worker::Worker( + memory::ChunkedBumpAlloc& alloc, + HwlocContext& hwloc_ctx, + hwloc_obj_t const& obj, + WorkerId worker_id) + : alloc(alloc) + , hwloc_ctx(hwloc_ctx) + , id(worker_id) + { + } + + Worker::~Worker() + { + } + + 
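// A usage sketch for this lifecycle API (illustration only; `alloc`,
+            // `hwloc_ctx`, `obj` and `t` are placeholders, not part of the patch):
+            //
+            //     auto w = std::make_shared<WorkerThread>(alloc, hwloc_ctx, obj, 0);
+            //     w->start();         // spawns the std::thread that runs run()
+            //     w->emplace_task(t); // enqueue work; wake() kickstarts execution
+            //     w->stop();          // raises m_stop, wakes the thread, joins it
+
+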
void Worker::stop() + { + SPDLOG_TRACE("Worker::stop()"); + m_stop.store(true, std::memory_order_release); + wake(); + } + + void WorkerThread::stop() + { + Worker::stop(); + thread.join(); + } + + void WorkerThread::run() + { + /* setup membind- & cpubind policies using hwloc + */ + this->cpubind(); + this->membind(); + + /* since we are in a worker, there should always + * be a task running (we always have a parent task + * and therefore yield() guarantees to do + * a context-switch instead of idling + */ /* idle = [this] { throw std::runtime_error("idle in worker thread!"); }; */ - /* initialize thread-local variables - */ - SingletonContext::get().current_worker = this->shared_from_this(); - SingletonContext::get().current_waker_id = this->get_waker_id(); - SingletonContext::get().current_arena = this->get_worker_id(); - - /* execute tasks until stop() - */ - this->work_loop(); - - SingletonContext::get().current_worker.reset(); - - SPDLOG_TRACE("Worker Finished!"); -} - -void WorkerThread::cpubind() -{ - size_t n_pus = hwloc_get_nbobjs_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU); - hwloc_obj_t obj = hwloc_get_obj_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU, id%n_pus); - - if( hwloc_set_cpubind(hwloc_ctx.topology, obj->cpuset, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT) ) - { - char *str; - int error = errno; - hwloc_bitmap_asprintf(&str, obj->cpuset); - spdlog::warn("Couldn't cpubind to cpuset {}: {}\n", str, strerror(error)); - free(str); - } -} - -void WorkerThread::membind() -{ - size_t n_pus = hwloc_get_nbobjs_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU); - hwloc_obj_t obj = hwloc_get_obj_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU, id%n_pus); - if( hwloc_set_membind(hwloc_ctx.topology, obj->cpuset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_THREAD | HWLOC_MEMBIND_STRICT ) ) - { - char *str; - int error = errno; - hwloc_bitmap_asprintf(&str, obj->cpuset); - spdlog::warn("Couldn't membind to cpuset {}: {}\n", str, strerror(error)); - free(str); - } -} - -void Worker::work_loop() -{ - SPDLOG_TRACE("Worker {} start work_loop()", id); - while( ! 
m_stop.load(std::memory_order_consume) ) - { - SingletonContext::get().worker_pool->set_worker_state( id, dispatch::thread::WorkerState::AVAILABLE ); - cv.wait(); - - while( Task * task = this->gather_task() ) - { - SingletonContext::get().worker_pool->set_worker_state( id, dispatch::thread::WorkerState::BUSY ); - SingletonContext::get().execute_task( *task ); - } - - } - SPDLOG_TRACE("Worker {} end work_loop()", id); -} + /* initialize thread-local variables + */ + SingletonContext::get().current_worker = this->shared_from_this(); + SingletonContext::get().current_waker_id = this->get_waker_id(); + SingletonContext::get().current_arena = this->get_worker_id(); + + /* execute tasks until stop() + */ + this->work_loop(); + + SingletonContext::get().current_worker.reset(); + + SPDLOG_TRACE("Worker Finished!"); + } + + void WorkerThread::cpubind() + { + size_t n_pus = hwloc_get_nbobjs_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU); + hwloc_obj_t obj = hwloc_get_obj_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU, id % n_pus); + + if(hwloc_set_cpubind(hwloc_ctx.topology, obj->cpuset, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT)) + { + char* str; + int error = errno; + hwloc_bitmap_asprintf(&str, obj->cpuset); + spdlog::warn("Couldn't cpubind to cpuset {}: {}\n", str, strerror(error)); + free(str); + } + } + + void WorkerThread::membind() + { + size_t n_pus = hwloc_get_nbobjs_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU); + hwloc_obj_t obj = hwloc_get_obj_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU, id % n_pus); + if(hwloc_set_membind( + hwloc_ctx.topology, + obj->cpuset, + HWLOC_MEMBIND_BIND, + HWLOC_MEMBIND_THREAD | HWLOC_MEMBIND_STRICT)) + { + char* str; + int error = errno; + hwloc_bitmap_asprintf(&str, obj->cpuset); + spdlog::warn("Couldn't membind to cpuset {}: {}\n", str, strerror(error)); + free(str); + } + } + + void Worker::work_loop() + { + SPDLOG_TRACE("Worker {} start work_loop()", id); + while(!m_stop.load(std::memory_order_consume)) + { + SingletonContext::get().worker_pool->set_worker_state( + id, + dispatch::thread::WorkerState::AVAILABLE); + cv.wait(); + + while(Task* task = this->gather_task()) + { + SingletonContext::get().worker_pool->set_worker_state(id, dispatch::thread::WorkerState::BUSY); + SingletonContext::get().execute_task(*task); + } + } + SPDLOG_TRACE("Worker {} end work_loop()", id); + } + + Task* Worker::gather_task() + { + TRACE_EVENT("Worker", "gather_task()"); + Task* task = nullptr; + + /* STAGE 1: + * + * first, execute all tasks in the ready queue + */ + SPDLOG_TRACE("Worker {}: consume ready queue", id); + if(task = ready_queue.pop()) + return task; + + /* STAGE 2: + * + * after the ready queue is fully consumed, + * try initializing new tasks until one + * of them is found to be ready + */ + SPDLOG_TRACE("Worker {}: try init new tasks", id); + while(this->init_dependencies(task, true)) + if(task) + return task; + + /* set worker state to signal that we are requesting tasks + */ + SingletonContext::get().worker_pool->set_worker_state(id, dispatch::thread::WorkerState::AVAILABLE); -Task * Worker::gather_task() -{ - TRACE_EVENT("Worker", "gather_task()"); - Task * task = nullptr; - - /* STAGE 1: - * - * first, execute all tasks in the ready queue - */ - SPDLOG_TRACE("Worker {}: consume ready queue", id); - if( task = ready_queue.pop() ) - return task; - - /* STAGE 2: - * - * after the ready queue is fully consumed, - * try initializing new tasks until one - * of them is found to be ready - */ - SPDLOG_TRACE("Worker {}: try init new tasks", id); - while( 
this->init_dependencies( task, true ) ) - if( task ) - return task; - - /* set worker state to signal that we are requesting tasks - */ - SingletonContext::get().worker_pool->set_worker_state( id, dispatch::thread::WorkerState::AVAILABLE ); - #ifndef ENABLE_WORKSTEALING -#define ENABLE_WORKSTEALING 1 +# define ENABLE_WORKSTEALING 1 #endif - + #if ENABLE_WORKSTEALING - /* STAGE 3: - * - * after all tasks from own queues are consumed, try to steal tasks - */ - SPDLOG_TRACE("Worker {}: try to steal tasks", id); - task = SingletonContext::get().scheduler->steal_task( *this ); + /* STAGE 3: + * + * after all tasks from own queues are consumed, try to steal tasks + */ + SPDLOG_TRACE("Worker {}: try to steal tasks", id); + task = SingletonContext::get().scheduler->steal_task(*this); #endif - return task; -} - -bool Worker::init_dependencies( Task* & t, bool claimed ) -{ - TRACE_EVENT("Worker", "init_dependencies()"); - if(Task * task = emplacement_queue.pop()) - { - SPDLOG_DEBUG("init task {}", task->task_id); - - task->pre_event.up(); - task->init_graph(); - - if( task->get_pre_event().notify( claimed ) ) - t = task; - else - { - t = nullptr; - } - - return true; - } - else - return false; -} - -} // namespace thread -} // namespace dispatch + return task; + } + + bool Worker::init_dependencies(Task*& t, bool claimed) + { + TRACE_EVENT("Worker", "init_dependencies()"); + if(Task* task = emplacement_queue.pop()) + { + SPDLOG_DEBUG("init task {}", task->task_id); + + task->pre_event.up(); + task->init_graph(); + + if(task->get_pre_event().notify(claimed)) + t = task; + else + { + t = nullptr; + } + + return true; + } + else + return false; + } + + } // namespace thread + } // namespace dispatch } // namespace redGrapes - diff --git a/redGrapes/dispatch/thread/worker.hpp b/redGrapes/dispatch/thread/worker.hpp index 650d07a9..09137df2 100644 --- a/redGrapes/dispatch/thread/worker.hpp +++ b/redGrapes/dispatch/thread/worker.hpp @@ -7,130 +7,147 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include +#include #include -#include -#include - -namespace redGrapes -{ - -namespace dispatch -{ -namespace thread -{ +#include +#include -struct WorkerThread; +#include +#include +#include +#include +#include -/*! - * Creates a thread which repeatedly calls consume() - * until stop() is invoked or the object destroyed. - * - * Sleeps when no jobs are available. - */ -struct Worker - : redGrapes::scheduler::IScheduler +namespace redGrapes { - //private: - WorkerId id; - - /*! if true, the thread shall stop - * instead of waiting when it is out of jobs - */ - std::atomic_bool m_stop{ false }; - - - std::atomic task_count{ 0 }; - - //! 
condition variable for waiting if queue is empty - CondVar cv; - - static constexpr size_t queue_capacity = 128; - -public: - memory::ChunkedBumpAlloc< memory::HwlocAlloc > & alloc; - HwlocContext & hwloc_ctx; - - task::Queue emplacement_queue{ queue_capacity }; - task::Queue ready_queue{ queue_capacity }; - Worker( memory::ChunkedBumpAlloc< memory::HwlocAlloc > & alloc, HwlocContext & hwloc_ctx, hwloc_obj_t const & obj, WorkerId id ); - virtual ~Worker(); - - inline WorkerId get_worker_id() { return id; } - inline scheduler::WakerId get_waker_id() { return id + 1; } - inline bool wake() { return cv.notify(); } - - virtual void stop(); - - /* adds a new task to the emplacement queue - * and wakes up thread to kickstart execution - */ - inline void emplace_task( Task & task ) + namespace dispatch { - emplacement_queue.push( &task ); - wake(); - } - - inline void activate_task( Task & task ) - { - ready_queue.push( &task ); - wake(); - } - - //private: - - /* repeatedly try to find and execute tasks - * until stop-flag is triggered by stop() - */ - void work_loop(); - - /* find a task that shall be executed next - */ - Task * gather_task(); - - /*! take a task from the emplacement queue and initialize it, - * @param t is set to the task if the new task is ready, - * @param t is set to nullptr if the new task is blocked. - * @param claimed if set, the new task will not be actiated, - * if it is false, activate_task will be called by notify_event - * - * @return false if queue is empty - */ - bool init_dependencies( Task* & t, bool claimed = true ); -}; - -struct WorkerThread - : Worker - , std::enable_shared_from_this -{ - std::thread thread; - - WorkerThread( memory::ChunkedBumpAlloc & alloc, HwlocContext & hwloc_ctx, hwloc_obj_t const & obj, WorkerId worker_id ); - ~WorkerThread(); - - void start(); - void stop(); - - /* function the thread will execute - */ - void run(); - - void cpubind(); - void membind(); -}; - -} // namespace thread -} // namespace dispatch + namespace thread + { + + struct WorkerThread; + + /*! + * Creates a thread which repeatedly calls consume() + * until stop() is invoked or the object destroyed. + * + * Sleeps when no jobs are available. + */ + struct Worker : redGrapes::scheduler::IScheduler + { + // private: + WorkerId id; + + /*! if true, the thread shall stop + * instead of waiting when it is out of jobs + */ + std::atomic_bool m_stop{false}; + + + std::atomic task_count{0}; + + //! 
condition variable for waiting if queue is empty
+                CondVar cv;
+
+                static constexpr size_t queue_capacity = 128;
+
+            public:
+                memory::ChunkedBumpAlloc<memory::HwlocAlloc>& alloc;
+                HwlocContext& hwloc_ctx;
+
+                task::Queue emplacement_queue{queue_capacity};
+                task::Queue ready_queue{queue_capacity};
+
+                Worker(
+                    memory::ChunkedBumpAlloc<memory::HwlocAlloc>& alloc,
+                    HwlocContext& hwloc_ctx,
+                    hwloc_obj_t const& obj,
+                    WorkerId id);
+                virtual ~Worker();
+
+                inline WorkerId get_worker_id()
+                {
+                    return id;
+                }
+
+                inline scheduler::WakerId get_waker_id()
+                {
+                    return id + 1;
+                }
+
+                inline bool wake()
+                {
+                    return cv.notify();
+                }
+
+                virtual void stop();
+
+                /* adds a new task to the emplacement queue
+                 * and wakes up thread to kickstart execution
+                 */
+                inline void emplace_task(Task& task)
+                {
+                    emplacement_queue.push(&task);
+                    wake();
+                }
+
+                inline void activate_task(Task& task)
+                {
+                    ready_queue.push(&task);
+                    wake();
+                }
+
+                // private:
+
+                /* repeatedly try to find and execute tasks
+                 * until stop-flag is triggered by stop()
+                 */
+                void work_loop();
+
+                /* find a task that shall be executed next
+                 */
+                Task* gather_task();
+
+                /*! take a task from the emplacement queue and initialize it,
+                 * @param t is set to the task if the new task is ready,
+                 * @param t is set to nullptr if the new task is blocked.
+                 * @param claimed if set, the new task will not be activated;
+                 *                if it is false, activate_task will be called by notify_event
+                 *
+                 * @return false if queue is empty
+                 */
+                bool init_dependencies(Task*& t, bool claimed = true);
+            };
+
+            struct WorkerThread
+                : Worker
+                , std::enable_shared_from_this<WorkerThread>
+            {
+                std::thread thread;
+
+                WorkerThread(
+                    memory::ChunkedBumpAlloc<memory::HwlocAlloc>& alloc,
+                    HwlocContext& hwloc_ctx,
+                    hwloc_obj_t const& obj,
+                    WorkerId worker_id);
+                ~WorkerThread();
+
+                void start();
+                void stop();
+
+                /* function the thread will execute
+                 */
+                void run();
+
+                void cpubind();
+                void membind();
+            };
+
+        } // namespace thread
+    } // namespace dispatch
 } // namespace redGrapes
-
diff --git a/redGrapes/dispatch/thread/worker_pool.cpp b/redGrapes/dispatch/thread/worker_pool.cpp
index 920916c9..194df68c 100644
--- a/redGrapes/dispatch/thread/worker_pool.cpp
+++ b/redGrapes/dispatch/thread/worker_pool.cpp
@@ -7,102 +7,99 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include
+#include
-//#include
+// #include
 namespace redGrapes
 {
-namespace dispatch
-{
-namespace thread
-{
-
-WorkerPool::WorkerPool( HwlocContext & hwloc_ctx, size_t n_workers )
-    : hwloc_ctx( hwloc_ctx )
-    , worker_state( n_workers )
-{
-    Context::current_waker_id = 0;
-}
-
-void WorkerPool::emplace_workers( size_t n_workers )
-{
-    unsigned n_pus = hwloc_get_nbobjs_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU);
-    if( n_workers > n_pus )
-        spdlog::warn("{} worker-threads requested, but only {} PUs available!", n_workers, n_pus);
-
-    allocs.reserve( n_workers );
-    workers.reserve( n_workers );
-
-    SPDLOG_INFO("populate WorkerPool with {} workers", n_workers);
-    for( size_t worker_id = 0; worker_id < n_workers; ++worker_id )
+    namespace dispatch
     {
-        unsigned pu_id = worker_id % n_pus;
-        // allocate worker with id `i` on arena `i`,
-        hwloc_obj_t obj = hwloc_get_obj_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU, pu_id);
-        allocs.emplace_back(
-            memory::HwlocAlloc( hwloc_ctx, obj ),
-            REDGRAPES_ALLOC_CHUNKSIZE
-        );
-
-        SingletonContext::get().current_arena = pu_id;
-        auto worker = memory::alloc_shared_bind( pu_id, get_alloc(pu_id), hwloc_ctx, obj, worker_id );
-// auto worker = std::make_shared< WorkerThread >( get_alloc(i), hwloc_ctx, obj, i );
-        
workers.emplace_back( worker ); - } -} - -WorkerPool::~WorkerPool() -{ -} + namespace thread + { -void WorkerPool::start() -{ - for( auto & worker : workers ) - worker->start(); -} - -void WorkerPool::stop() -{ - for( auto & worker : workers ) - worker->stop(); + WorkerPool::WorkerPool(HwlocContext& hwloc_ctx, size_t n_workers) + : hwloc_ctx(hwloc_ctx) + , worker_state(n_workers) + { + Context::current_waker_id = 0; + } - workers.clear(); -} + void WorkerPool::emplace_workers(size_t n_workers) + { + unsigned n_pus = hwloc_get_nbobjs_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU); + if(n_workers > n_pus) + spdlog::warn("{} worker-threads requested, but only {} PUs available!", n_workers, n_pus); + + allocs.reserve(n_workers); + workers.reserve(n_workers); + + SPDLOG_INFO("populate WorkerPool with {} workers", n_workers); + for(size_t worker_id = 0; worker_id < n_workers; ++worker_id) + { + unsigned pu_id = worker_id % n_pus; + // allocate worker with id `i` on arena `i`, + hwloc_obj_t obj = hwloc_get_obj_by_type(hwloc_ctx.topology, HWLOC_OBJ_PU, pu_id); + allocs.emplace_back(memory::HwlocAlloc(hwloc_ctx, obj), REDGRAPES_ALLOC_CHUNKSIZE); + + SingletonContext::get().current_arena = pu_id; + auto worker + = memory::alloc_shared_bind(pu_id, get_alloc(pu_id), hwloc_ctx, obj, worker_id); + // auto worker = std::make_shared< WorkerThread >( get_alloc(i), hwloc_ctx, obj, i ); + workers.emplace_back(worker); + } + } + + WorkerPool::~WorkerPool() + { + } -int WorkerPool::find_free_worker() -{ - TRACE_EVENT("Scheduler", "find_worker"); + void WorkerPool::start() + { + for(auto& worker : workers) + worker->start(); + } - SPDLOG_TRACE("find worker..."); + void WorkerPool::stop() + { + for(auto& worker : workers) + worker->stop(); - unsigned start_idx = 0; - if(auto w = SingletonContext::get().current_worker) - start_idx = w->get_worker_id(); + workers.clear(); + } - std::optional idx = - this->probe_worker_by_state( - [this](unsigned idx) -> std::optional + int WorkerPool::find_free_worker() { - if(set_worker_state(idx, WorkerState::BUSY)) - return idx; + TRACE_EVENT("Scheduler", "find_worker"); + + SPDLOG_TRACE("find worker..."); + + unsigned start_idx = 0; + if(auto w = SingletonContext::get().current_worker) + start_idx = w->get_worker_id(); + + std::optional idx = this->probe_worker_by_state( + [this](unsigned idx) -> std::optional + { + if(set_worker_state(idx, WorkerState::BUSY)) + return idx; + else + return std::nullopt; + }, + dispatch::thread::WorkerState::AVAILABLE, // find a free worker + start_idx, + false); + + if(idx) + return *idx; else - return std::nullopt; - }, - dispatch::thread::WorkerState::AVAILABLE, // find a free worker - start_idx, - false); - - if( idx ) - return *idx; - else - // no free worker found, - return -1; -} - -} // namespace thread -} // namespace dispatch -} // namespace redGrapes + // no free worker found, + return -1; + } + } // namespace thread + } // namespace dispatch +} // namespace redGrapes diff --git a/redGrapes/dispatch/thread/worker_pool.hpp b/redGrapes/dispatch/thread/worker_pool.hpp index 38b20b15..d8acefa1 100644 --- a/redGrapes/dispatch/thread/worker_pool.hpp +++ b/redGrapes/dispatch/thread/worker_pool.hpp @@ -6,102 +6,106 @@ */ #pragma once -#include -#include -#include #include +#include +#include -namespace redGrapes -{ -struct HwlocContext; - -namespace dispatch -{ -namespace thread -{ - -using WorkerId = unsigned; -enum WorkerState { - BUSY = 0, - AVAILABLE = 1 -}; - -struct WorkerThread; +#include -struct WorkerPool +namespace redGrapes { - 
WorkerPool( HwlocContext & hwloc_ctx, size_t n_workers = 1 ); - ~WorkerPool(); + struct HwlocContext; - void emplace_workers( size_t n_workers ); - - /* get the number of workers in this pool - */ - inline size_t size() - { - return workers.size(); - } - - /* signals all workers to start executing tasks - */ - void start(); - - /* signals all workers that no new tasks will be added - */ - void stop(); - - inline memory::ChunkedBumpAlloc< memory::HwlocAlloc > & get_alloc( WorkerId worker_id ) - { - assert( worker_id < allocs.size() ); - return allocs[ worker_id ]; - } - - inline WorkerThread & get_worker( WorkerId worker_id ) + namespace dispatch { - assert( worker_id < size() ); - return *workers[ worker_id ]; - } - - inline WorkerState get_worker_state( WorkerId worker_id ) - { - return worker_state.get(worker_id) ? WorkerState::AVAILABLE : WorkerState::BUSY; - } - - /* return true on success - */ - inline bool set_worker_state( WorkerId worker_id, WorkerState state ) - { - return worker_state.set( worker_id, state ) != state; - } - - template - inline std::optional< T > - probe_worker_by_state( - F && f, - bool expected_worker_state, - unsigned start_worker_idx, - bool exclude_start = true) - { - return worker_state.template probe_by_value( std::move(f), expected_worker_state, start_worker_idx ); - } - - /*! - * tries to find an available worker, but potentially - * returns a busy worker if no free worker is available - * - * @return worker_id - */ - int find_free_worker(); - -private: - HwlocContext & hwloc_ctx; - - std::vector< memory::ChunkedBumpAlloc< memory::HwlocAlloc > > allocs; - std::vector< std::shared_ptr< dispatch::thread::WorkerThread > > workers; - AtomicBitfield worker_state; -}; - -} // namespace thread -} // namespace dispatch + namespace thread + { + + using WorkerId = unsigned; + + enum WorkerState + { + BUSY = 0, + AVAILABLE = 1 + }; + + struct WorkerThread; + + struct WorkerPool + { + WorkerPool(HwlocContext& hwloc_ctx, size_t n_workers = 1); + ~WorkerPool(); + + void emplace_workers(size_t n_workers); + + /* get the number of workers in this pool + */ + inline size_t size() + { + return workers.size(); + } + + /* signals all workers to start executing tasks + */ + void start(); + + /* signals all workers that no new tasks will be added + */ + void stop(); + + inline memory::ChunkedBumpAlloc& get_alloc(WorkerId worker_id) + { + assert(worker_id < allocs.size()); + return allocs[worker_id]; + } + + inline WorkerThread& get_worker(WorkerId worker_id) + { + assert(worker_id < size()); + return *workers[worker_id]; + } + + inline WorkerState get_worker_state(WorkerId worker_id) + { + return worker_state.get(worker_id) ? WorkerState::AVAILABLE : WorkerState::BUSY; + } + + /* return true on success + */ + inline bool set_worker_state(WorkerId worker_id, WorkerState state) + { + return worker_state.set(worker_id, state) != state; + } + + template + inline std::optional probe_worker_by_state( + F&& f, + bool expected_worker_state, + unsigned start_worker_idx, + bool exclude_start = true) + { + return worker_state.template probe_by_value( + std::move(f), + expected_worker_state, + start_worker_idx); + } + + /*! 
+ * tries to find an available worker, but potentially + * returns a busy worker if no free worker is available + * + * @return worker_id + */ + int find_free_worker(); + + private: + HwlocContext& hwloc_ctx; + + std::vector> allocs; + std::vector> workers; + AtomicBitfield worker_state; + }; + + } // namespace thread + } // namespace dispatch } // namespace redGrapes - diff --git a/redGrapes/memory/allocator.cpp b/redGrapes/memory/allocator.cpp index 5a982283..09f867ae 100644 --- a/redGrapes/memory/allocator.cpp +++ b/redGrapes/memory/allocator.cpp @@ -1,34 +1,33 @@ -#include -#include #include - +#include #include #include +#include + namespace redGrapes { -namespace memory -{ + namespace memory + { -Allocator::Allocator() - : Allocator(SingletonContext::get().current_arena) {} + Allocator::Allocator() : Allocator(SingletonContext::get().current_arena) + { + } -Allocator::Allocator( dispatch::thread::WorkerId worker_id ) - : worker_id( - worker_id % SingletonContext::get().n_workers - ) -{} + Allocator::Allocator(dispatch::thread::WorkerId worker_id) + : worker_id(worker_id % SingletonContext::get().n_workers) + { + } -Block Allocator::allocate( size_t n_bytes ) -{ - return SingletonContext::get().worker_pool->get_alloc( worker_id ).allocate( n_bytes ); -} + Block Allocator::allocate(size_t n_bytes) + { + return SingletonContext::get().worker_pool->get_alloc(worker_id).allocate(n_bytes); + } -void Allocator::deallocate( Block blk ) -{ - SingletonContext::get().worker_pool->get_alloc( worker_id ).deallocate( blk ); -} + void Allocator::deallocate(Block blk) + { + SingletonContext::get().worker_pool->get_alloc(worker_id).deallocate(blk); + } -} // namespace memory + } // namespace memory } // namespace redGrapes - diff --git a/redGrapes/memory/allocator.hpp b/redGrapes/memory/allocator.hpp index 4c090ba3..5167b260 100644 --- a/redGrapes/memory/allocator.hpp +++ b/redGrapes/memory/allocator.hpp @@ -1,107 +1,120 @@ #pragma once -#include -#include #include #include -#include - -namespace redGrapes -{ - -namespace dispatch -{ -namespace thread -{ - using WorkerId = unsigned; - struct WorkerPool; -} -} -extern std::shared_ptr< dispatch::thread::WorkerPool > worker_pool; - -namespace memory -{ - -struct Allocator -{ - dispatch::thread::WorkerId worker_id; - - // allocate on `current_arena` given by `SingletonContext` - Allocator(); - - // allocate on arena for specific worker - Allocator( dispatch::thread::WorkerId worker_id ); +#include +#include - Block allocate( size_t n_bytes ); - void deallocate( Block blk ); -}; +#include -template < typename T > -struct StdAllocator +namespace redGrapes { - Allocator alloc; - typedef T value_type; - - StdAllocator () : alloc() {} - StdAllocator( dispatch::thread::WorkerId worker_id ) : alloc( worker_id ) {} - - template< typename U > - constexpr StdAllocator(StdAllocator const& other) noexcept - : alloc( other.alloc ) - { - } - inline T* allocate( std::size_t n ) + namespace dispatch { - Block blk = alloc.allocate( sizeof(T) * n ); - SPDLOG_TRACE("allocate {},{},{}", (uintptr_t)blk.ptr, n*sizeof(T), boost::core::demangle(typeid(T).name())); + namespace thread + { + using WorkerId = unsigned; + struct WorkerPool; + } // namespace thread + } // namespace dispatch - return (T*) blk.ptr; - } - - inline void deallocate(T* p, std::size_t n = 0) noexcept - { - alloc.deallocate(Block { (uintptr_t)p, sizeof(T)*n }); - } - - template < typename U, typename... Args > - void construct(U * p, Args&&... args ) - { - new (p) U ( std::forward(args)... 
); - } + extern std::shared_ptr worker_pool; - template < typename U > - void destroy( U * p ) + namespace memory { - p->~U(); - } -}; -template -bool operator==(StdAllocator const&, StdAllocator const &) { return true; } - -template -bool operator!=(StdAllocator const&, StdAllocator const&) { return false; } - - -/* allocates a shared_ptr in the memory pool of a given worker - */ -template < typename T, typename... Args > -std::shared_ptr alloc_shared_bind( dispatch::thread::WorkerId worker_id, Args&&... args ) -{ - return std::allocate_shared< T, StdAllocator >( StdAllocator( worker_id ), std::forward(args)... ); -} - -/* allocates a shared_ptr in the memory pool of the current worker - */ -template < typename T, typename... Args > -std::shared_ptr alloc_shared( Args&&... args ) -{ - return std::allocate_shared< T, StdAllocator >( StdAllocator(), std::forward(args)... ); -} - -} // namespace memory + struct Allocator + { + dispatch::thread::WorkerId worker_id; + + // allocate on `current_arena` given by `SingletonContext` + Allocator(); + + // allocate on arena for specific worker + Allocator(dispatch::thread::WorkerId worker_id); + + Block allocate(size_t n_bytes); + void deallocate(Block blk); + }; + + template + struct StdAllocator + { + Allocator alloc; + typedef T value_type; + + StdAllocator() : alloc() + { + } + + StdAllocator(dispatch::thread::WorkerId worker_id) : alloc(worker_id) + { + } + + template + constexpr StdAllocator(StdAllocator const& other) noexcept : alloc(other.alloc) + { + } + + inline T* allocate(std::size_t n) + { + Block blk = alloc.allocate(sizeof(T) * n); + SPDLOG_TRACE( + "allocate {},{},{}", + (uintptr_t) blk.ptr, + n * sizeof(T), + boost::core::demangle(typeid(T).name())); + + return (T*) blk.ptr; + } + + inline void deallocate(T* p, std::size_t n = 0) noexcept + { + alloc.deallocate(Block{(uintptr_t) p, sizeof(T) * n}); + } + + template + void construct(U* p, Args&&... args) + { + new(p) U(std::forward(args)...); + } + + template + void destroy(U* p) + { + p->~U(); + } + }; + + template + bool operator==(StdAllocator const&, StdAllocator const&) + { + return true; + } + + template + bool operator!=(StdAllocator const&, StdAllocator const&) + { + return false; + } + + /* allocates a shared_ptr in the memory pool of a given worker + */ + template + std::shared_ptr alloc_shared_bind(dispatch::thread::WorkerId worker_id, Args&&... args) + { + return std::allocate_shared>(StdAllocator(worker_id), std::forward(args)...); + } + + /* allocates a shared_ptr in the memory pool of the current worker + */ + template + std::shared_ptr alloc_shared(Args&&... 
args) + { + return std::allocate_shared>(StdAllocator(), std::forward(args)...); + } + + } // namespace memory } // namespace redGrapes - - diff --git a/redGrapes/memory/block.hpp b/redGrapes/memory/block.hpp index ba9afeee..2f308cfd 100644 --- a/redGrapes/memory/block.hpp +++ b/redGrapes/memory/block.hpp @@ -18,39 +18,35 @@ namespace redGrapes { -namespace memory -{ - -struct Block -{ - uintptr_t ptr; - std::size_t len; - - inline operator void* () const - { - return reinterpret_cast(ptr); - } - - inline bool operator==(Block const & other) const - { - return ptr == other.ptr && len == other.len; - } - - inline operator bool() const + namespace memory { - return (bool)ptr; - } - inline static Block null() - { - return Block { - .ptr = 0, - .len = 0 + struct Block + { + uintptr_t ptr; + std::size_t len; + + inline operator void*() const + { + return reinterpret_cast(ptr); + } + + inline bool operator==(Block const& other) const + { + return ptr == other.ptr && len == other.len; + } + + inline operator bool() const + { + return (bool) ptr; + } + + static inline Block null() + { + return Block{.ptr = 0, .len = 0}; + } }; - } -}; - -} // memory -} // redGrapes + } // namespace memory +} // namespace redGrapes diff --git a/redGrapes/memory/bump_allocator.cpp b/redGrapes/memory/bump_allocator.cpp index 003b324f..ba77e2ae 100644 --- a/redGrapes/memory/bump_allocator.cpp +++ b/redGrapes/memory/bump_allocator.cpp @@ -5,81 +5,78 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include - #include #include +#include + +#include +#include + namespace redGrapes { -namespace memory -{ + namespace memory + { -BumpAllocator::BumpAllocator( Block blk ) - : BumpAllocator( - (uintptr_t)blk.ptr, - (uintptr_t)blk.ptr + blk.len - ) -{} + BumpAllocator::BumpAllocator(Block blk) : BumpAllocator((uintptr_t) blk.ptr, (uintptr_t) blk.ptr + blk.len) + { + } -BumpAllocator::BumpAllocator( uintptr_t lower_limit, uintptr_t upper_limit ) - : lower_limit( lower_limit ) - , upper_limit( upper_limit ) - , count(0) -{ - SPDLOG_INFO("bumpallochunk: lower={}, upper={}", lower_limit, upper_limit); - next_addr = upper_limit; -} + BumpAllocator::BumpAllocator(uintptr_t lower_limit, uintptr_t upper_limit) + : lower_limit(lower_limit) + , upper_limit(upper_limit) + , count(0) + { + SPDLOG_INFO("bumpallochunk: lower={}, upper={}", lower_limit, upper_limit); + next_addr = upper_limit; + } -BumpAllocator::~BumpAllocator() -{ + BumpAllocator::~BumpAllocator() + { #ifndef NDEBUG - if( !empty() ) - spdlog::warn("BumpAllocChunk: {} allocations remaining not deallocated.", count.load()); + if(!empty()) + spdlog::warn("BumpAllocChunk: {} allocations remaining not deallocated.", count.load()); #endif -} + } -bool BumpAllocator::empty() const -{ - return (count == 0); -} + bool BumpAllocator::empty() const + { + return (count == 0); + } -bool BumpAllocator::full() const -{ - return next_addr <= lower_limit; -} + bool BumpAllocator::full() const + { + return next_addr <= lower_limit; + } -void BumpAllocator::reset() -{ - next_addr = upper_limit; - count = 0; -} + void BumpAllocator::reset() + { + next_addr = upper_limit; + count = 0; + } -Block BumpAllocator::allocate( size_t n_bytes ) -{ - uintptr_t addr = next_addr.fetch_sub( n_bytes ) - n_bytes; - if( addr >= lower_limit ) - { - count ++; - return Block { addr, n_bytes }; - } - else - return Block::null(); -} + Block BumpAllocator::allocate(size_t n_bytes) + { + uintptr_t addr = next_addr.fetch_sub(n_bytes) - n_bytes; + if(addr >= lower_limit) + { + 
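// Worked example (illustrative numbers, not from the sources): with
+        // upper_limit = 0x2000, lower_limit = 0x1000 and n_bytes = 0x100, the
+        // first call sees fetch_sub() return 0x2000, so addr = 0x1F00 >= lower_limit
+        // and the caller gets the block [0x1F00, 0x2000); a call that would push
+        // addr below lower_limit falls through and returns Block::null().
+        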
count++; + return Block{addr, n_bytes}; + } + else + return Block::null(); + } -uint16_t BumpAllocator::deallocate( Block blk ) -{ - assert( owns(blk) ); - return count.fetch_sub(1); -} + uint16_t BumpAllocator::deallocate(Block blk) + { + assert(owns(blk)); + return count.fetch_sub(1); + } -bool BumpAllocator::owns( Block const & blk ) const -{ - return blk.ptr >= lower_limit && blk.ptr < upper_limit; -} + bool BumpAllocator::owns(Block const& blk) const + { + return blk.ptr >= lower_limit && blk.ptr < upper_limit; + } -} // namespace memory + } // namespace memory } // namespace redGrapes - diff --git a/redGrapes/memory/bump_allocator.hpp b/redGrapes/memory/bump_allocator.hpp index 1c376963..f6aa9397 100644 --- a/redGrapes/memory/bump_allocator.hpp +++ b/redGrapes/memory/bump_allocator.hpp @@ -9,65 +9,65 @@ #pragma once +#include + #include #include -#include namespace redGrapes { -namespace memory -{ - -/* The `BumpAllocator` manages a chunk of memory, - * given by `lower_limit` and `upper_limit` by - * decrementing the `next_addr` by the requested size, - * and counting the number of active allocations. - * The data will start immediately after this management object - */ -struct BumpAllocator -{ - BumpAllocator( Block blk ); - BumpAllocator( uintptr_t lower_limit, uintptr_t upper_limit ); - BumpAllocator( BumpAllocator const & ) = delete; - BumpAllocator( BumpAllocator & ) = delete; - ~BumpAllocator(); - - void reset(); - - bool empty() const; - - /* check whether this allocator is exhausted already. - * @return true if no free space remains - */ - bool full() const; - - /*! checks whether this block is managed by this allocator - */ - bool owns( Block const & ) const; - - /*! @param n_bytes size of requested memory block - * @return Block with len = n_bytes and some non-nullpointer - * if successful, return Block::null() on exhaustion. - */ - Block allocate( size_t n_bytes ); - - /*! @return how many active allocations remain, - * if it returns 0, this allocator needs to be reset. - */ - uint16_t deallocate( Block blk ); - -private: - //! number of active allocations - std::atomic< uint16_t > count; - - //! pointer to the upper-limit of the next allocation - std::atomic< uintptr_t > next_addr; - - - uintptr_t const lower_limit; - uintptr_t const upper_limit; -}; - -} // namespace memory + namespace memory + { + + /* The `BumpAllocator` manages a chunk of memory, + * given by `lower_limit` and `upper_limit` by + * decrementing the `next_addr` by the requested size, + * and counting the number of active allocations. + * The data will start immediately after this management object + */ + struct BumpAllocator + { + BumpAllocator(Block blk); + BumpAllocator(uintptr_t lower_limit, uintptr_t upper_limit); + BumpAllocator(BumpAllocator const&) = delete; + BumpAllocator(BumpAllocator&) = delete; + ~BumpAllocator(); + + void reset(); + + bool empty() const; + + /* check whether this allocator is exhausted already. + * @return true if no free space remains + */ + bool full() const; + + /*! checks whether this block is managed by this allocator + */ + bool owns(Block const&) const; + + /*! @param n_bytes size of requested memory block + * @return Block with len = n_bytes and some non-nullpointer + * if successful, return Block::null() on exhaustion. + */ + Block allocate(size_t n_bytes); + + /*! @return how many active allocations remain, + * if it returns 0, this allocator needs to be reset. + */ + uint16_t deallocate(Block blk); + + private: + //! 
number of active allocations + std::atomic count; + + //! pointer to the upper-limit of the next allocation + std::atomic next_addr; + + + uintptr_t const lower_limit; + uintptr_t const upper_limit; + }; + + } // namespace memory } // namespace redGrapes - diff --git a/redGrapes/memory/chunked_bump_alloc.hpp b/redGrapes/memory/chunked_bump_alloc.hpp index 81480471..20a6e876 100644 --- a/redGrapes/memory/chunked_bump_alloc.hpp +++ b/redGrapes/memory/chunked_bump_alloc.hpp @@ -7,160 +7,163 @@ #pragma once -#include +#include +#include +#include +#include +#include +#include + #include +#include + +#include #include #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -//#include +// #include #if REDGRAPES_ENABLE_BACKWARDCPP -#include +# include #endif namespace redGrapes { -namespace memory -{ + namespace memory + { /* use 64KiB as default chunksize */ #ifndef REDGRAPES_ALLOC_CHUNKSIZE -#define REDGRAPES_ALLOC_CHUNKSIZE ( 64 * 1024 ) +# define REDGRAPES_ALLOC_CHUNKSIZE (64 * 1024) #endif -struct HwlocAlloc; - -template < typename Alloc = HwlocAlloc > -struct ChunkedBumpAlloc -{ - size_t const chunk_size; + struct HwlocAlloc; - AtomicList< BumpAllocator, Alloc > bump_allocators; + template + struct ChunkedBumpAlloc + { + size_t const chunk_size; - ChunkedBumpAlloc( Alloc && alloc, size_t chunk_size = REDGRAPES_ALLOC_CHUNKSIZE ) - : chunk_size( chunk_size ) - , bump_allocators( std::move(alloc), chunk_size ) - { - } + AtomicList bump_allocators; - ChunkedBumpAlloc( ChunkedBumpAlloc && other ) - : chunk_size(other.chunk_size) - , bump_allocators(other.bump_allocators) - { - } + ChunkedBumpAlloc(Alloc&& alloc, size_t chunk_size = REDGRAPES_ALLOC_CHUNKSIZE) + : chunk_size(chunk_size) + , bump_allocators(std::move(alloc), chunk_size) + { + } - ~ChunkedBumpAlloc() - { - } + ChunkedBumpAlloc(ChunkedBumpAlloc&& other) + : chunk_size(other.chunk_size) + , bump_allocators(other.bump_allocators) + { + } - inline static size_t roundup_to_poweroftwo( size_t s ) - { - s--; - s |= s >> 0x1; - s |= s >> 0x2; - s |= s >> 0x4; - s |= s >> 0x8; - s |= s >> 0x10; - s |= s >> 0x20; - s++; - return s; - } - - Block allocate( std::size_t n = 1 ) noexcept - { - TRACE_EVENT("Allocator", "ChunkedBumpAlloc::allocate()"); - size_t alloc_size = roundup_to_poweroftwo( n ); - - size_t const chunk_capacity = bump_allocators.get_chunk_capacity(); + ~ChunkedBumpAlloc() + { + } - if( alloc_size <= chunk_capacity ) - { - Block blk = Block::null(); + static inline size_t roundup_to_poweroftwo(size_t s) + { + s--; + s |= s >> 0x1; + s |= s >> 0x2; + s |= s >> 0x4; + s |= s >> 0x8; + s |= s >> 0x10; + s |= s >> 0x20; + s++; + return s; + } - while( !blk ) + Block allocate(std::size_t n = 1) noexcept { - // try to alloc in current chunk - auto chunk = bump_allocators.rbegin(); + TRACE_EVENT("Allocator", "ChunkedBumpAlloc::allocate()"); + size_t alloc_size = roundup_to_poweroftwo(n); + + size_t const chunk_capacity = bump_allocators.get_chunk_capacity(); - if( chunk != bump_allocators.rend() ) + if(alloc_size <= chunk_capacity) { - blk = chunk->allocate( alloc_size ); + Block blk = Block::null(); - // chunk is full, create a new one - if( !blk ) - bump_allocators.allocate_item(); + while(!blk) + { + // try to alloc in current chunk + auto chunk = bump_allocators.rbegin(); + + if(chunk != bump_allocators.rend()) + { + blk = chunk->allocate(alloc_size); + + // chunk is full, create a new one + if(!blk) + bump_allocators.allocate_item(); + } + // no chunk exists, create a new one + 
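// (either way, the enclosing while(!blk) loop retries the allocation
+                        // in the chunk freshly created by allocate_item())
+                        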
else + bump_allocators.allocate_item(); + } + + SPDLOG_TRACE("ChunkedBumpAlloc: alloc {},{}", blk.ptr, blk.len); + return blk; } - // no chunk exists, create a new one else - bump_allocators.allocate_item(); + { + spdlog::error( + "ChunkedBumpAlloc: requested allocation of {} bytes exceeds chunk capacity of {} bytes", + alloc_size, + chunk_capacity); + return Block::null(); + } } - SPDLOG_TRACE("ChunkedBumpAlloc: alloc {},{}", blk.ptr, blk.len); - return blk; - } - else - { - spdlog::error("ChunkedBumpAlloc: requested allocation of {} bytes exceeds chunk capacity of {} bytes", alloc_size, chunk_capacity); - return Block::null(); - } - } - - void deallocate( Block blk ) - { - TRACE_EVENT("Allocator", "ChunkedBumpAlloc::deallocate()"); - SPDLOG_TRACE("ChunkedBumpAlloc[{}]: free {} ", (void*)this, (uintptr_t)blk.ptr); - - /* find the chunk that contains `ptr` and deallocate there. - * Additionally, delete the chunk if possible. - */ - - auto prev = bump_allocators.rbegin(); - for( auto it = bump_allocators.rbegin(); it != bump_allocators.rend(); ++it ) - { - if( it->owns(blk) ) + void deallocate(Block blk) { - /* if no allocations remain in this chunk - * and this chunk is not `head`, - * remove this chunk + TRACE_EVENT("Allocator", "ChunkedBumpAlloc::deallocate()"); + SPDLOG_TRACE("ChunkedBumpAlloc[{}]: free {} ", (void*) this, (uintptr_t) blk.ptr); + + /* find the chunk that contains `ptr` and deallocate there. + * Additionally, delete the chunk if possible. */ - if( it->deallocate(blk) == 1 ) + + auto prev = bump_allocators.rbegin(); + for(auto it = bump_allocators.rbegin(); it != bump_allocators.rend(); ++it) { - SPDLOG_TRACE("ChunkedBumpAlloc: erase chunk"); - if( it->full() ) + if(it->owns(blk)) { - bump_allocators.erase( it ); - prev.optimize(); + /* if no allocations remain in this chunk + * and this chunk is not `head`, + * remove this chunk + */ + if(it->deallocate(blk) == 1) + { + SPDLOG_TRACE("ChunkedBumpAlloc: erase chunk"); + if(it->full()) + { + bump_allocators.erase(it); + prev.optimize(); + } + } + + return; } + prev = it; } - return; - } - prev = it; - } - #if REDGRAPES_ENABLE_BACKWARDCPP - spdlog::error("try to deallocate invalid pointer ({}). this={}", (void*)blk.ptr, (void*)this); + spdlog::error("try to deallocate invalid pointer ({}). this={}", (void*) blk.ptr, (void*) this); - backward::StackTrace st; - st.load_here(32); - backward::Printer p; - p.print(st); + backward::StackTrace st; + st.load_here(32); + backward::Printer p; + p.print(st); #endif + } + }; - } -}; - -} // namespace memory + } // namespace memory } // namespace redGrapes - diff --git a/redGrapes/memory/hwloc_alloc.hpp b/redGrapes/memory/hwloc_alloc.hpp index ca64dd2c..ce085183 100644 --- a/redGrapes/memory/hwloc_alloc.hpp +++ b/redGrapes/memory/hwloc_alloc.hpp @@ -7,97 +7,100 @@ #pragma once -#include -#include #include -#include - #include -//#include -namespace redGrapes -{ - -struct HwlocContext -{ - hwloc_topology_t topology; - - HwlocContext() - { - hwloc_topology_init(&topology); - hwloc_topology_load(topology); - } +#include +#include - ~HwlocContext() - { - hwloc_topology_destroy(topology); - } -}; +#include -namespace memory -{ +// #include -struct HwlocAlloc +namespace redGrapes { - //! redGrapes context - HwlocContext & ctx; - - //! 
hwloc-object used for membind - hwloc_obj_t obj; - HwlocAlloc( HwlocContext & ctx, hwloc_obj_t const & obj ) noexcept - : ctx( ctx ), obj( obj ) - {} - - Block allocate( std::size_t alloc_size ) const noexcept + struct HwlocContext { - TRACE_EVENT("Allocator", "HwlocAlloc::allocate"); - - void * ptr = hwloc_alloc_membind( - ctx.topology, alloc_size, obj->cpuset, - HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_NOCPUBIND | HWLOC_MEMBIND_STRICT - ); - - SPDLOG_TRACE("hwloc_alloc {},{}", (uintptr_t)ptr, alloc_size); - - if( ptr ) - return Block{ (uintptr_t)ptr, alloc_size }; - else - { - int error = errno; - spdlog::error("hwloc_alloc_membind failed: {}\n", strerror(error)); - return Block::null(); - } - - // touch memory - hwloc_cpuset_t last_cpuset; - { - TRACE_EVENT("Allocator", "rebind cpu"); - hwloc_get_cpubind(ctx.topology, last_cpuset, HWLOC_CPUBIND_THREAD); - hwloc_set_cpubind(ctx.topology, obj->cpuset, HWLOC_CPUBIND_THREAD); - } + hwloc_topology_t topology; + HwlocContext() { - TRACE_EVENT("Allocator", "memset"); - memset( ptr, 0, alloc_size ); + hwloc_topology_init(&topology); + hwloc_topology_load(topology); } + ~HwlocContext() { - TRACE_EVENT("Allocator", "rebind cpu"); - hwloc_set_cpubind(ctx.topology, last_cpuset, HWLOC_CPUBIND_THREAD); + hwloc_topology_destroy(topology); } - } + }; - void deallocate( Block blk ) noexcept + namespace memory { - TRACE_EVENT("Allocator", "HwlocAlloc::deallocate"); -// SPDLOG_TRACE("hwloc free {}", (uintptr_t)p); - hwloc_free( ctx.topology, (void*)blk.ptr, blk.len ); - } -}; - -} // namespace memory + struct HwlocAlloc + { + //! redGrapes context + HwlocContext& ctx; + + //! hwloc-object used for membind + hwloc_obj_t obj; + + HwlocAlloc(HwlocContext& ctx, hwloc_obj_t const& obj) noexcept : ctx(ctx), obj(obj) + { + } + + Block allocate(std::size_t alloc_size) const noexcept + { + TRACE_EVENT("Allocator", "HwlocAlloc::allocate"); + + void* ptr = hwloc_alloc_membind( + ctx.topology, + alloc_size, + obj->cpuset, + HWLOC_MEMBIND_BIND, + HWLOC_MEMBIND_NOCPUBIND | HWLOC_MEMBIND_STRICT); + + SPDLOG_TRACE("hwloc_alloc {},{}", (uintptr_t) ptr, alloc_size); + + if(ptr) + return Block{(uintptr_t) ptr, alloc_size}; + else + { + int error = errno; + spdlog::error("hwloc_alloc_membind failed: {}\n", strerror(error)); + return Block::null(); + } + + // touch memory + hwloc_cpuset_t last_cpuset; + { + TRACE_EVENT("Allocator", "rebind cpu"); + hwloc_get_cpubind(ctx.topology, last_cpuset, HWLOC_CPUBIND_THREAD); + hwloc_set_cpubind(ctx.topology, obj->cpuset, HWLOC_CPUBIND_THREAD); + } + + { + TRACE_EVENT("Allocator", "memset"); + memset(ptr, 0, alloc_size); + } + + { + TRACE_EVENT("Allocator", "rebind cpu"); + hwloc_set_cpubind(ctx.topology, last_cpuset, HWLOC_CPUBIND_THREAD); + } + } + + void deallocate(Block blk) noexcept + { + TRACE_EVENT("Allocator", "HwlocAlloc::deallocate"); + + // SPDLOG_TRACE("hwloc free {}", (uintptr_t)p); + hwloc_free(ctx.topology, (void*) blk.ptr, blk.len); + } + }; + + } // namespace memory } // namespace redGrapes - diff --git a/redGrapes/redGrapes.cpp b/redGrapes/redGrapes.cpp index 53a3aad3..9d1e5025 100644 --- a/redGrapes/redGrapes.cpp +++ b/redGrapes/redGrapes.cpp @@ -5,19 +5,19 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
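
A review note on HwlocAlloc::allocate() above: both the success branch and the error branch return before the "touch memory" section, so the cpubind/memset/rebind sequence is unreachable as written, and last_cpuset is passed to hwloc_get_cpubind() without a prior hwloc_bitmap_alloc(). A hedged sketch of the presumably intended first-touch order (an assumption, not part of this patch; fragment of the function body only):

    void* ptr = hwloc_alloc_membind(
        ctx.topology,
        alloc_size,
        obj->cpuset,
        HWLOC_MEMBIND_BIND,
        HWLOC_MEMBIND_NOCPUBIND | HWLOC_MEMBIND_STRICT);

    if(!ptr)
    {
        spdlog::error("hwloc_alloc_membind failed: {}", strerror(errno));
        return Block::null();
    }

    // pin this thread to the target cpuset, first-touch the pages,
    // then restore the previous binding
    hwloc_cpuset_t last_cpuset = hwloc_bitmap_alloc(); // missing above
    hwloc_get_cpubind(ctx.topology, last_cpuset, HWLOC_CPUBIND_THREAD);
    hwloc_set_cpubind(ctx.topology, obj->cpuset, HWLOC_CPUBIND_THREAD);

    memset(ptr, 0, alloc_size);

    hwloc_set_cpubind(ctx.topology, last_cpuset, HWLOC_CPUBIND_THREAD);
    hwloc_bitmap_free(last_cpuset);

    return Block{(uintptr_t) ptr, alloc_size};
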
*/ -#include -#include -#include -#include - -#include #include -#include -#include +#include #include - +#include +#include #include +#include + +#include +#include +#include + #if REDGRAPES_ENABLE_TRACE PERFETTO_TRACK_EVENT_STATIC_STORAGE(); #endif @@ -25,168 +25,160 @@ PERFETTO_TRACK_EVENT_STATIC_STORAGE(); namespace redGrapes { -thread_local Task * Context::current_task; -thread_local std::function< void() > Context::idle; -thread_local unsigned Context::next_worker; -thread_local unsigned Context::current_arena; -thread_local scheduler::WakerId Context::current_waker_id; -thread_local std::shared_ptr< dispatch::thread::WorkerThread > Context::current_worker; + thread_local Task* Context::current_task; + thread_local std::function Context::idle; + thread_local unsigned Context::next_worker; + thread_local unsigned Context::current_arena; + thread_local scheduler::WakerId Context::current_waker_id; + thread_local std::shared_ptr Context::current_worker; -Context::Context() -{ - idle = [this] { - this->scheduler->idle(); - }; -} + Context::Context() + { + idle = [this] { this->scheduler->idle(); }; + } -Context::~Context() -{ - -} + Context::~Context() + { + } -std::shared_ptr Context::current_task_space() const -{ - if( current_task ) + std::shared_ptr Context::current_task_space() const { - if( ! current_task->children ) + if(current_task) { - auto task_space = std::make_shared(current_task); - SPDLOG_TRACE("create child space = {}", (void*)task_space.get()); - current_task->children = task_space; + if(!current_task->children) + { + auto task_space = std::make_shared(current_task); + SPDLOG_TRACE("create child space = {}", (void*) task_space.get()); + current_task->children = task_space; - std::unique_lock< std::shared_mutex > wr_lock( current_task->space->active_child_spaces_mutex ); - current_task->space->active_child_spaces.push_back( task_space ); + std::unique_lock wr_lock(current_task->space->active_child_spaces_mutex); + current_task->space->active_child_spaces.push_back(task_space); + } + + return current_task->children; } + else + return root_space; + } - return current_task->children; + unsigned Context::scope_depth() const + { + if(auto ts = current_task_space()) + return ts->depth; + else + return 0; } - else - return root_space; -} -unsigned Context::scope_depth() const -{ - if( auto ts = current_task_space() ) - return ts->depth; - else - return 0; -} - -/*! Create an event on which the termination of the current task depends. - * A task must currently be running. - * - * @return Handle to flag the event with `reach_event` later. - * nullopt if there is no task running currently - */ -std::optional< scheduler::EventPtr > Context::create_event() -{ - if( current_task ) - return current_task->make_event(); - else - return std::nullopt; -} - -//! get backtrace from currently running task -std::vector> Context::backtrace() -{ - std::vector> bt; - for( - Task * task = current_task; - task != nullptr; - task = task->space->parent - ) - bt.push_back(*task); - - return bt; -} - -void Context::init_tracing() -{ + /*! Create an event on which the termination of the current task depends. + * A task must currently be running. + * + * @return Handle to flag the event with `reach_event` later. + * nullopt if there is no task running currently + */ + std::optional Context::create_event() + { + if(current_task) + return current_task->make_event(); + else + return std::nullopt; + } + + //! 
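
Context::create_event() above pairs with Context::yield() further down in this file; a hedged usage sketch through the free-function layer that this diff also reformats (the completion side is left hypothetical):

    #include <redGrapes/redGrapes.hpp>

    int main()
    {
        redGrapes::init(4);

        redGrapes::emplace_task([]
        {
            // valid only inside a running task; returns nullopt otherwise,
            // see create_event() above
            if(auto event = redGrapes::create_event())
            {
                // ... hand *event to some producer that flags it later
                // (via reach_event, per the doc comment above), otherwise
                // this task would never resume ...
                redGrapes::yield(*event);
            }
        });

        redGrapes::finalize();
        return 0;
    }
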
get backtrace from currently running task + std::vector> Context::backtrace() + { + std::vector> bt; + for(Task* task = current_task; task != nullptr; task = task->space->parent) + bt.push_back(*task); + + return bt; + } + + void Context::init_tracing() + { #if REDGRAPES_ENABLE_TRACE - perfetto::TracingInitArgs args; - args.backends |= perfetto::kInProcessBackend; - perfetto::Tracing::Initialize(args); - perfetto::TrackEvent::Register(); + perfetto::TracingInitArgs args; + args.backends |= perfetto::kInProcessBackend; + perfetto::Tracing::Initialize(args); + perfetto::TrackEvent::Register(); - tracing_session = StartTracing(); + tracing_session = StartTracing(); #endif -} + } -void Context::finalize_tracing() -{ + void Context::finalize_tracing() + { #if REDGRAPES_ENABLE_TRACE - StopTracing( tracing_session ); + StopTracing(tracing_session); #endif -} - -void Context::init( size_t n_workers, std::shared_ptr scheduler ) -{ - init_tracing(); + } - this->n_workers = n_workers; - worker_pool = std::make_shared( hwloc_ctx, n_workers ); - worker_pool->emplace_workers( n_workers ); + void Context::init(size_t n_workers, std::shared_ptr scheduler) + { + init_tracing(); - root_space = std::make_shared(); - this->scheduler = scheduler; + this->n_workers = n_workers; + worker_pool = std::make_shared(hwloc_ctx, n_workers); + worker_pool->emplace_workers(n_workers); - worker_pool->start(); -} + root_space = std::make_shared(); + this->scheduler = scheduler; -void Context::init( size_t n_workers ) -{ - init( n_workers, std::make_shared()); -} + worker_pool->start(); + } -/*! wait until all tasks in the current task space finished - */ -void Context::barrier() -{ - SPDLOG_TRACE("barrier"); + void Context::init(size_t n_workers) + { + init(n_workers, std::make_shared()); + } - while( ! root_space->empty() ) - idle(); -} + /*! wait until all tasks in the current task space finished + */ + void Context::barrier() + { + SPDLOG_TRACE("barrier"); -void Context::finalize() -{ - barrier(); + while(!root_space->empty()) + idle(); + } - worker_pool->stop(); + void Context::finalize() + { + barrier(); - scheduler.reset(); - root_space.reset(); + worker_pool->stop(); - finalize_tracing(); -} + scheduler.reset(); + root_space.reset(); -//! pause the currently running task at least until event is reached -void Context::yield( scheduler::EventPtr event ) -{ - if( current_task ) - { - while( ! event->is_reached() ) - current_task->yield(event); + finalize_tracing(); } - else + + //! pause the currently running task at least until event is reached + void Context::yield(scheduler::EventPtr event) { - event->waker_id = Context::current_waker_id; - while( ! event->is_reached() ) - idle(); + if(current_task) + { + while(!event->is_reached()) + current_task->yield(event); + } + else + { + event->waker_id = Context::current_waker_id; + while(!event->is_reached()) + idle(); + } } -} -//! apply a patch to the properties of the currently running task -void Context::update_properties(typename TaskProperties::Patch const& patch) -{ - if( current_task ) + //! 
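
Context::barrier() above simply idles until the root task space drains, which yields a convenient two-phase pattern; a sketch with a hypothetical workload:

    #include <redGrapes/redGrapes.hpp>

    int main()
    {
        redGrapes::init(4);

        for(int i = 0; i < 8; ++i)
            redGrapes::emplace_task([] { /* phase 1 work */ });

        // blocks until every phase-1 task has finished, see barrier() above
        redGrapes::barrier();

        for(int i = 0; i < 8; ++i)
            redGrapes::emplace_task([] { /* phase 2 work */ });

        // finalize() runs one more barrier before stopping the workers
        redGrapes::finalize();
        return 0;
    }
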
apply a patch to the properties of the currently running task + void Context::update_properties(typename TaskProperties::Patch const& patch) { - current_task->apply_patch(patch); - current_task->update_graph(); + if(current_task) + { + current_task->apply_patch(patch); + current_task->update_graph(); + } + else + throw std::runtime_error("update_properties: currently no task running"); } - else - throw std::runtime_error("update_properties: currently no task running"); -} } // namespace redGrapes - diff --git a/redGrapes/redGrapes.hpp b/redGrapes/redGrapes.hpp index 38c57eb9..0fc5a5cf 100644 --- a/redGrapes/redGrapes.hpp +++ b/redGrapes/redGrapes.hpp @@ -9,150 +9,165 @@ #pragma once -#include // std::shared_ptr -#include - #include #include -//#include +#include + +#include // std::shared_ptr + +// #include +#include +#include #include #include -#include -#include namespace redGrapes { -struct Context -{ - Context(); - ~Context(); - - void init_tracing(); - void finalize_tracing(); - - void init( size_t n_workers, std::shared_ptr scheduler); - void init( size_t n_workers = std::thread::hardware_concurrency() ); - void finalize(); - - //! wait until all tasks in the current task space finished - void barrier(); - - //! pause the currently running task at least until event is reached - void yield(scheduler::EventPtr event); - - //! apply a patch to the properties of the currently running task - void update_properties(typename TaskProperties::Patch const& patch); - - //! get backtrace from currently running task - std::vector> backtrace(); - - /*! Create an event on which the termination of the current task depends. - * A task must currently be running. - * - * @return Handle to flag the event with `reach_event` later. - * nullopt if there is no task running currently - */ - std::optional create_event(); - - unsigned scope_depth() const; - std::shared_ptr current_task_space() const; - - void execute_task( Task & task ); - - /*! create a new task, as child of the currently running task (if there is one) - * - * @param f callable that takes "proprty-building" objects as args - * @param args are forwarded to f after the each arg added its - * properties to the task - * - * For the argument-types can a trait be implemented which - * defines a hook to add task properties depending the the - * argument. - * - * @return future from f's result - */ - template< typename Callable, typename... Args > - auto emplace_task(Callable&& f, Args&&... args); - - static thread_local Task * current_task; - static thread_local std::function< void () > idle; - static thread_local unsigned next_worker; - - static thread_local scheduler::WakerId current_waker_id; - static thread_local std::shared_ptr< dispatch::thread::WorkerThread > current_worker; - - unsigned n_workers; - static thread_local unsigned current_arena; - HwlocContext hwloc_ctx; - std::shared_ptr< dispatch::thread::WorkerPool > worker_pool; - - std::shared_ptr< TaskSpace > root_space; - std::shared_ptr< scheduler::IScheduler > scheduler; + struct Context + { + Context(); + ~Context(); + + void init_tracing(); + void finalize_tracing(); + + void init(size_t n_workers, std::shared_ptr scheduler); + void init(size_t n_workers = std::thread::hardware_concurrency()); + void finalize(); + + //! wait until all tasks in the current task space finished + void barrier(); + + //! pause the currently running task at least until event is reached + void yield(scheduler::EventPtr event); + + //! 
apply a patch to the properties of the currently running task + void update_properties(typename TaskProperties::Patch const& patch); + + //! get backtrace from currently running task + std::vector> backtrace(); + + /*! Create an event on which the termination of the current task depends. + * A task must currently be running. + * + * @return Handle to flag the event with `reach_event` later. + * nullopt if there is no task running currently + */ + std::optional create_event(); + + unsigned scope_depth() const; + std::shared_ptr current_task_space() const; + + void execute_task(Task& task); + + /*! create a new task, as child of the currently running task (if there is one) + * + * @param f callable that takes "property-building" objects as args + * @param args are forwarded to f after each arg has added its + * properties to the task + * + * For the argument types a trait can be implemented which + * defines a hook to add task properties depending on the + * argument. + * + * @return future from f's result + */ + template + auto emplace_task(Callable&& f, Args&&... args); + + static thread_local Task* current_task; + static thread_local std::function idle; + static thread_local unsigned next_worker; + + static thread_local scheduler::WakerId current_waker_id; + static thread_local std::shared_ptr current_worker; + + unsigned n_workers; + static thread_local unsigned current_arena; + HwlocContext hwloc_ctx; + std::shared_ptr worker_pool; + + std::shared_ptr root_space; + std::shared_ptr scheduler; #if REDGRAPES_ENABLE_TRACE - std::shared_ptr< perfetto::TracingSession > tracing_session; + std::shared_ptr tracing_session; #endif -}; + }; + /* ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + * S I N G L E T O N + * ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + */ -/* ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - * S I N G L E T O N - * ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - */ - -struct SingletonContext -{ - inline static Context & get() + struct SingletonContext + { + static inline Context& get() + { + static Context ctx; + return ctx; + } + }; + + inline void init(size_t n_workers, std::shared_ptr scheduler) { - static Context ctx; - return ctx; + SingletonContext::get().init(n_workers, scheduler); } -}; - -inline void init( size_t n_workers, std::shared_ptr scheduler ) { - SingletonContext::get().init( n_workers, scheduler); } - -inline void init( size_t n_workers = std::thread::hardware_concurrency() ) { - SingletonContext::get().init( n_workers ); } -inline void finalize() { - SingletonContext::get().finalize(); } - -inline void barrier() { - SingletonContext::get().barrier(); } + inline void init(size_t n_workers = std::thread::hardware_concurrency()) + { + SingletonContext::get().init(n_workers); + } -inline void yield(scheduler::EventPtr event) { - SingletonContext::get().yield(event); } + inline void finalize() + { + SingletonContext::get().finalize(); + } -inline void update_properties(typename TaskProperties::Patch const& patch) { - SingletonContext::get().update_properties( patch ); } + inline void barrier() + { + SingletonContext::get().barrier(); + } -inline std::vector> backtrace() { - return SingletonContext::get().backtrace(); } + inline void yield(scheduler::EventPtr event) + { + SingletonContext::get().yield(event); + } -inline std::optional create_event() { - return SingletonContext::get().create_event(); } + inline void update_properties(typename TaskProperties::Patch const& patch) + { + SingletonContext::get().update_properties(patch); + } -inline unsigned scope_depth() { - return SingletonContext::get().scope_depth(); } + inline std::vector> backtrace() { 
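
SingletonContext::get() above is a function-local static, a Meyers singleton: since C++11 the initialization of ctx is guaranteed to run exactly once, even when several threads hit the first call concurrently. The pattern in isolation:

    #include <iostream>

    struct Widget
    {
        Widget() { std::cout << "constructed once\n"; }
        int value = 0;
    };

    // same shape as SingletonContext::get() above
    static Widget& get()
    {
        static Widget w; // initialized on first use, thread-safely (C++11)
        return w;
    }

    int main()
    {
        get().value = 42;
        std::cout << get().value << '\n'; // 42, both calls see one instance
        return 0;
    }
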
return SingletonContext::get().backtrace(); + } -inline std::shared_ptr current_task_space() { - return SingletonContext::get().current_task_space(); } + inline std::optional create_event() + { + return SingletonContext::get().create_event(); + } -template -inline auto emplace_task(Callable&& f, Args&&... args) { - return std::move( - SingletonContext::get().emplace_task( - std::move(f), - std::forward(args)... - ) - ); } + inline unsigned scope_depth() + { + return SingletonContext::get().scope_depth(); + } -} //namespace redGrapes + inline std::shared_ptr current_task_space() + { + return SingletonContext::get().current_task_space(); + } + template + inline auto emplace_task(Callable&& f, Args&&... args) + { + return std::move(SingletonContext::get().emplace_task(std::move(f), std::forward(args)...)); + } +} // namespace redGrapes // `TaskBuilder` needs "Context`, so can only include here after definiton #include @@ -163,18 +178,17 @@ namespace redGrapes auto Context::emplace_task(Callable&& f, Args&&... args) { dispatch::thread::WorkerId worker_id = - // linear - next_worker % worker_pool->size(); + // linear + next_worker % worker_pool->size(); - // interleaved - // 2*next_worker % worker_pool->size() + ((2*next_worker) / worker_pool->size())%2; + // interleaved + // 2*next_worker % worker_pool->size() + ((2*next_worker) / worker_pool->size())%2; next_worker++; current_arena = worker_id; SPDLOG_TRACE("emplace task to worker {} next_worker={}", worker_id, next_worker); - return std::move(TaskBuilder< Callable, Args... >( std::move(f), std::forward(args)... )); + return std::move(TaskBuilder(std::move(f), std::forward(args)...)); } } // namespace redGrapes - diff --git a/redGrapes/resource/access/area.hpp b/redGrapes/resource/access/area.hpp index 0e32c28d..521b353c 100644 --- a/redGrapes/resource/access/area.hpp +++ b/redGrapes/resource/access/area.hpp @@ -11,86 +11,67 @@ #pragma once -#include -#include #include -namespace redGrapes -{ -namespace access -{ +#include +#include -struct AreaAccess : std::array +namespace redGrapes { - AreaAccess() + namespace access { - ( *this )[ 0 ] = std::numeric_limits< size_t >::min(); - ( *this )[ 1 ] = std::numeric_limits< size_t >::max(); - } - AreaAccess(std::array a) - : std::array(a) {} + struct AreaAccess : std::array + { + AreaAccess() + { + (*this)[0] = std::numeric_limits::min(); + (*this)[1] = std::numeric_limits::max(); + } - bool is_synchronizing() const - { - return ( *this )[ 0 ] == std::numeric_limits< size_t >::min() && - ( *this )[ 1 ] == std::numeric_limits< size_t >::max(); - } - - static bool - is_serial( - AreaAccess const & a, - AreaAccess const & b - ) - { - return !( - (a[1] <= b[0]) || - (a[0] >= b[1]) - ); - } + AreaAccess(std::array a) : std::array(a) + { + } - bool - is_superset_of(AreaAccess const & a) const - { - return ( - ((*this)[0] <= a[0]) && - ((*this)[1] >= a[1]) - ); - } + bool is_synchronizing() const + { + return (*this)[0] == std::numeric_limits::min() + && (*this)[1] == std::numeric_limits::max(); + } - bool operator==(AreaAccess const & other) const - { - return (*this)[0] == other[0] && (*this)[1] == other[1]; - } + static bool is_serial(AreaAccess const& a, AreaAccess const& b) + { + return !((a[1] <= b[0]) || (a[0] >= b[1])); + } -}; // struct AreaAccess + bool is_superset_of(AreaAccess const& a) const + { + return (((*this)[0] <= a[0]) && ((*this)[1] >= a[1])); + } + bool operator==(AreaAccess const& other) const + { + return (*this)[0] == other[0] && (*this)[1] == other[1]; + } + }; // struct 
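
AreaAccess above encodes a half-open index interval [begin, end), and two accesses are serial exactly when their intervals overlap. The predicate in isolation:

    #include <cassert>
    #include <cstddef>

    // same test as AreaAccess::is_serial above: intervals conflict unless
    // one of them ends at or before the start of the other
    static bool overlaps(std::size_t a0, std::size_t a1, std::size_t b0, std::size_t b1)
    {
        return !((a1 <= b0) || (a0 >= b1));
    }

    int main()
    {
        assert(overlaps(0, 4, 2, 6));  // [0,4) and [2,6) share [2,4)
        assert(!overlaps(0, 4, 4, 8)); // adjacent half-open intervals are disjoint
        return 0;
    }
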
AreaAccess -} // namespace access + + } // namespace access } // namespace redGrapes -template <> -struct fmt::formatter< redGrapes::access::AreaAccess > +template<> +struct fmt::formatter { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::access::AreaAccess const & acc, - FormatContext & ctx - ) + template + auto format(redGrapes::access::AreaAccess const& acc, FormatContext& ctx) { - return format_to( - ctx.out(), - "{{ \"area\" : {{ \"begin\" : {}, \"end\" : {} }} }}", - acc[0], - acc[1] - ); + return format_to(ctx.out(), "{{ \"area\" : {{ \"begin\" : {}, \"end\" : {} }} }}", acc[0], acc[1]); } }; - diff --git a/redGrapes/resource/access/combine.hpp b/redGrapes/resource/access/combine.hpp index 5ffd95c6..85617c60 100644 --- a/redGrapes/resource/access/combine.hpp +++ b/redGrapes/resource/access/combine.hpp @@ -11,276 +11,210 @@ #pragma once +#include + #include -#include #include - -#include +#include namespace redGrapes { -namespace access -{ - -// TODO: better tag names -struct And_t {}; -struct Or_t {}; - -template < - typename Access, - size_t N, - typename Op = Or_t -> -struct ArrayAccess : std::array -{ - ArrayAccess() - { - for( Access & a : *this ) - a = Access(); - } - - ArrayAccess(std::array const & a) - : std::array(a) {} - - bool is_synchronizing() const + namespace access { - for(std::size_t i = 0; i < N; ++i) - if( !(*this)[i].is_synchronizing() ) - return false; - return true; - } - - //! both array accesses are only serial if all element pairs are serial - static bool - is_serial( - ArrayAccess const & a, - ArrayAccess const & b - ) - { - for(std::size_t i = 0; i < N; ++i) - if(Access::is_serial(a[i], b[i])) - return true; - - return false; - } + // TODO: better tag names + struct And_t + { + }; - //! both array accesses are serial if at least one element pair is serial - static bool - is_serial( - ArrayAccess const & a, - ArrayAccess const & b - ) - { - for(std::size_t i = 0; i < N; ++i) - if(! Access::is_serial(a[i], b[i])) - return false; + struct Or_t + { + }; - return true; - } + template + struct ArrayAccess : std::array + { + ArrayAccess() + { + for(Access& a : *this) + a = Access(); + } + + ArrayAccess(std::array const& a) : std::array(a) + { + } + + bool is_synchronizing() const + { + for(std::size_t i = 0; i < N; ++i) + if(!(*this)[i].is_synchronizing()) + return false; - bool - is_superset_of( ArrayAccess const & a ) const - { - for(std::size_t i = 0; i < N; ++i) - if( ! (*this)[i].is_superset_of( a[i] ) ) - return false; + return true; + } - return true; - } + //! both array accesses are only serial if all element pairs are serial + static bool is_serial(ArrayAccess const& a, ArrayAccess const& b) + { + for(std::size_t i = 0; i < N; ++i) + if(Access::is_serial(a[i], b[i])) + return true; - bool - operator==( ArrayAccess const & other ) const - { - for(std::size_t i = 0; i < N; ++i) - if( ! ( (*this)[i] == other[i] ) ) return false; + } - return true; - } -}; // struct ArrayAccess - -template < - typename Acc1, - typename Acc2, - typename Op = And_t -> -struct CombineAccess : std::pair -{ - CombineAccess() - : std::pair( Acc1(), Acc2() ) {} + //! 
both array accesses are serial if at least one element pair is serial + static bool is_serial(ArrayAccess const& a, ArrayAccess const& b) + { + for(std::size_t i = 0; i < N; ++i) + if(!Access::is_serial(a[i], b[i])) + return false; - CombineAccess(Acc1 a) - : std::pair(a, Acc2()) {} - - CombineAccess(Acc1 a, Acc2 b) - : std::pair(a,b) {} + return true; + } - static bool - is_serial( - CombineAccess const & a, - CombineAccess const & b - ) - { - return ( - Acc1::is_serial(a.first, b.first) && - Acc2::is_serial(a.second, b.second) - ); - } + bool is_superset_of(ArrayAccess const& a) const + { + for(std::size_t i = 0; i < N; ++i) + if(!(*this)[i].is_superset_of(a[i])) + return false; - bool is_synchronizing() const - { - return this->first.is_synchronizing() && this->second.is_synchronizing(); - } - - static bool - is_serial( - CombineAccess const & a, - CombineAccess const & b - ) - { - return ( - Acc1::is_serial(a.first, b.first) || - Acc2::is_serial(a.second, b.second) - ); - } + return true; + } - bool - is_superset_of( CombineAccess const & a ) const - { - return ( - this->first.is_superset_of(a.first) && - this->second.is_superset_of(a.second) - ); - } + bool operator==(ArrayAccess const& other) const + { + for(std::size_t i = 0; i < N; ++i) + if(!((*this)[i] == other[i])) + return false; - bool - operator==( CombineAccess const & other ) const - { - return ( - this->first == other.first && - this->second == other.second - ); - } -}; // struct CombineAccess + return true; + } + }; // struct ArrayAccess -} // namespace access + template + struct CombineAccess : std::pair + { + CombineAccess() : std::pair(Acc1(), Acc2()) + { + } + + CombineAccess(Acc1 a) : std::pair(a, Acc2()) + { + } + + CombineAccess(Acc1 a, Acc2 b) : std::pair(a, b) + { + } + + static bool is_serial(CombineAccess const& a, CombineAccess const& b) + { + return (Acc1::is_serial(a.first, b.first) && Acc2::is_serial(a.second, b.second)); + } + + bool is_synchronizing() const + { + return this->first.is_synchronizing() && this->second.is_synchronizing(); + } + + static bool is_serial(CombineAccess const& a, CombineAccess const& b) + { + return (Acc1::is_serial(a.first, b.first) || Acc2::is_serial(a.second, b.second)); + } + + bool is_superset_of(CombineAccess const& a) const + { + return (this->first.is_superset_of(a.first) && this->second.is_superset_of(a.second)); + } + + bool operator==(CombineAccess const& other) const + { + return (this->first == other.first && this->second == other.second); + } + }; // struct CombineAccess + + } // namespace access } // namespace redGrapes - -template < - typename Access, - size_t N -> -struct fmt::formatter< - redGrapes::access::ArrayAccess< Access, N, redGrapes::access::And_t > -> +template +struct fmt::formatter> { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::access::ArrayAccess< Access, N, redGrapes::access::And_t > const & acc, - FormatContext & ctx - ) + template + auto format(redGrapes::access::ArrayAccess const& acc, FormatContext& ctx) { auto out = ctx.out(); - out = fmt::format_to( out, "{{ \"and\" : [" ); + out = fmt::format_to(out, "{{ \"and\" : ["); - for( auto it = acc.begin(); it != acc.end(); ) + for(auto it = acc.begin(); it != acc.end();) { - out = fmt::format_to( out, "{}", *it ); - if( ++it != acc.end() ) - out = fmt::format_to( out, ", " ); + out = fmt::format_to(out, "{}", *it); + if(++it != acc.end()) + out = 
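
A review note on ArrayAccess (and likewise CombineAccess) above: the diff shows two is_serial overloads with identical signatures, presumably because the file dispatches on the Or_t/And_t tag in a way the flattened hunk does not make visible. The two doc comments also read as swapped relative to the bodies: the first body reports serial as soon as one element pair is serial (an OR-combination), the second only when every pair is serial (an AND-combination). Spelled out side by side:

    #include <array>
    #include <cstddef>

    // OR-combination: serial if ANY element pair is serial
    template<typename Access, std::size_t N>
    bool is_serial_or(std::array<Access, N> const& a, std::array<Access, N> const& b)
    {
        for(std::size_t i = 0; i < N; ++i)
            if(Access::is_serial(a[i], b[i]))
                return true;
        return false;
    }

    // AND-combination: serial only if ALL element pairs are serial
    template<typename Access, std::size_t N>
    bool is_serial_and(std::array<Access, N> const& a, std::array<Access, N> const& b)
    {
        for(std::size_t i = 0; i < N; ++i)
            if(!Access::is_serial(a[i], b[i]))
                return false;
        return true;
    }
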
fmt::format_to(out, ", "); } - out = fmt::format_to( out, "] }}" ); + out = fmt::format_to(out, "] }}"); return out; } }; -template < - typename Access, - size_t N -> -struct fmt::formatter< - redGrapes::access::ArrayAccess< Access, N, redGrapes::access::Or_t > -> +template +struct fmt::formatter> { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::access::ArrayAccess< Access, N, redGrapes::access::Or_t > const & acc, - FormatContext & ctx - ) + template + auto format(redGrapes::access::ArrayAccess const& acc, FormatContext& ctx) { auto out = ctx.out(); - out = fmt::format_to( out, "{{ \"or\" : [" ); + out = fmt::format_to(out, "{{ \"or\" : ["); - for( auto it = acc.begin(); it != acc.end(); ) + for(auto it = acc.begin(); it != acc.end();) { - out = fmt::format_to( out, "{}", *it ); - if( ++it != acc.end() ) - out = fmt::format_to( out, ", " ); + out = fmt::format_to(out, "{}", *it); + if(++it != acc.end()) + out = fmt::format_to(out, ", "); } - out = fmt::format_to( out, "] }}" ); + out = fmt::format_to(out, "] }}"); return out; } }; -template < - typename Acc1, - typename Acc2 -> -struct fmt::formatter< - redGrapes::access::CombineAccess< Acc1, Acc2, redGrapes::access::And_t > -> +template +struct fmt::formatter> { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::access::CombineAccess< Acc1, Acc2, redGrapes::access::And_t > const & acc, - FormatContext & ctx - ) + template + auto format(redGrapes::access::CombineAccess const& acc, FormatContext& ctx) { - return fmt::format_to( ctx.out(), "{{ \"and\" : [ {}, {} ] }}", acc.first, acc.second ); + return fmt::format_to(ctx.out(), "{{ \"and\" : [ {}, {} ] }}", acc.first, acc.second); } }; -template < - typename Acc1, - typename Acc2 -> -struct fmt::formatter< - redGrapes::access::CombineAccess< Acc1, Acc2, redGrapes::access::Or_t > -> +template +struct fmt::formatter> { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::access::CombineAccess< Acc1, Acc2, redGrapes::access::Or_t > const & acc, - FormatContext & ctx - ) + template + auto format(redGrapes::access::CombineAccess const& acc, FormatContext& ctx) { - return fmt::format_to( ctx.out(), "{{ \"or\" : [ {}, {} ] }}", acc.first, acc.second ); + return fmt::format_to(ctx.out(), "{{ \"or\" : [ {}, {} ] }}", acc.first, acc.second); } }; - - diff --git a/redGrapes/resource/access/field.hpp b/redGrapes/resource/access/field.hpp index 2693f825..798e1e0a 100644 --- a/redGrapes/resource/access/field.hpp +++ b/redGrapes/resource/access/field.hpp @@ -11,28 +11,18 @@ #pragma once -#include #include #include +#include namespace redGrapes { -namespace access -{ + namespace access + { -template < - size_t dimension_t -> -using FieldAccess = CombineAccess< - IOAccess, - ArrayAccess< - AreaAccess, - dimension_t, - And_t - >, - And_t ->; + template + using FieldAccess = CombineAccess, And_t>; -} // namespace access + } // namespace access } // namespace redGrapes diff --git a/redGrapes/resource/access/io.hpp b/redGrapes/resource/access/io.hpp index 4b860a7b..8dc5ec4d 100644 --- a/redGrapes/resource/access/io.hpp +++ b/redGrapes/resource/access/io.hpp @@ -51,9 +51,8 @@ namespace redGrapes 
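
FieldAccess above composes an IOAccess with one AreaAccess per dimension, everything joined by And_t: two field accesses are serial only if the IO modes conflict and the ranges overlap in every dimension. Hand-evaluated for a hypothetical 2-D case:

    #include <cassert>

    int main()
    {
        // write [0,50) x [0,100) vs. read [50,100) x [0,100):
        bool io_serial = true;     // read vs. write conflicts
        bool dim0_overlap = false; // [0,50) vs. [50,100) are disjoint
        bool dim1_overlap = true;  // the y ranges coincide

        // And_t combination, as in FieldAccess above
        bool serial = io_serial && (dim0_overlap && dim1_overlap);
        assert(!serial); // disjoint in one dimension suffices to run parallel
        return 0;
    }
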
static bool is_serial(IOAccess a, IOAccess b) { - return - !( (a.mode == read && b.mode == read) - || (a.mode == aadd && b.mode == aadd) + return !( + (a.mode == read && b.mode == read) || (a.mode == aadd && b.mode == aadd) || (a.mode == amul && b.mode == amul)); } @@ -99,4 +98,3 @@ struct fmt::formatter return fmt::format_to(ctx.out(), "{{ \"IOAccess\" : \"{}\" }}", mode_str); } }; - diff --git a/redGrapes/resource/fieldresource.hpp b/redGrapes/resource/fieldresource.hpp index 71a5cd24..9e8793bf 100644 --- a/redGrapes/resource/fieldresource.hpp +++ b/redGrapes/resource/fieldresource.hpp @@ -11,205 +11,237 @@ #pragma once -#include - #include #include #include #include -namespace redGrapes -{ - -namespace trait -{ - -template < typename Container > -struct Field {}; - -template < typename T > -struct Field< std::vector > -{ - using Item = T; - static constexpr size_t dim = 1; - - static std::array extent( std::vector & v ) - { - return { v.size() }; - } - - static Item & get( std::vector & v, std::array index ) - { - return v[index[0]]; - } -}; - -template < typename T, size_t N > -struct Field< std::array > -{ - using Item = T; - static constexpr size_t dim = 1; - - static Item & get( std::array & array, std::array index ) - { - return array[ index[0] ]; - } -}; +#include -template < typename T, size_t Nx, size_t Ny > -struct Field< std::array, Ny> > +namespace redGrapes { - using Item = T; - static constexpr size_t dim = 2; - static Item & get( std::array, Ny> & array, std::array index ) + namespace trait { - return array[ index[1] ][ index[0] ]; - } -}; -}; // namespace trait - -namespace fieldresource -{ - -template < typename Container > -struct AreaGuard : SharedResourceObject< Container, access::FieldAccess< trait::Field::dim > > -{ - static constexpr size_t dim = trait::Field< Container >::dim; - using Item = typename trait::Field< Container >::Item; - using Index = std::array< size_t, dim >; - - bool contains( Index index ) const noexcept - { - for( size_t d = 0; d < dim; d++ ) + template + struct Field { - if(index[d] < m_area[d][0] || - index[d] >= m_area[d][1]) - return false; - } - return true; - } - -protected: - AreaGuard( std::shared_ptr obj ) - : SharedResourceObject< Container, access::FieldAccess >( obj ) - {} - - AreaGuard( AreaGuard const & other, Index begin, Index end ) - : SharedResourceObject< Container, access::FieldAccess >( other ) - , m_area( other.make_area(begin, end) ) - {} - - Item & get ( Index index ) const - { - if( !contains(index) ) - throw std::out_of_range("invalid area access"); + }; - return trait::Field< Container >::get( *this->obj, index ); - } - - access::ArrayAccess make_area( Index begin, Index end ) const - { - std::array< access::AreaAccess, dim > sub_area; - for( int d = 0; d < dim; ++d ) - sub_area[d] = access::AreaAccess({ begin[d], end[d] }); - - if( ! 
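
The trait::Field specializations above adapt standard containers to the field-resource machinery; adapting a user-defined container follows the same recipe. A hedged sketch, where MySpan is a hypothetical type, not part of this patch:

    #include <array>
    #include <cstddef>

    #include <redGrapes/resource/fieldresource.hpp>

    // hypothetical flat 1-D container
    struct MySpan
    {
        float* data;
        std::size_t size;
    };

    namespace redGrapes::trait
    {
        // mirrors the std::vector specialization shown above
        template<>
        struct Field<MySpan>
        {
            using Item = float;
            static constexpr size_t dim = 1;

            static std::array<size_t, 1> extent(MySpan& s)
            {
                return {s.size};
            }

            static Item& get(MySpan& s, std::array<size_t, 1> index)
            {
                return s.data[index[0]];
            }
        };
    } // namespace redGrapes::trait
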
m_area.is_superset_of( sub_area ) ) - throw std::out_of_range("invalid sub area"); - - return access::ArrayAccess(sub_area); - } - - access::ArrayAccess m_area; -}; - -template < typename Container > -struct ReadGuard : AreaGuard< Container > -{ - static constexpr size_t dim = trait::Field< Container >::dim; - using typename AreaGuard< Container >::Index; - using typename AreaGuard< Container >::Item; - - ReadGuard read() const noexcept { return *this; } - ReadGuard area( Index begin, Index end ) const { return ReadGuard(*this, begin, end); } - ReadGuard at( Index pos ) const - { - Index end = pos; - for(size_t d = 0; d < dim; ++d) end[d]++; - return ReadGuard(*this, pos, end); - } + template + struct Field> + { + using Item = T; + static constexpr size_t dim = 1; + + static std::array extent(std::vector& v) + { + return {v.size()}; + } + + static Item& get(std::vector& v, std::array index) + { + return v[index[0]]; + } + }; + + template + struct Field> + { + using Item = T; + static constexpr size_t dim = 1; - Item const & operator[] ( Index index ) const { return this->get( index ); } - Container const * operator-> () const noexcept { return this->obj.get(); } + static Item& get(std::array& array, std::array index) + { + return array[index[0]]; + } + }; - operator ResourceAccess() const noexcept - { - return this->make_access( access::FieldAccess(access::IOAccess::read, this->m_area) ); - } - -protected: - ReadGuard( ReadGuard const & other, Index begin, Index end ) - : AreaGuard< Container >( other, begin, end ) - {} + template + struct Field, Ny>> + { + using Item = T; + static constexpr size_t dim = 2; - ReadGuard( std::shared_ptr obj ) - : AreaGuard< Container >( obj ) - {} -}; + static Item& get(std::array, Ny>& array, std::array index) + { + return array[index[1]][index[0]]; + } + }; -template < typename Container > -struct WriteGuard : ReadGuard< Container > -{ - static constexpr size_t dim = trait::Field< Container >::dim; - using typename ReadGuard< Container >::Index; - using typename ReadGuard< Container >::Item; + }; // namespace trait - WriteGuard write() const noexcept { return *this; } - WriteGuard area( Index begin, Index end ) const { return WriteGuard(*this, begin, end); } - WriteGuard at( Index pos ) const + namespace fieldresource { - Index end = pos; - for(size_t d = 0; d < dim; ++d) end[d]++; - return WriteGuard(*this, pos, end); - } - Item & operator[] ( Index index ) const { return this->get( index ); } - Container * operator-> () const noexcept { return this->obj.get(); } - - operator ResourceAccess() const noexcept + template + struct AreaGuard : SharedResourceObject::dim>> + { + static constexpr size_t dim = trait::Field::dim; + using Item = typename trait::Field::Item; + using Index = std::array; + + bool contains(Index index) const noexcept + { + for(size_t d = 0; d < dim; d++) + { + if(index[d] < m_area[d][0] || index[d] >= m_area[d][1]) + return false; + } + return true; + } + + protected: + AreaGuard(std::shared_ptr obj) : SharedResourceObject>(obj) + { + } + + AreaGuard(AreaGuard const& other, Index begin, Index end) + : SharedResourceObject>(other) + , m_area(other.make_area(begin, end)) + { + } + + Item& get(Index index) const + { + if(!contains(index)) + throw std::out_of_range("invalid area access"); + + return trait::Field::get(*this->obj, index); + } + + access::ArrayAccess make_area(Index begin, Index end) const + { + std::array sub_area; + for(int d = 0; d < dim; ++d) + sub_area[d] = access::AreaAccess({begin[d], end[d]}); + + 
if(!m_area.is_superset_of(sub_area)) + throw std::out_of_range("invalid sub area"); + + return access::ArrayAccess(sub_area); + } + + access::ArrayAccess m_area; + }; + + template + struct ReadGuard : AreaGuard + { + static constexpr size_t dim = trait::Field::dim; + using typename AreaGuard::Index; + using typename AreaGuard::Item; + + ReadGuard read() const noexcept + { + return *this; + } + + ReadGuard area(Index begin, Index end) const + { + return ReadGuard(*this, begin, end); + } + + ReadGuard at(Index pos) const + { + Index end = pos; + for(size_t d = 0; d < dim; ++d) + end[d]++; + return ReadGuard(*this, pos, end); + } + + Item const& operator[](Index index) const + { + return this->get(index); + } + + Container const* operator->() const noexcept + { + return this->obj.get(); + } + + operator ResourceAccess() const noexcept + { + return this->make_access(access::FieldAccess(access::IOAccess::read, this->m_area)); + } + + protected: + ReadGuard(ReadGuard const& other, Index begin, Index end) : AreaGuard(other, begin, end) + { + } + + ReadGuard(std::shared_ptr obj) : AreaGuard(obj) + { + } + }; + + template + struct WriteGuard : ReadGuard + { + static constexpr size_t dim = trait::Field::dim; + using typename ReadGuard::Index; + using typename ReadGuard::Item; + + WriteGuard write() const noexcept + { + return *this; + } + + WriteGuard area(Index begin, Index end) const + { + return WriteGuard(*this, begin, end); + } + + WriteGuard at(Index pos) const + { + Index end = pos; + for(size_t d = 0; d < dim; ++d) + end[d]++; + return WriteGuard(*this, pos, end); + } + + Item& operator[](Index index) const + { + return this->get(index); + } + + Container* operator->() const noexcept + { + return this->obj.get(); + } + + operator ResourceAccess() const noexcept + { + return this->make_access(access::FieldAccess(access::IOAccess::write, this->m_area)); + } + + protected: + WriteGuard(WriteGuard const& other, Index begin, Index end) : ReadGuard(other, begin, end) + { + } + + WriteGuard(std::shared_ptr obj) : ReadGuard(obj) + { + } + }; + + } // namespace fieldresource + + template + struct FieldResource : fieldresource::WriteGuard { - return this->make_access( access::FieldAccess(access::IOAccess::write, this->m_area) ); - } - -protected: - WriteGuard( WriteGuard const & other, Index begin, Index end ) - : ReadGuard< Container >( other, begin, end ) - {} - - WriteGuard( std::shared_ptr< Container > obj ) - : ReadGuard< Container >( obj ) {} -}; - -} // namespace fieldresource - + static constexpr size_t dim = trait::Field::dim; -template < typename Container > -struct FieldResource : fieldresource::WriteGuard< Container > -{ - static constexpr size_t dim = trait::Field< Container >::dim; - - FieldResource( Container * c ) - : fieldresource::WriteGuard( std::shared_ptr(c)) - {} + FieldResource(Container* c) : fieldresource::WriteGuard(std::shared_ptr(c)) + { + } - template - FieldResource( Args&&... args ) - : fieldresource::WriteGuard< Container >( - memory::alloc_shared< Container >( std::forward(args)... ) - ) - {} -}; + template + FieldResource(Args&&... 
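
A hedged usage sketch of the guard API above: area() narrows the declared access, so tasks writing disjoint sub-ranges of the same field may be scheduled concurrently (workload hypothetical):

    #include <vector>

    #include <redGrapes/redGrapes.hpp>
    #include <redGrapes/resource/fieldresource.hpp>

    int main()
    {
        redGrapes::init(4);

        redGrapes::FieldResource<std::vector<int>> field(new std::vector<int>(100));

        // disjoint areas, so the two writers are independent,
        // see FieldAccess / AreaAccess earlier in this diff
        redGrapes::emplace_task(
            [](auto f) { f[{0}] = 1; },
            field.write().area({0}, {50}));

        redGrapes::emplace_task(
            [](auto f) { f[{99}] = 2; },
            field.write().area({50}, {100}));

        redGrapes::finalize();
        return 0;
    }
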
args) + : fieldresource::WriteGuard(memory::alloc_shared(std::forward(args)...)) + { + } + }; }; // namespace redGrapes - diff --git a/redGrapes/resource/ioresource.hpp b/redGrapes/resource/ioresource.hpp index 95981058..07efea4a 100644 --- a/redGrapes/resource/ioresource.hpp +++ b/redGrapes/resource/ioresource.hpp @@ -11,64 +11,98 @@ #pragma once -#include -#include #include #include +#include +#include namespace redGrapes { -namespace ioresource -{ - -template < typename T > -struct ReadGuard : public SharedResourceObject< T, access::IOAccess > -{ - operator ResourceAccess() const noexcept { return this->make_access(access::IOAccess::read); } - - ReadGuard read() const noexcept { return *this; } - - T const & operator* () const noexcept { return *this->obj; } - T const * operator-> () const noexcept { return this->obj.get(); } - - T const * get() const noexcept { return this->obj.get(); } - -protected: - ReadGuard( std::shared_ptr obj ) : SharedResourceObject( obj ) {} -}; - -template < typename T > -struct WriteGuard : public ReadGuard< T > -{ - operator ResourceAccess() const noexcept { return this->make_access(access::IOAccess::write); } - - WriteGuard write() const noexcept { return *this; } - - T & operator* () const noexcept { return *this->obj; } - T * operator-> () const noexcept { return this->obj.get(); } - - T * get() const noexcept { return this->obj.get(); } - -protected: - WriteGuard( std::shared_ptr obj ) : ReadGuard( obj ) {} -}; - -} // namespace ioresource - -template < typename T > -struct IOResource : public ioresource::WriteGuard< T > -{ - template < typename... Args > - IOResource( Args&&... args ) - : ioresource::WriteGuard< T >( - memory::alloc_shared< T >( std::forward(args)... ) - ) - {} - - IOResource( std::shared_ptr o ) - : ioresource::WriteGuard( o ) - {} - -}; // struct IOResource + namespace ioresource + { + + template + struct ReadGuard : public SharedResourceObject + { + operator ResourceAccess() const noexcept + { + return this->make_access(access::IOAccess::read); + } + + ReadGuard read() const noexcept + { + return *this; + } + + T const& operator*() const noexcept + { + return *this->obj; + } + + T const* operator->() const noexcept + { + return this->obj.get(); + } + + T const* get() const noexcept + { + return this->obj.get(); + } + + protected: + ReadGuard(std::shared_ptr obj) : SharedResourceObject(obj) + { + } + }; + + template + struct WriteGuard : public ReadGuard + { + operator ResourceAccess() const noexcept + { + return this->make_access(access::IOAccess::write); + } + + WriteGuard write() const noexcept + { + return *this; + } + + T& operator*() const noexcept + { + return *this->obj; + } + + T* operator->() const noexcept + { + return this->obj.get(); + } + + T* get() const noexcept + { + return this->obj.get(); + } + + protected: + WriteGuard(std::shared_ptr obj) : ReadGuard(obj) + { + } + }; + + } // namespace ioresource + + template + struct IOResource : public ioresource::WriteGuard + { + template + IOResource(Args&&... args) : ioresource::WriteGuard(memory::alloc_shared(std::forward(args)...)) + { + } + + IOResource(std::shared_ptr o) : ioresource::WriteGuard(o) + { + } + + }; // struct IOResource } // namespace redGrapes diff --git a/redGrapes/resource/resource.cpp b/redGrapes/resource/resource.cpp index 2b892df7..3fc1d6ec 100644 --- a/redGrapes/resource/resource.cpp +++ b/redGrapes/resource/resource.cpp @@ -5,29 +5,31 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
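
The same guard pattern applies to IOResource above: the guards hand out the wrapped object through operator* and operator->, const-qualified on the read side. A short sketch (workload hypothetical):

    #include <string>

    #include <redGrapes/redGrapes.hpp>
    #include <redGrapes/resource/ioresource.hpp>

    int main()
    {
        redGrapes::init(4);

        // constructor arguments are forwarded to the wrapped std::string
        redGrapes::IOResource<std::string> msg("hello");

        redGrapes::emplace_task(
            [](auto m) { m->append(" world"); }, // WriteGuard::operator->
            msg.write());

        redGrapes::emplace_task(
            [](auto m) { /* read-only: m-> yields a const pointer here */ },
            msg.read());

        redGrapes::finalize();
        return 0;
    }
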
*/ -#include -#include #include +#include + +#include namespace redGrapes { -struct Task; + struct Task; -unsigned int ResourceBase::generateID() -{ - static std::atomic< unsigned int > id_counter; - return id_counter.fetch_add(1); -} + unsigned int ResourceBase::generateID() + { + static std::atomic id_counter; + return id_counter.fetch_add(1); + } -ResourceBase::ResourceBase() - : id( generateID() ) - , scope_level( scope_depth() ) - , users( memory::Allocator( get_arena_id() ) ) -{} + ResourceBase::ResourceBase() + : id(generateID()) + , scope_level(scope_depth()) + , users(memory::Allocator(get_arena_id())) + { + } -unsigned ResourceBase::get_arena_id() const { - return id % SingletonContext::get().worker_pool->size(); -} + unsigned ResourceBase::get_arena_id() const + { + return id % SingletonContext::get().worker_pool->size(); + } } // namespace redGrapes - diff --git a/redGrapes/resource/resource.hpp b/redGrapes/resource/resource.hpp index 96f9d7e0..52b23265 100644 --- a/redGrapes/resource/resource.hpp +++ b/redGrapes/resource/resource.hpp @@ -11,378 +11,356 @@ #pragma once +#include +#include +#include +#include + #include + +#include +#include #include // std::unique_ptr<> -#include #include -#include -#include - -#include -#include -#include -#include -//#include +#include +// #include #include -//#include +// #include #include #ifndef REDGRAPES_RUL_CHUNKSIZE -#define REDGRAPES_RUL_CHUNKSIZE 128 +# define REDGRAPES_RUL_CHUNKSIZE 128 #endif namespace redGrapes { -template -class Resource; - -struct Task; + template + class Resource; -class ResourceBase -{ -protected: - static unsigned int generateID(); + struct Task; -public: - unsigned int id; - unsigned int scope_level; + class ResourceBase + { + protected: + static unsigned int generateID(); - SpinLock users_mutex; - ChunkedList< Task*, REDGRAPES_RUL_CHUNKSIZE > users; + public: + unsigned int id; + unsigned int scope_level; - /** - * Create a new resource with an unused ID. - */ - ResourceBase(); + SpinLock users_mutex; + ChunkedList users; - unsigned get_arena_id() const; -}; + /** + * Create a new resource with an unused ID. + */ + ResourceBase(); -template -class Resource; + unsigned get_arena_id() const; + }; -class ResourceAccess -{ - template - friend class Resource; + template + class Resource; - private: - struct AccessBase + class ResourceAccess { - AccessBase( boost::typeindex::type_index access_type, std::shared_ptr resource ) - : access_type( access_type ) - , resource( resource ) + template + friend class Resource; + + private: + struct AccessBase + { + AccessBase(boost::typeindex::type_index access_type, std::shared_ptr resource) + : access_type(access_type) + , resource(resource) + { + } + + AccessBase(AccessBase&& other) : access_type(other.access_type), resource(std::move(other.resource)) + { + } + + virtual ~AccessBase(){}; + virtual bool operator==(AccessBase const& r) const = 0; + + bool is_same_resource(ResourceAccess::AccessBase const& a) const + { + return this->resource == a.resource; + } + + virtual bool is_synchronizing() const = 0; + virtual bool is_serial(AccessBase const& r) const = 0; + virtual bool is_superset_of(AccessBase const& r) const = 0; + virtual std::string mode_format() const = 0; + + boost::typeindex::type_index access_type; + std::shared_ptr resource; + }; // AccessBase + + // todo use allocator!! 
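
ResourceBase::generateID() above uses std::atomic::fetch_add on a function-local counter, so concurrently constructed resources still receive unique ids. The pattern in isolation:

    #include <atomic>
    #include <cassert>

    // same shape as ResourceBase::generateID above: fetch_add returns the
    // pre-increment value atomically, so every caller gets a distinct id
    static unsigned generate_id()
    {
        static std::atomic<unsigned> id_counter{0};
        return id_counter.fetch_add(1);
    }

    int main()
    {
        unsigned a = generate_id();
        unsigned b = generate_id();
        assert(a != b); // holds even across threads
        return 0;
    }
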
+ std::shared_ptr obj; + + public: + ResourceAccess(std::shared_ptr obj) : obj(obj) { } - AccessBase( AccessBase && other ) - : access_type( other.access_type ) - , resource( std::move(other.resource) ) - {} - - virtual ~AccessBase() {}; - virtual bool operator==( AccessBase const & r ) const = 0; - - bool - is_same_resource( ResourceAccess::AccessBase const & a ) const + ResourceAccess(ResourceAccess const& other) : obj(other.obj) { - return this->resource == a.resource; } - virtual bool is_synchronizing() const = 0; - virtual bool is_serial( AccessBase const & r ) const = 0; - virtual bool is_superset_of( AccessBase const & r ) const = 0; - virtual std::string mode_format() const = 0; - - boost::typeindex::type_index access_type; - std::shared_ptr< ResourceBase > resource; - }; // AccessBase - - // todo use allocator!! - std::shared_ptr< AccessBase > obj; - - public: - ResourceAccess( std::shared_ptr< AccessBase > obj ) : obj( obj ) {} - ResourceAccess( ResourceAccess const & other ) : obj( other.obj ) {} - ResourceAccess( ResourceAccess && other ) : obj( std::move(other.obj) ) { - other.obj.reset(); - } - - ResourceAccess& operator= (ResourceAccess const & other ) - { - this->obj = other.obj; - return *this; - } - - static bool - is_serial( ResourceAccess const & a, ResourceAccess const & b ) - { - if ( a.obj->access_type == b.obj->access_type ) - return a.obj->is_serial( *b.obj ); - else - return false; - } - - bool - is_superset_of( ResourceAccess const & a ) const - { - //if ( this->obj->resource.scope_level < a.obj->resource.scope_level ) - // return true; - if ( this->obj->access_type == a.obj->access_type ) - return this->obj->is_superset_of( *a.obj ); - else - return false; - } - - bool is_synchronizing() const - { - return this->obj->is_synchronizing(); - } - - unsigned int scope_level() const - { - return this->obj->resource->scope_level; - } - - unsigned int resource_id() const - { - return this->obj->resource->id; - } - - std::string mode_format() const - { - return this->obj->mode_format(); - } - - std::shared_ptr< ResourceBase > get_resource() - { - return obj->resource; - } - - /** - * Check if the associated resource is the same - * - * @param a another ResourceAccess - * @return true if `a` is associated with the same resource as `this` - */ - bool - is_same_resource( ResourceAccess const & a ) const - { - if ( this->obj->access_type == a.obj->access_type ) - return this->obj->is_same_resource( *a.obj ); - return false; - } - - bool - operator== ( ResourceAccess const & a ) const - { - if ( this->obj->access_type == a.obj->access_type ) - return *(this->obj) == *(a.obj); - return false; - } -}; // class ResourceAccess - -namespace trait -{ - -/** - * implements BuildProperties for any type which - * can be casted to a ResourceAccess - */ -template < typename T > -struct BuildProperties< - T, - typename std::enable_if< - std::is_convertible::value - >::type -> -{ - template < typename Builder > - inline static void build( Builder & builder, T const & obj ) - { - builder.add_resource( obj ); - } -}; -} // namespace trait - -struct DefaultAccessPolicy -{ - static bool is_serial(DefaultAccessPolicy, DefaultAccessPolicy) - { - return true; - } -}; - -/** - * @defgroup AccessPolicy - * - * @{ - * - * @par Description - * An implementation of the concept AccessPolicy creates a new resource-type (`Resource`) - * and should define the possible access modes / configurations for this resource-type (e.g. 
read/write) - * - * @par Required public member functions - * - `static bool is_serial(AccessPolicy, AccessPolicy)` - * check if the two accesses have to be **in order**. (e.g. two reads return false, an occuring write always true) - * - * - `static bool is_superset(AccessPolicy a, AccessPolicy b)` - * check if access `a` is a superset of access `b` (e.g. accessing [0,3] is a superset of accessing [1,2]) - * - * @} - */ - -/** - * @class Resource - * @tparam AccessPolicy Defines the access-modes (e.g. read/write) that are possible - * with this resource. Required to implement the concept @ref AccessPolicy - * - * Represents a concrete resource. - * Copied objects represent the same resource. - */ -template -class Resource -{ -protected: - struct Access : public ResourceAccess::AccessBase - { - Access( std::shared_ptr< ResourceBase > resource, AccessPolicy policy ) - : ResourceAccess::AccessBase( - boost::typeindex::type_id(), - resource - ) - , policy( policy ) - {} - - Access( Access && other ) - : AccessBase(std::move((AccessBase&&)other)) - , policy( std::move(other.policy)) + ResourceAccess(ResourceAccess&& other) : obj(std::move(other.obj)) { + other.obj.reset(); } - ~Access() {} + ResourceAccess& operator=(ResourceAccess const& other) + { + this->obj = other.obj; + return *this; + } - bool is_synchronizing() const + static bool is_serial(ResourceAccess const& a, ResourceAccess const& b) { - return policy.is_synchronizing(); + if(a.obj->access_type == b.obj->access_type) + return a.obj->is_serial(*b.obj); + else + return false; } - - bool - is_serial( ResourceAccess::AccessBase const & a_ ) const + + bool is_superset_of(ResourceAccess const& a) const { - Access const & a = *static_cast( - &a_ ); // no dynamic cast needed, type checked in ResourceAccess - return this->is_same_resource( a ) && - AccessPolicy::is_serial( this->policy, a.policy ); + // if ( this->obj->resource.scope_level < a.obj->resource.scope_level ) + // return true; + if(this->obj->access_type == a.obj->access_type) + return this->obj->is_superset_of(*a.obj); + else + return false; } - bool - is_superset_of( ResourceAccess::AccessBase const & a_ ) const + bool is_synchronizing() const { - Access const & a = *static_cast( - &a_ ); // no dynamic cast needed, type checked in ResourceAccess - return this->is_same_resource( a ) && - this->policy.is_superset_of( a.policy ); + return this->obj->is_synchronizing(); } - bool - operator==( ResourceAccess::AccessBase const & a_ ) const + unsigned int scope_level() const { - Access const & a = *static_cast( - &a_ ); // no dynamic cast needed, type checked in ResourceAccess + return this->obj->resource->scope_level; + } - return ( this->is_same_resource(a_) && this->policy == a.policy ); + unsigned int resource_id() const + { + return this->obj->resource->id; } std::string mode_format() const { - return fmt::format("{}", policy); + return this->obj->mode_format(); } - AccessPolicy policy; - }; // struct ThisResourceAccess + std::shared_ptr get_resource() + { + return obj->resource; + } - friend class ResourceBase; + /** + * Check if the associated resource is the same + * + * @param a another ResourceAccess + * @return true if `a` is associated with the same resource as `this` + */ + bool is_same_resource(ResourceAccess const& a) const + { + if(this->obj->access_type == a.obj->access_type) + return this->obj->is_same_resource(*a.obj); + return false; + } - std::shared_ptr< ResourceBase > base; + bool operator==(ResourceAccess const& a) const + { + if(this->obj->access_type == 
a.obj->access_type) + return *(this->obj) == *(a.obj); + return false; + } + }; // class ResourceAccess - Resource( std::shared_ptr base ) - : base( base ) + namespace trait { - } - public: - Resource() + /** + * implements BuildProperties for any type which + * can be casted to a ResourceAccess + */ + template + struct BuildProperties::value>::type> + { + template + static inline void build(Builder& builder, T const& obj) + { + builder.add_resource(obj); + } + }; + } // namespace trait + + struct DefaultAccessPolicy { - static unsigned i = 0; + static bool is_serial(DefaultAccessPolicy, DefaultAccessPolicy) + { + return true; + } + }; - /* NOTE: Because of #include loops we cannot access Context and thus not worker_pool->size(). - * for this reason the modulo is done in constructor of Allocator() - */ - dispatch::thread::WorkerId worker_id = i++; // % SingletonContext::get().worker_pool->size(); - base = redGrapes::memory::alloc_shared_bind< ResourceBase >( worker_id ); - } + /** + * @defgroup AccessPolicy + * + * @{ + * + * @par Description + * An implementation of the concept AccessPolicy creates a new resource-type (`Resource`) + * and should define the possible access modes / configurations for this resource-type (e.g. read/write) + * + * @par Required public member functions + * - `static bool is_serial(AccessPolicy, AccessPolicy)` + * check if the two accesses have to be **in order**. (e.g. two reads return false, an occuring write always true) + * + * - `static bool is_superset(AccessPolicy a, AccessPolicy b)` + * check if access `a` is a superset of access `b` (e.g. accessing [0,3] is a superset of accessing [1,2]) + * + * @} + */ /** - * Create an ResourceAccess, which represents an concrete - * access configuration associated with this resource. + * @class Resource + * @tparam AccessPolicy Defines the access-modes (e.g. read/write) that are possible + * with this resource. Required to implement the concept @ref AccessPolicy * - * @param pol AccessPolicy object, containing all access information - * @return ResourceAccess on this resource + * Represents a concrete resource. + * Copied objects represent the same resource. 
*/ - ResourceAccess - make_access( AccessPolicy pol ) const + template + class Resource { - auto a = redGrapes::memory::alloc_shared_bind< Access >( base->get_arena_id(), base, pol ); - return ResourceAccess( a ); - } -}; // class Resource + protected: + struct Access : public ResourceAccess::AccessBase + { + Access(std::shared_ptr resource, AccessPolicy policy) + : ResourceAccess::AccessBase(boost::typeindex::type_id(), resource) + , policy(policy) + { + } + + Access(Access&& other) : AccessBase(std::move((AccessBase&&) other)), policy(std::move(other.policy)) + { + } + + ~Access() + { + } + + bool is_synchronizing() const + { + return policy.is_synchronizing(); + } + + bool is_serial(ResourceAccess::AccessBase const& a_) const + { + Access const& a + = *static_cast(&a_); // no dynamic cast needed, type checked in ResourceAccess + return this->is_same_resource(a) && AccessPolicy::is_serial(this->policy, a.policy); + } + + bool is_superset_of(ResourceAccess::AccessBase const& a_) const + { + Access const& a + = *static_cast(&a_); // no dynamic cast needed, type checked in ResourceAccess + return this->is_same_resource(a) && this->policy.is_superset_of(a.policy); + } + + bool operator==(ResourceAccess::AccessBase const& a_) const + { + Access const& a + = *static_cast(&a_); // no dynamic cast needed, type checked in ResourceAccess + + return (this->is_same_resource(a_) && this->policy == a.policy); + } + + std::string mode_format() const + { + return fmt::format("{}", policy); + } + + AccessPolicy policy; + }; // struct ThisResourceAccess + + friend class ResourceBase; + + std::shared_ptr base; + + Resource(std::shared_ptr base) : base(base) + { + } + public: + Resource() + { + static unsigned i = 0; -template < - typename T, - typename AccessPolicy -> -struct SharedResourceObject : Resource< AccessPolicy > -{ - //protected: - std::shared_ptr< T > obj; + /* NOTE: Because of #include loops we cannot access Context and thus not worker_pool->size(). + * for this reason the modulo is done in constructor of Allocator() + */ + dispatch::thread::WorkerId worker_id = i++; // % SingletonContext::get().worker_pool->size(); + base = redGrapes::memory::alloc_shared_bind(worker_id); + } - SharedResourceObject( std::shared_ptr obj ) - : obj(obj) {} + /** + * Create an ResourceAccess, which represents an concrete + * access configuration associated with this resource. 
-    SharedResourceObject( SharedResourceObject const & other )
-        : Resource< AccessPolicy >( other )
-        , obj( other.obj )
-    {}
-}; // struct SharedResourceObject
+    template<typename T, typename AccessPolicy>
+    struct SharedResourceObject : Resource<AccessPolicy>
+    {
+        // protected:
+        std::shared_ptr<T> obj;
+
+        SharedResourceObject(std::shared_ptr<T> obj) : obj(obj)
+        {
+        }
+
+        SharedResourceObject(SharedResourceObject const& other) : Resource<AccessPolicy>(other), obj(other.obj)
+        {
+        }
+    }; // struct SharedResourceObject

 } // namespace redGrapes

-template <>
-struct fmt::formatter<
-    redGrapes::ResourceAccess
->
+template<>
+struct fmt::formatter<redGrapes::ResourceAccess>
 {
-    constexpr auto parse( format_parse_context& ctx )
+    constexpr auto parse(format_parse_context& ctx)
     {
         return ctx.begin();
     }

-    template < typename FormatContext >
-    auto format(
-        redGrapes::ResourceAccess const & acc,
-        FormatContext & ctx
-    )
+    template<typename FormatContext>
+    auto format(redGrapes::ResourceAccess const& acc, FormatContext& ctx)
     {
         return fmt::format_to(
-            ctx.out(),
-            "{{ \"resourceID\" : {}, \"scopeLevel\" : {}, \"mode\" : {} }}",
-            acc.resource_id(),
-            acc.scope_level(),
-            acc.mode_format());
+            ctx.out(),
+            "{{ \"resourceID\" : {}, \"scopeLevel\" : {}, \"mode\" : {} }}",
+            acc.resource_id(),
+            acc.scope_level(),
+            acc.mode_format());
     }
 };
-
diff --git a/redGrapes/resource/resource_user.cpp b/redGrapes/resource/resource_user.cpp
index 2e89534e..489bd05e 100644
--- a/redGrapes/resource/resource_user.cpp
+++ b/redGrapes/resource/resource_user.cpp
@@ -1,113 +1,106 @@
-#include
+#include
 #include
-
+#include
 #include
-#include
+#include

 namespace redGrapes
 {
-    bool ResourceUsageEntry::operator==( ResourceUsageEntry const & other ) const
-    {
+    bool ResourceUsageEntry::operator==(ResourceUsageEntry const& other) const
+    {
         return resource == other.resource;
-    }
+    }

-    ResourceUser::ResourceUser()
-        : scope_level( SingletonContext::get().scope_depth() )
-        , access_list( memory::Allocator() )
-        , unique_resources( memory::Allocator() )
+    ResourceUser::ResourceUser()
+        : scope_level(SingletonContext::get().scope_depth())
+        , access_list(memory::Allocator())
+        , unique_resources(memory::Allocator())
     {
     }

-    ResourceUser::ResourceUser( ResourceUser const& other )
-        : scope_level( other.scope_level )
-        , access_list( memory::Allocator(), other.access_list )
-        , unique_resources( memory::Allocator(), other.unique_resources )
+    ResourceUser::ResourceUser(ResourceUser const& other)
+        : scope_level(other.scope_level)
+        , access_list(memory::Allocator(), other.access_list)
+        , unique_resources(memory::Allocator(), other.unique_resources)
     {
     }

-    ResourceUser::ResourceUser( std::initializer_list< ResourceAccess > list )
-        : scope_level( scope_depth() )
-        , access_list( memory::Allocator() )
-        , unique_resources( memory::Allocator() )
+    ResourceUser::ResourceUser(std::initializer_list<ResourceAccess> list)
+        : scope_level(scope_depth())
+        , access_list(memory::Allocator())
+        , unique_resources(memory::Allocator())
     {
-        for( auto & ra : list )
+        for(auto& ra : list)
             add_resource_access(ra);
     }

-    void ResourceUser::add_resource_access( ResourceAccess ra )
+    void ResourceUser::add_resource_access(ResourceAccess ra)
     {
         this->access_list.push(ra);
         std::shared_ptr<ResourceBase> r = ra.get_resource();
-        //unique_resources.erase(ResourceEntry{ r, r->users.end() });
-
unique_resources.push(ResourceUsageEntry{ r, r->users.rend() }); + // unique_resources.erase(ResourceEntry{ r, r->users.end() }); + unique_resources.push(ResourceUsageEntry{r, r->users.rend()}); } - void ResourceUser::rm_resource_access( ResourceAccess ra ) + void ResourceUser::rm_resource_access(ResourceAccess ra) { this->access_list.erase(ra); } void ResourceUser::build_unique_resource_list() { - for( auto ra = access_list.rbegin(); ra != access_list.rend(); ++ra ) + for(auto ra = access_list.rbegin(); ra != access_list.rend(); ++ra) { std::shared_ptr r = ra->get_resource(); - unique_resources.erase(ResourceUsageEntry{ r, r->users.rend() }); - unique_resources.push(ResourceUsageEntry{ r, r->users.rend() }); + unique_resources.erase(ResourceUsageEntry{r, r->users.rend()}); + unique_resources.push(ResourceUsageEntry{r, r->users.rend()}); } } - bool ResourceUser::has_sync_access( std::shared_ptr< ResourceBase > res ) + bool ResourceUser::has_sync_access(std::shared_ptr res) { - for( auto ra = access_list.rbegin(); ra != access_list.rend(); ++ra ) + for(auto ra = access_list.rbegin(); ra != access_list.rend(); ++ra) { - if( - ra->get_resource() == res && - ra->is_synchronizing() - ) + if(ra->get_resource() == res && ra->is_synchronizing()) return true; } return false; } - bool - ResourceUser::is_serial( ResourceUser const & a, ResourceUser const & b ) + bool ResourceUser::is_serial(ResourceUser const& a, ResourceUser const& b) { TRACE_EVENT("ResourceUser", "is_serial"); - for( auto ra = a.access_list.crbegin(); ra != a.access_list.crend(); ++ra ) - for( auto rb = b.access_list.crbegin(); rb != b.access_list.crend(); ++rb ) + for(auto ra = a.access_list.crbegin(); ra != a.access_list.crend(); ++ra) + for(auto rb = b.access_list.crbegin(); rb != b.access_list.crend(); ++rb) { TRACE_EVENT("ResourceUser", "RA::is_serial"); - if ( ResourceAccess::is_serial( *ra, *rb ) ) + if(ResourceAccess::is_serial(*ra, *rb)) return true; } return false; } - bool - ResourceUser::is_superset_of( ResourceUser const & a ) const + bool ResourceUser::is_superset_of(ResourceUser const& a) const { TRACE_EVENT("ResourceUser", "is_superset"); - for( auto ra = a.access_list.rbegin(); ra != a.access_list.rend(); ++ra ) + for(auto ra = a.access_list.rbegin(); ra != a.access_list.rend(); ++ra) { bool found = false; - for( auto r = access_list.rbegin(); r != access_list.rend(); ++r ) - if ( r->is_superset_of( *ra ) ) + for(auto r = access_list.rbegin(); r != access_list.rend(); ++r) + if(r->is_superset_of(*ra)) found = true; - if ( !found && ra->scope_level() <= scope_level ) + if(!found && ra->scope_level() <= scope_level) // a introduced a new resource return false; } return true; } - bool ResourceUser::is_superset( ResourceUser const & a, ResourceUser const & b ) + bool ResourceUser::is_superset(ResourceUser const& a, ResourceUser const& b) { return a.is_superset_of(b); } -} - +} // namespace redGrapes diff --git a/redGrapes/resource/resource_user.hpp b/redGrapes/resource/resource_user.hpp index 8839e45f..3edf6082 100644 --- a/redGrapes/resource/resource_user.hpp +++ b/redGrapes/resource/resource_user.hpp @@ -11,83 +11,76 @@ #pragma once -#include -#include - #include +#include #include -#include -#include +#include + +#include namespace redGrapes { -unsigned scope_depth(); + unsigned scope_depth(); -struct Task; -struct ResourceBase; -struct ResourceAccess; + struct Task; + struct ResourceBase; + struct ResourceAccess; -struct ResourceUsageEntry -{ - std::shared_ptr< ResourceBase > resource; - typename ChunkedList< 
Task*, REDGRAPES_RUL_CHUNKSIZE >::MutBackwardIterator task_entry; + struct ResourceUsageEntry + { + std::shared_ptr resource; + typename ChunkedList::MutBackwardIterator task_entry; - bool operator==( ResourceUsageEntry const & other ) const; -}; + bool operator==(ResourceUsageEntry const& other) const; + }; -class ResourceUser -{ - public: - ResourceUser(); - ResourceUser( ResourceUser const& other ); - ResourceUser( std::initializer_list< ResourceAccess > list ); - - void add_resource_access( ResourceAccess ra ); - void rm_resource_access( ResourceAccess ra ); - void build_unique_resource_list(); - bool has_sync_access( std::shared_ptr< ResourceBase > res ); - bool is_superset_of( ResourceUser const & a ) const; - static bool is_superset( ResourceUser const & a, ResourceUser const & b ); - static bool is_serial( ResourceUser const & a, ResourceUser const & b ); - - uint8_t scope_level; - - ChunkedList access_list; - ChunkedList unique_resources; -}; // class ResourceUser + class ResourceUser + { + public: + ResourceUser(); + ResourceUser(ResourceUser const& other); + ResourceUser(std::initializer_list list); + + void add_resource_access(ResourceAccess ra); + void rm_resource_access(ResourceAccess ra); + void build_unique_resource_list(); + bool has_sync_access(std::shared_ptr res); + bool is_superset_of(ResourceUser const& a) const; + static bool is_superset(ResourceUser const& a, ResourceUser const& b); + static bool is_serial(ResourceUser const& a, ResourceUser const& b); + + uint8_t scope_level; + + ChunkedList access_list; + ChunkedList unique_resources; + }; // class ResourceUser } // namespace redGrapes -template <> -struct fmt::formatter< - redGrapes::ResourceUser -> +template<> +struct fmt::formatter { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::ResourceUser const & r, - FormatContext & ctx - ) + template + auto format(redGrapes::ResourceUser const& r, FormatContext& ctx) { auto out = ctx.out(); - out = fmt::format_to( out, "[" ); + out = fmt::format_to(out, "["); - for( auto it = r.access_list.rbegin(); it != r.access_list.rend(); ) + for(auto it = r.access_list.rbegin(); it != r.access_list.rend();) { - out = fmt::format_to( out, "{}", *it ); - if( ++it != r.access_list.rend() ) - out = fmt::format_to( out, "," ); + out = fmt::format_to(out, "{}", *it); + if(++it != r.access_list.rend()) + out = fmt::format_to(out, ","); } - out = fmt::format_to( out, "]" ); + out = fmt::format_to(out, "]"); return out; } }; - diff --git a/redGrapes/scheduler/default_scheduler.cpp b/redGrapes/scheduler/default_scheduler.cpp index ab4a686d..bf2366fe 100644 --- a/redGrapes/scheduler/default_scheduler.cpp +++ b/redGrapes/scheduler/default_scheduler.cpp @@ -1,209 +1,211 @@ #include +#include #include #include + #include -#include namespace redGrapes { -namespace scheduler -{ + namespace scheduler + { -DefaultScheduler::DefaultScheduler( ) -{ -} + DefaultScheduler::DefaultScheduler() + { + } -void DefaultScheduler::idle() -{ - SPDLOG_TRACE("DefaultScheduler::idle()"); - - /* the main thread shall not do any busy waiting - * and always sleep right away in order to - * not block any worker threads (those however should - * busy-wait to improve latency) - */ - cv.timeout = 0; - cv.wait(); -} - -/* send the new task to a worker - */ -void DefaultScheduler::emplace_task( Task & task ) -{ - // todo: properly store affinity information in task - 
dispatch::thread::WorkerId worker_id = task.arena_id % SingletonContext::get().worker_pool->size(); + void DefaultScheduler::idle() + { + SPDLOG_TRACE("DefaultScheduler::idle()"); + + /* the main thread shall not do any busy waiting + * and always sleep right away in order to + * not block any worker threads (those however should + * busy-wait to improve latency) + */ + cv.timeout = 0; + cv.wait(); + } + + /* send the new task to a worker + */ + void DefaultScheduler::emplace_task(Task& task) + { + // todo: properly store affinity information in task + dispatch::thread::WorkerId worker_id = task.arena_id % SingletonContext::get().worker_pool->size(); - SingletonContext::get().worker_pool->get_worker(worker_id).emplace_task( task ); + SingletonContext::get().worker_pool->get_worker(worker_id).emplace_task(task); - /* hack as of 2023/11/17 - * - * Additionally to the worker who got the new task above, - * we will now notify another, available (idling) worker, - * in trying to avoid stale tasks in cases where new tasks - * are assigned to an already busy worker. - */ + /* hack as of 2023/11/17 + * + * Additionally to the worker who got the new task above, + * we will now notify another, available (idling) worker, + * in trying to avoid stale tasks in cases where new tasks + * are assigned to an already busy worker. + */ #ifndef REDGRAPES_EMPLACE_NOTIFY_NEXT -#define REDGRAPES_EMPLACE_NOTIFY_NEXT 0 +# define REDGRAPES_EMPLACE_NOTIFY_NEXT 0 #endif #if REDGRAPES_EMPLACE_NOTIFY_NEXT - auto id = SingletonContext::get() - .worker_pool->probe_worker_by_state( - [](unsigned idx) - { - SingletonContext::get().worker_pool->get_worker(idx).wake(); - return idx; - }, - dispatch::thread::WorkerState::AVAILABLE, - worker_id, - true); + auto id = SingletonContext::get().worker_pool->probe_worker_by_state( + [](unsigned idx) + { + SingletonContext::get().worker_pool->get_worker(idx).wake(); + return idx; + }, + dispatch::thread::WorkerState::AVAILABLE, + worker_id, + true); #endif -} + } -/* send this already existing task to a worker, - * but only through follower-list so it is not assigned to a worker yet. - * since this task is now ready, send find a worker for it - */ -void DefaultScheduler::activate_task( Task & task ) -{ - //! worker id to use in case all workers are busy - static thread_local std::atomic< unsigned int > next_worker(SingletonContext::get().current_worker ? 
-            SingletonContext::get().current_worker->get_worker_id() + 1 : 0);
-    TRACE_EVENT("Scheduler", "activate_task");
-    SPDLOG_TRACE("DefaultScheduler::activate_task({})", task.task_id);
-
-    int worker_id = SingletonContext::get().worker_pool->find_free_worker();
-    if( worker_id < 0 )
-    {
-        worker_id = next_worker.fetch_add(1) % SingletonContext::get().worker_pool->size();
-        if( worker_id == SingletonContext::get().current_worker->get_worker_id() )
-            worker_id = next_worker.fetch_add(1) % SingletonContext::get().worker_pool->size();
-    }
-
-    SingletonContext::get().worker_pool->get_worker( worker_id ).ready_queue.push(&task);
-    SingletonContext::get().worker_pool->set_worker_state( worker_id, dispatch::thread::WorkerState::BUSY );
-    SingletonContext::get().worker_pool->get_worker( worker_id ).wake();
-}
-
-/* tries to find a task with uninialized dependency edges in the
- * task-graph in the emplacement queues of other workers
- * and removes it from there
- */
-Task * DefaultScheduler::steal_new_task( dispatch::thread::Worker & worker )
-{
-    std::optional<Task*> task = SingletonContext::get().worker_pool->probe_worker_by_state(
-        [&worker](unsigned idx) -> std::optional<Task*>
+        /* send this already existing task to a worker,
+         * but only through follower-list so it is not assigned to a worker yet.
+         * since this task is now ready, find a worker for it
+         */
+        void DefaultScheduler::activate_task(Task& task)
         {
-            // we have a candidate of a busy worker,
-            // now check its queue
-            if(Task* t = SingletonContext::get().worker_pool->get_worker(idx).emplacement_queue.pop())
-                return t;
-
-            // otherwise check own queue again
-            else if(Task* t = worker.emplacement_queue.pop())
-                return t;
-
-            // else continue search
-            else
-                return std::nullopt;
-        },
-
-        // find a busy worker
-        dispatch::thread::WorkerState::BUSY,
-
-        // start next to current worker
-        worker.get_worker_id());
-
-    return task ? *task : nullptr;
-    }
-
-    /* tries to find a ready task in any queue of other workers
-     * and removes it from the queue
-     */
-    Task * DefaultScheduler::steal_ready_task( dispatch::thread::Worker & worker )
-    {
-        std::optional<Task*> task = SingletonContext::get().worker_pool->probe_worker_by_state(
-            [&worker](unsigned idx) -> std::optional<Task*>
+            //! worker id to use in case all workers are busy
+            static thread_local std::atomic<unsigned int> next_worker(
+                SingletonContext::get().current_worker ? SingletonContext::get().current_worker->get_worker_id() + 1
+                                                       : 0);
+            TRACE_EVENT("Scheduler", "activate_task");
+            SPDLOG_TRACE("DefaultScheduler::activate_task({})", task.task_id);
+
+            int worker_id = SingletonContext::get().worker_pool->find_free_worker();
+            if(worker_id < 0)
             {
-                // we have a candidate of a busy worker,
-                // now check its queue
-                if(Task* t = SingletonContext::get().worker_pool->get_worker(idx).ready_queue.pop())
-                    return t;
-
-                // otherwise check own queue again
-                else if(Task* t = worker.ready_queue.pop())
-                    return t;
+                worker_id = next_worker.fetch_add(1) % SingletonContext::get().worker_pool->size();
+                if(worker_id == SingletonContext::get().current_worker->get_worker_id())
+                    worker_id = next_worker.fetch_add(1) % SingletonContext::get().worker_pool->size();
+            }
+
+            SingletonContext::get().worker_pool->get_worker(worker_id).ready_queue.push(&task);
+            SingletonContext::get().worker_pool->set_worker_state(worker_id, dispatch::thread::WorkerState::BUSY);
+            SingletonContext::get().worker_pool->get_worker(worker_id).wake();
+        }

-                // else continue search
-                else
-                    return std::nullopt;
-            },
+        /* tries to find a task with uninitialized dependency edges in the
+         * task-graph in the emplacement queues of other workers
+         * and removes it from there
+         */
+        Task* DefaultScheduler::steal_new_task(dispatch::thread::Worker& worker)
+        {
+            std::optional<Task*> task = SingletonContext::get().worker_pool->probe_worker_by_state(
+                [&worker](unsigned idx) -> std::optional<Task*>
+                {
+                    // we have a candidate of a busy worker,
+                    // now check its queue
+                    if(Task* t = SingletonContext::get().worker_pool->get_worker(idx).emplacement_queue.pop())
+                        return t;
+
+                    // otherwise check own queue again
+                    else if(Task* t = worker.emplacement_queue.pop())
+                        return t;
+
+                    // else continue search
+                    else
+                        return std::nullopt;
+                },
+
+                // find a busy worker
+                dispatch::thread::WorkerState::BUSY,
+
+                // start next to current worker
+                worker.get_worker_id());
+
+            return task ? *task : nullptr;
+        }

-                // find a busy worker
-                dispatch::thread::WorkerState::BUSY,
+        /* tries to find a ready task in any queue of other workers
+         * and removes it from the queue
+         */
+        Task* DefaultScheduler::steal_ready_task(dispatch::thread::Worker& worker)
+        {
+            std::optional<Task*> task = SingletonContext::get().worker_pool->probe_worker_by_state(
+                [&worker](unsigned idx) -> std::optional<Task*>
+                {
+                    // we have a candidate of a busy worker,
+                    // now check its queue
+                    if(Task* t = SingletonContext::get().worker_pool->get_worker(idx).ready_queue.pop())
+                        return t;
+
+                    // otherwise check own queue again
+                    else if(Task* t = worker.ready_queue.pop())
+                        return t;
+
+                    // else continue search
+                    else
+                        return std::nullopt;
+                },
+
+                // find a busy worker
+                dispatch::thread::WorkerState::BUSY,
+
+                // start next to current worker
+                worker.get_worker_id());
+
+            return task ? *task : nullptr;
+        }

-                // start next to current worker
-                worker.get_worker_id());
+        // give worker a ready task if available
+        // @return task if a new task was found, nullptr otherwise
+        Task* DefaultScheduler::steal_task(dispatch::thread::Worker& worker)
+        {
+            unsigned worker_id = worker.get_worker_id();

-                return task ? 
*task : nullptr; - } + SPDLOG_INFO("steal task for worker {}", worker_id); - // give worker a ready task if available - // @return task if a new task was found, nullptr otherwise - Task * DefaultScheduler::steal_task( dispatch::thread::Worker & worker ) - { - unsigned worker_id = worker.get_worker_id(); + if(Task* task = steal_ready_task(worker)) + { + SingletonContext::get().worker_pool->set_worker_state(worker_id, dispatch::thread::WorkerState::BUSY); + return task; + } - SPDLOG_INFO("steal task for worker {}", worker_id); + if(Task* task = steal_new_task(worker)) + { + task->pre_event.up(); + task->init_graph(); + + if(task->get_pre_event().notify(true)) + { + SingletonContext::get().worker_pool->set_worker_state( + worker_id, + dispatch::thread::WorkerState::BUSY); + return task; + } + } + + return nullptr; + } - if( Task * task = steal_ready_task( worker ) ) + /* Wakeup some worker or the main thread + * + * WakerId = 0 for main thread + * WakerId = WorkerId + 1 + * + * @return true if thread was indeed asleep + */ + bool DefaultScheduler::wake(WakerId id) { - SingletonContext::get().worker_pool->set_worker_state( worker_id, dispatch::thread::WorkerState::BUSY ); - return task; + if(id == 0) + return cv.notify(); + else if(id > 0 && id <= SingletonContext::get().worker_pool->size()) + return SingletonContext::get().worker_pool->get_worker(id - 1).wake(); + else + return false; } - if( Task * task = steal_new_task( worker ) ) + /* wakeup all wakers (workers + main thread) + */ + void DefaultScheduler::wake_all() { - task->pre_event.up(); - task->init_graph(); - - if( task->get_pre_event().notify( true ) ) - { - SingletonContext::get().worker_pool->set_worker_state( worker_id, dispatch::thread::WorkerState::BUSY ); - return task; - } + for(uint16_t i = 0; i <= SingletonContext::get().worker_pool->size(); ++i) + this->wake(i); } - return nullptr; - } - - /* Wakeup some worker or the main thread - * - * WakerId = 0 for main thread - * WakerId = WorkerId + 1 - * - * @return true if thread was indeed asleep - */ - bool DefaultScheduler::wake( WakerId id ) - { - if( id == 0 ) - return cv.notify(); - else if( id > 0 && id <= SingletonContext::get().worker_pool->size() ) - return SingletonContext::get().worker_pool->get_worker(id - 1).wake(); - else - return false; - } - - /* wakeup all wakers (workers + main thread) - */ - void DefaultScheduler::wake_all() - { - for( uint16_t i = 0; i <= SingletonContext::get().worker_pool->size(); ++i ) - this->wake( i ); - } - -} // namespace scheduler + } // namespace scheduler } // namespace redGrapes - diff --git a/redGrapes/scheduler/default_scheduler.hpp b/redGrapes/scheduler/default_scheduler.hpp index 7c6c2574..02ddc1ca 100644 --- a/redGrapes/scheduler/default_scheduler.hpp +++ b/redGrapes/scheduler/default_scheduler.hpp @@ -1,74 +1,72 @@ #pragma once -#include -#include -#include - #include #include - +#include #include -#include +#include -namespace redGrapes -{ -namespace scheduler -{ +#include +#include -/* - * Uses simple round-robin algorithm to distribute tasks to workers - * and implements work-stealing - */ -struct DefaultScheduler : public IScheduler +namespace redGrapes { - CondVar cv; - - DefaultScheduler(); - - void idle(); - - /* send the new task to a worker - */ - void emplace_task( Task & task ); - - /* send this already existing, - * but only through follower-list so it is not assigned to a worker yet. 
-     * since this task is now ready, send find a worker for it
-     */
-    void activate_task( Task & task );
-
-    /* tries to find a task with uninialized dependency edges in the
-     * task-graph in the emplacement queues of other workers
-     * and removes it from there
-     */
-    Task * steal_new_task( dispatch::thread::Worker & worker );
-
-    /* tries to find a ready task in any queue of other workers
-     * and removes it from the queue
-     */
-    Task * steal_ready_task( dispatch::thread::Worker & worker );
-
-    // give worker a ready task if available
-    // @return task if a new task was found, nullptr otherwise
-    Task * steal_task( dispatch::thread::Worker & worker );
-
-    /* Wakeup some worker or the main thread
-     *
-     * WakerId = 0 for main thread
-     * WakerId = WorkerId + 1
-     *
-     * @return true if thread was indeed asleep
-     */
-    bool wake( WakerId id = 0 );
-
-    /* wakeup all wakers (workers + main thread)
-     */
-    void wake_all();
-};
-
-} // namespace scheduler
+    namespace scheduler
+    {
+
+        /*
+         * Uses simple round-robin algorithm to distribute tasks to workers
+         * and implements work-stealing
+         */
+        struct DefaultScheduler : public IScheduler
+        {
+            CondVar cv;
+
+            DefaultScheduler();
+
+            void idle();
+
+            /* send the new task to a worker
+             */
+            void emplace_task(Task& task);
+
+            /* send this already existing task to a worker,
+             * but only through follower-list so it is not assigned to a worker yet.
+             * since this task is now ready, find a worker for it
+             */
+            void activate_task(Task& task);
+
+            /* tries to find a task with uninitialized dependency edges in the
+             * task-graph in the emplacement queues of other workers
+             * and removes it from there
+             */
+            Task* steal_new_task(dispatch::thread::Worker& worker);
+
+            /* tries to find a ready task in any queue of other workers
+             * and removes it from the queue
+             */
+            Task* steal_ready_task(dispatch::thread::Worker& worker);
+
+            // give worker a ready task if available
+            // @return task if a new task was found, nullptr otherwise
+            Task* steal_task(dispatch::thread::Worker& worker);
+
+            /* Wakeup some worker or the main thread
+             *
+             * WakerId = 0 for main thread
+             * WakerId = WorkerId + 1
+             *
+             * @return true if thread was indeed asleep
+             */
+            bool wake(WakerId id = 0);
+
+            /* wakeup all wakers (workers + main thread)
+             */
+            void wake_all();
+        };
+
+    } // namespace scheduler
 } // namespace redGrapes
-
diff --git a/redGrapes/scheduler/event.cpp b/redGrapes/scheduler/event.cpp
index a84c44cd..566c9c49 100644
--- a/redGrapes/scheduler/event.cpp
+++ b/redGrapes/scheduler/event.cpp
@@ -5,155 +5,178 @@
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ -#include -#include -#include -#include -#include -#include - #include -#include +#include +#include #include +#include #include -#include -#include #include -namespace redGrapes -{ -namespace scheduler -{ +#include -Event::Event() - : state(1) - , waker_id(-1) - , followers( memory::Allocator() ) -{ -} +#include +#include +#include +#include +#include -Event::Event(Event & other) - : state((uint16_t)other.state) - , waker_id( other.waker_id ) - , followers( memory::Allocator() ) +namespace redGrapes { -} + namespace scheduler + { -Event::Event(Event && other) - : state((uint16_t)other.state) - , waker_id(other.waker_id) - , followers( memory::Allocator() ) -{ -} + Event::Event() : state(1), waker_id(-1), followers(memory::Allocator()) + { + } -bool Event::is_reached() { return state == 0; } -bool Event::is_ready() { return state == 1; } -void Event::up() { state++; } -void Event::dn() { state--; } + Event::Event(Event& other) + : state((uint16_t) other.state) + , waker_id(other.waker_id) + , followers(memory::Allocator()) + { + } -void Event::add_follower( EventPtr follower ) -{ - TRACE_EVENT("Event", "add_follower"); + Event::Event(Event&& other) + : state((uint16_t) other.state) + , waker_id(other.waker_id) + , followers(memory::Allocator()) + { + } - if( !is_reached() ) - { - SPDLOG_TRACE("Event add follower"); - followers.push(follower); - follower->state++; - } -} - -//! note: follower has to be notified separately! -void Event::remove_follower( EventPtr follower ) -{ - TRACE_EVENT("Event", "remove_follower"); + bool Event::is_reached() + { + return state == 0; + } - followers.erase( follower ); -} + bool Event::is_ready() + { + return state == 1; + } -void Event::notify_followers() -{ - TRACE_EVENT("Event", "notify_followers"); + void Event::up() + { + state++; + } - for( auto follower = followers.rbegin(); follower != followers.rend(); ++follower ) - follower->notify(); -} + void Event::dn() + { + state--; + } -/*! A preceding event was reached and thus an incoming edge got removed. - * This events state is decremented and recursively notifies its followers - * in case it is now also reached. - * - * @param claimed if true, the scheduler already knows about the task, - * if false, activate task is called - * - * @return true if event is ready - */ -bool EventPtr::notify( bool claimed ) -{ - TRACE_EVENT("Event", "notify"); + void Event::add_follower(EventPtr follower) + { + TRACE_EVENT("Event", "add_follower"); - int old_state = this->get_event().state.fetch_sub(1); - int state = old_state - 1; + if(!is_reached()) + { + SPDLOG_TRACE("Event add follower"); + followers.push(follower); + follower->state++; + } + } - std::string tag_string; - switch( this->tag ) - { - case EventPtrTag::T_EVT_PRE: tag_string = "pre"; break; - case EventPtrTag::T_EVT_POST: tag_string = "post"; break; - case EventPtrTag::T_EVT_RES_SET: tag_string = "result-set"; break; - case EventPtrTag::T_EVT_RES_GET: tag_string = "result-get"; break; - case EventPtrTag::T_EVT_EXT: tag_string = "external"; break; - } + //! note: follower has to be notified separately! 
+        void Event::remove_follower(EventPtr follower)
+        {
+            TRACE_EVENT("Event", "remove_follower");

-    followers.erase( follower );
-}
+            followers.erase(follower);
+        }

-void Event::notify_followers()
-{
-    TRACE_EVENT("Event", "notify_followers");
+        void Event::notify_followers()
+        {
+            TRACE_EVENT("Event", "notify_followers");

-    for( auto follower = followers.rbegin(); follower != followers.rend(); ++follower )
-        follower->notify();
-}
+            for(auto follower = followers.rbegin(); follower != followers.rend(); ++follower)
+                follower->notify();
+        }

-/*! A preceding event was reached and thus an incoming edge got removed.
- * This events state is decremented and recursively notifies its followers
- * in case it is now also reached.
- *
- * @param claimed if true, the scheduler already knows about the task,
- *                if false, activate task is called
- *
- * @return true if event is ready
- */
-bool EventPtr::notify( bool claimed )
-{
-    TRACE_EVENT("Event", "notify");
+        /*! A preceding event was reached and thus an incoming edge got removed.
+         * This event's state is decremented and recursively notifies its followers
+         * in case it is now also reached.
+         *
+         * @param claimed if true, the scheduler already knows about the task,
+         *                if false, activate task is called
+         *
+         * @return true if event is ready
+         */
+        bool EventPtr::notify(bool claimed)
         {
-    int old_state = this->get_event().state.fetch_sub(1);
-    int state = old_state - 1;
+            TRACE_EVENT("Event", "notify");

-    std::string tag_string;
-    switch( this->tag )
-    {
-        case EventPtrTag::T_EVT_PRE: tag_string = "pre"; break;
-        case EventPtrTag::T_EVT_POST: tag_string = "post"; break;
-        case EventPtrTag::T_EVT_RES_SET: tag_string = "result-set"; break;
-        case EventPtrTag::T_EVT_RES_GET: tag_string = "result-get"; break;
-        case EventPtrTag::T_EVT_EXT: tag_string = "external"; break;
-    }
+            int old_state = this->get_event().state.fetch_sub(1);
+            int state = old_state - 1;
+
+            std::string tag_string;
+            switch(this->tag)
+            {
+            case EventPtrTag::T_EVT_PRE:
+                tag_string = "pre";
+                break;
+            case EventPtrTag::T_EVT_POST:
+                tag_string = "post";
+                break;
+            case EventPtrTag::T_EVT_RES_SET:
+                tag_string = "result-set";
+                break;
+            case EventPtrTag::T_EVT_RES_GET:
+                tag_string = "result-get";
+                break;
+            case EventPtrTag::T_EVT_EXT:
+                tag_string = "external";
+                break;
+            }

-    if( this->task )
-        SPDLOG_TRACE("notify event {} ({}-event of task {}) ~~> state = {}",
-            (void *)&this->get_event(), tag_string, this->task->task_id, state);
+            if(this->task)
+                SPDLOG_TRACE(
+                    "notify event {} ({}-event of task {}) ~~> state = {}",
+                    (void*) &this->get_event(),
+                    tag_string,
+                    this->task->task_id,
+                    state);

-    assert(old_state > 0);
+            assert(old_state > 0);

-    bool remove_task = false;
+            bool remove_task = false;

-    if(task)
-    {
-        // pre event ready
-        if(tag == scheduler::T_EVT_PRE && state == 1)
+            if(task)
             {
-            if(!claimed)
-                SingletonContext::get().scheduler->activate_task(*task);
-        }
+                // pre event ready
+                if(tag == scheduler::T_EVT_PRE && state == 1)
+                {
+                    if(!claimed)
+                        SingletonContext::get().scheduler->activate_task(*task);
+                }

-        // post event reached:
-        // no other task can now create dependencies to this
-        // task after deleting it from the resource list
-        if( state == 0 && tag == scheduler::T_EVT_POST )
-            task->delete_from_resources();
-    }
+                // post event reached:
+                // no other task can now create dependencies to this
+                // task after deleting it from the resource list
+                if(state == 0 && tag == scheduler::T_EVT_POST)
+                    task->delete_from_resources();
+            }

-    // if event is ready or reached (state ∈ {0,1})
-    if( state <= 1 && this->get_event().waker_id >= 0 )
-        SingletonContext::get().scheduler->wake( this->get_event().waker_id );
+            // if event is ready or reached (state ∈ {0,1})
+            if(state <= 1 && this->get_event().waker_id >= 0)
+                SingletonContext::get().scheduler->wake(this->get_event().waker_id);

-    if( state == 0 )
-    {
-        this->get_event().notify_followers();
+            if(state == 0)
+            {
+                this->get_event().notify_followers();

-        // the second one of either post-event or result-get-event shall destroy the task
-        if( task )
-            if( tag == scheduler::T_EVT_POST
-                || tag == scheduler::T_EVT_RES_GET )
+                // the second one of either post-event or result-get-event shall destroy the task
+                if(task)
+                    if(tag == scheduler::T_EVT_POST || tag == scheduler::T_EVT_RES_GET)
                     {
-                if( task->removal_countdown.fetch_sub(1) == 1 )
-                    task->space->free_task( task );
+                        if(task->removal_countdown.fetch_sub(1) == 1)
+                            task->space->free_task(task);
                     }
-    }
-    // return true if event is ready (state == 1)
-    return state == 1;
-}
+            }
+            // return true if event is ready (state == 1)
+            return state == 1;
+        }
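+
+        /* Worked example (illustrative): a task with two predecessors gets
+         * pre_event.state == 3, i.e. 1 from construction plus one per incoming
+         * edge added via add_follower(). Each finished predecessor calls
+         * notify(), counting the state down 3 -> 2 -> 1; at state == 1 the
+         * pre-event is ready and the task may start, and one more decrement on
+         * dispatch reaches state == 0.
+         */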
+    } // namespace scheduler
+} // namespace redGrapes
diff --git a/redGrapes/scheduler/event.hpp b/redGrapes/scheduler/event.hpp
index 8b3ead4e..e3470373 100644
--- a/redGrapes/scheduler/event.hpp
+++ b/redGrapes/scheduler/event.hpp
@@ -7,17 +7,19 @@

 #pragma once

+#include
+#include
+
+#include
+
 #include
-#include
-#include
 #include
 #include
-#include
-#include
-#include
+#include
+#include

 #ifndef REDGRAPES_EVENT_FOLLOWER_LIST_CHUNKSIZE
-#define REDGRAPES_EVENT_FOLLOWER_LIST_CHUNKSIZE 16
+#    define REDGRAPES_EVENT_FOLLOWER_LIST_CHUNKSIZE 16
 #endif

 namespace std
@@ -28,96 +30,95 @@

 namespace redGrapes
 {
-struct Task;
-
-namespace scheduler
-{
-
-struct Event;
-
-enum EventPtrTag {
-    T_UNINITIALIZED = 0,
-    T_EVT_PRE,
-    T_EVT_POST,
-    T_EVT_RES_SET,
-    T_EVT_RES_GET,
-    T_EVT_EXT,
-};
-
-struct EventPtr
-{
-    enum EventPtrTag tag;
-    Task * task;
-    std::shared_ptr< Event > external_event;
-
-    inline bool operator==( EventPtr const & other ) const
-    {
-        return this->tag == other.tag && this->task == other.task;
-    }
+    struct Task;

-    Event & get_event() const;
-
-    inline Event & operator*() const
+    namespace scheduler
     {
-        return get_event();
-    }
-
-    inline Event * operator->() const
-    {
-        return &get_event();
-    }
-
-    /*! A preceding event was reached and thus an incoming edge got removed.
-     * This events state is decremented and recursively notifies its followers
-     * in case it is now also reached.
-     * @return true if event was ready
-     */
-    bool notify( bool claimed = false );
-};
-
-/*!
- * An event is the abstraction of the programs execution state.
+        struct Event;
+
+        enum EventPtrTag
+        {
+            T_UNINITIALIZED = 0,
+            T_EVT_PRE,
+            T_EVT_POST,
+            T_EVT_RES_SET,
+            T_EVT_RES_GET,
+            T_EVT_EXT,
+        };
+
+        struct EventPtr
+        {
+            enum EventPtrTag tag;
+            Task* task;
+            std::shared_ptr<Event> external_event;
+
+            inline bool operator==(EventPtr const& other) const
+            {
+                return this->tag == other.tag && this->task == other.task;
+            }
+
+            Event& get_event() const;
+
+            inline Event& operator*() const
+            {
+                return get_event();
+            }
+
+            inline Event* operator->() const
+            {
+                return &get_event();
+            }
+
+            /*! A preceding event was reached and thus an incoming edge got removed.
+             * This event's state is decremented and recursively notifies its followers
+             * in case it is now also reached.
+             * @return true if event was ready
+             */
+            bool notify(bool claimed = false);
+        };
+
+        /*!
+         * An event is the abstraction of the program's execution state.
+         * They form a flat/non-recursive graph of events.
+         * During runtime, each thread encounters a sequence of events.
+         * The goal is to synchronize these events in the manner
+         * "Event A must occur before Event B".
+         *
+         * Multiple events need to be related, so that they
+         * form a partial order.
+         * This order is a homomorphic image from the timeline of
+         * execution states.
+         */
+        struct Event
+        {
+            /*! number of incoming edges
+             * state == 0: event is reached and can be removed
+             */
+            std::atomic_uint16_t state;
+
+            //! waker that is waiting for this event
+            WakerId waker_id;
+
+            //! the set of subsequent events
+            ChunkedList<EventPtr, REDGRAPES_EVENT_FOLLOWER_LIST_CHUNKSIZE> followers;
+
+            Event();
+            Event(Event&);
+            Event(Event&&);
+
+            bool is_reached();
+            bool is_ready();
+            void up();
+            void dn();
+
+            //! note: follower has to be notified separately!
+            void remove_follower(EventPtr follower);
+            void add_follower(EventPtr follower);
+
+            void notify_followers();
+        };
+
+    } // namespace scheduler
 } // namespace redGrapes
-
-
diff --git a/redGrapes/scheduler/event_ptr.cpp b/redGrapes/scheduler/event_ptr.cpp
index 55be80f6..fcad9710 100644
--- a/redGrapes/scheduler/event_ptr.cpp
+++ b/redGrapes/scheduler/event_ptr.cpp
@@ -5,37 +5,36 @@
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

-#include
-
 #include
 #include
 #include

+#include
+
 namespace redGrapes
 {
-namespace scheduler
-{
-
-Event & EventPtr::get_event() const
+    namespace scheduler
     {
-    switch( tag )
+
+        Event& EventPtr::get_event() const
         {
-        case T_EVT_PRE:
-            return task->pre_event;
-        case T_EVT_POST:
-            return task->post_event;
-        case T_EVT_RES_SET:
-            return task->result_set_event;
-        case T_EVT_RES_GET:
-            return task->result_get_event;
-        case T_EVT_EXT:
-            return *external_event;
-        default:
-            throw std::runtime_error("invalid event tag");
+            switch(tag)
+            {
+            case T_EVT_PRE:
+                return task->pre_event;
+            case T_EVT_POST:
+                return task->post_event;
+            case T_EVT_RES_SET:
+                return task->result_set_event;
+            case T_EVT_RES_GET:
+                return task->result_get_event;
+            case T_EVT_EXT:
+                return *external_event;
+            default:
+                throw std::runtime_error("invalid event tag");
+            }
         }
-    }
-} // namespace scheduler
+    } // namespace scheduler
 } // namespace redGrapes
-
diff --git a/redGrapes/scheduler/scheduler.hpp b/redGrapes/scheduler/scheduler.hpp
index d1316210..68897639 100644
--- a/redGrapes/scheduler/scheduler.hpp
+++ b/redGrapes/scheduler/scheduler.hpp
@@ -7,65 +7,75 @@

 #pragma once

-#include
 #include

-namespace redGrapes
-{
-
-struct Task;
-
-namespace dispatch
-{
-namespace thread
-{
-struct Worker;
-}
-}
-
-namespace scheduler
-{
-
-using WakerId = int16_t;
+#include

-/*! Scheduler Interface
- */
-struct IScheduler
+namespace redGrapes
 {
-    virtual ~IScheduler()
-    {
-    }
+    struct Task;

-    /*! whats the task dependency type for the edge a -> b (task a precedes task b)
-     * @return true if task b depends on the pre event of task a, false if task b depends on the post event of task b.
-     */
-    virtual bool task_dependency_type( Task const & a, Task const & b )
-    {
-        return false;
-    }
+    namespace dispatch
+    {
+        namespace thread
+        {
+            struct Worker;
+        } // namespace thread
+    } // namespace dispatch

-    virtual void idle(){}
-
-    //! add task to the set of to-initialize tasks
-    virtual void emplace_task( Task & task ) {}
-
-    //! add task to ready set
-    virtual void activate_task( Task & task ) {}
-
-    //! give worker work if available
-    virtual Task * steal_task( dispatch::thread::Worker & worker )
+    namespace scheduler
     {
-        return nullptr;
-    }
+        using WakerId = int16_t;

-    virtual void wake_all() {}
-    virtual bool wake( WakerId id = 0 )
+        /*! Scheduler Interface
+         */
+        struct IScheduler
         {
-        return false;
-    }
-};
-} // namespace scheduler
+            virtual ~IScheduler()
+            {
+            }
+
+            /*! what's the task dependency type for the edge a -> b (task a precedes task b)
+             * @return true if task b depends on the pre event of task a, false if task b depends on the post event of
+             * task a.
+             */
+            virtual bool task_dependency_type(Task const& a, Task const& b)
+            {
+                return false;
+            }
+
+            virtual void idle()
+            {
+            }
+
+            //! add task to the set of to-initialize tasks
+            virtual void emplace_task(Task& task)
+            {
+            }
+
+            //! add task to ready set
+            virtual void activate_task(Task& task)
+            {
+            }
+
+            //! give worker work if available
+            virtual Task* steal_task(dispatch::thread::Worker& worker)
+            {
+                return nullptr;
+            }
+
+            virtual void wake_all()
+            {
+            }
+
+            virtual bool wake(WakerId id = 0)
+            {
+                return false;
+            }
+        };
+
+    } // namespace scheduler
 } // namespace redGrapes
-
diff --git a/redGrapes/scheduler/tag_match.hpp b/redGrapes/scheduler/tag_match.hpp
index d381f034..4fd95333 100644
--- a/redGrapes/scheduler/tag_match.hpp
+++ b/redGrapes/scheduler/tag_match.hpp
@@ -8,18 +8,19 @@

 #pragma once

-#include
-#include
-
-#include
-#include
 #include
 #include
+#include
+#include
+
+#include
+
+#include

 namespace redGrapes
 {
-namespace scheduler
-{
+    namespace scheduler
+    {

 template
 struct TagMatch : IScheduler
@@ -27,7 +28,7 @@
     struct SubScheduler
     {
         std::bitset supported_tags;
-        std::shared_ptr< IScheduler > s;
+        std::shared_ptr<IScheduler> s;
     };

     std::vector sub_schedulers;

@@ -45,24 +46,24 @@
         this->add_scheduler(supported_tags, s);
     }

-    Task * steal_task( dispatch::thread::Worker & worker )
+    Task* steal_task(dispatch::thread::Worker& worker)
     {
-        for( auto& s : sub_schedulers )
-            if( Task * t = s.s->steal_task( worker ) )
-                return t;
+        for(auto& s : sub_schedulers)
+            if(Task* t = s.s->steal_task(worker))
+                return t;

-        return nullptr;
+        return nullptr;
     }

-    void emplace_task( Task & task )
+    void emplace_task(Task& task)
     {
         if(auto sub_scheduler = get_matching_scheduler(task.required_scheduler_tags))
             return (*sub_scheduler)->emplace_task(task);
         else
-            throw std::runtime_error("no scheduler found for task");
+            throw std::runtime_error("no scheduler found for task");
     }

-    void activate_task(Task & task)
+    void activate_task(Task& task)
     {
         if(auto sub_scheduler = get_matching_scheduler(task.required_scheduler_tags))
             return (*sub_scheduler)->activate_task(task);
@@ -80,7 +81,7 @@
             return std::nullopt;
     }

-    bool task_dependency_type(Task const & a, Task & b)
+    bool task_dependency_type(Task const& a, Task& b)
     {
         /// fixme: b or a ?
         if(auto sub_scheduler = get_matching_scheduler(b.required_scheduler_tags))
@@ -95,10 +96,10 @@
             s.s->wake_all();
     }

-    bool wake( WakerId waker_id )
+    bool wake(WakerId waker_id)
     {
         for(auto const& s : sub_schedulers)
-            if( s.s->wake( waker_id ) )
+            if(s.s->wake(waker_id))
                 return true;

         return false;
@@ -107,7 +108,7 @@

    /*! 
Factory function to easily create a tag-match-scheduler object */ - template< std::size_t T_tag_count = 64 > + template struct TagMatchBuilder { std::shared_ptr> tag_match; @@ -134,4 +135,3 @@ namespace scheduler } // namespace scheduler } // namespace redGrapes - diff --git a/redGrapes/scheduler/tag_match_property.hpp b/redGrapes/scheduler/tag_match_property.hpp index 4491a5e0..bff1ae11 100644 --- a/redGrapes/scheduler/tag_match_property.hpp +++ b/redGrapes/scheduler/tag_match_property.hpp @@ -1,6 +1,8 @@ #pragma once +#include + #include #include #include @@ -23,7 +25,7 @@ namespace redGrapes { } - PropertiesBuilder & scheduling_tags(std::initializer_list tags) + PropertiesBuilder& scheduling_tags(std::initializer_list tags) { std::bitset tags_bitset; for(auto tag : tags) @@ -31,7 +33,7 @@ namespace redGrapes return scheduling_tags(tags_bitset); } - PropertiesBuilder & scheduling_tags(std::bitset tags) + PropertiesBuilder& scheduling_tags(std::bitset tags) { builder.task->required_scheduler_tags |= tags; return builder; @@ -40,17 +42,21 @@ namespace redGrapes struct Patch { - template + template struct Builder { - Builder( PatchBuilder & ) {} + Builder(PatchBuilder&) + { + } }; }; - void apply_patch( Patch const & ) {} + void apply_patch(Patch const&) + { + } }; - } -} + } // namespace scheduler +} // namespace redGrapes template struct fmt::formatter> @@ -84,6 +90,3 @@ struct fmt::formatter #ifndef REDGRAPES_CONDVAR_TIMEOUT -#define REDGRAPES_CONDVAR_TIMEOUT 0x200000 +# define REDGRAPES_CONDVAR_TIMEOUT 0x20'0000 #endif namespace redGrapes { - CondVar::CondVar() - : CondVar( REDGRAPES_CONDVAR_TIMEOUT ) - {} + CondVar::CondVar() : CondVar(REDGRAPES_CONDVAR_TIMEOUT) + { + } - CondVar::CondVar( unsigned timeout ) - : should_wait( true ) - , timeout(timeout) + CondVar::CondVar(unsigned timeout) : should_wait(true), timeout(timeout) { } void CondVar::wait() { unsigned count = 0; - while( should_wait.load(std::memory_order_acquire) ) + while(should_wait.load(std::memory_order_acquire)) { - if( ++count > timeout ) + if(++count > timeout) { // TODO: check this opmitization - //busy.clear(std::memory_order_release); + // busy.clear(std::memory_order_release); - if( should_wait.load(std::memory_order_acquire) ) - { - std::unique_lock< CVMutex > l( m ); - cv.wait( l, [this]{ return ! should_wait.load(std::memory_order_acquire); } ); + if(should_wait.load(std::memory_order_acquire)) + { + std::unique_lock l(m); + cv.wait(l, [this] { return !should_wait.load(std::memory_order_acquire); }); } } } @@ -45,9 +43,9 @@ namespace redGrapes should_wait.compare_exchange_strong(w, false, std::memory_order_release); // TODO: check this optimization - //if( ! busy.test_and_set(std::memory_order_acquire) ) + // if( ! 
busy.test_and_set(std::memory_order_acquire) ) { - std::unique_lock< std::mutex > l( m ); + std::unique_lock l(m); cv.notify_all(); } @@ -55,4 +53,3 @@ namespace redGrapes } } // namespace redGrapes - diff --git a/redGrapes/sync/cv.hpp b/redGrapes/sync/cv.hpp index 2e2d05f0..3621ee42 100644 --- a/redGrapes/sync/cv.hpp +++ b/redGrapes/sync/cv.hpp @@ -1,36 +1,41 @@ #pragma once +#include + #include #include -#include namespace redGrapes { -struct PhantomLock -{ - inline void lock() {} - inline void unlock() {} -}; + struct PhantomLock + { + inline void lock() + { + } -struct CondVar -{ - std::atomic should_wait; - std::condition_variable cv; - std::atomic_flag busy; + inline void unlock() + { + } + }; - using CVMutex = std::mutex; - CVMutex m; + struct CondVar + { + std::atomic should_wait; + std::condition_variable cv; + std::atomic_flag busy; - unsigned timeout; + using CVMutex = std::mutex; + CVMutex m; - CondVar(); - CondVar( unsigned timeout ); - - void wait(); - bool notify(); -}; + unsigned timeout; -} // namespace redGrapes + CondVar(); + CondVar(unsigned timeout); + void wait(); + bool notify(); + }; + +} // namespace redGrapes diff --git a/redGrapes/sync/spinlock.hpp b/redGrapes/sync/spinlock.hpp index bcaef391..7ed5b914 100644 --- a/redGrapes/sync/spinlock.hpp +++ b/redGrapes/sync/spinlock.hpp @@ -10,103 +10,103 @@ namespace redGrapes #define SPIN 1 -struct /*alignas(64)*/ SpinLock -{ + struct /*alignas(64)*/ SpinLock + { #if SPIN - std::atomic state; + std::atomic state; #else - std::mutex m; + std::mutex m; #endif - SpinLock() + SpinLock() #if SPIN - : state(false) + : state(false) #endif - { - } - - inline void lock() - { -#if SPIN - while( true ) { - bool s = false; - if( state.compare_exchange_weak(s, true, std::memory_order_acquire) ) - if( s == false ) - return; - - while( state.load(std::memory_order_relaxed) ); } -#else - m.lock(); -#endif - } - inline void unlock() - { + inline void lock() + { #if SPIN - state.store(false, std::memory_order_release); + while(true) + { + bool s = false; + if(state.compare_exchange_weak(s, true, std::memory_order_acquire)) + if(s == false) + return; + + while(state.load(std::memory_order_relaxed)) + ; + } #else - m.unlock(); + m.lock(); #endif - } -}; + } -/* -struct alignas(64) RWSpinLock -{ + inline void unlock() + { #if SPIN - alignas(64) std::atomic reader_count; - alignas(64) std::atomic write; + state.store(false, std::memory_order_release); #else - std::shared_mutex m; + m.unlock(); #endif + } + }; - SpinLock() -#if SPIN - : reader_count(0) - , write(0) -#endif + /* + struct alignas(64) RWSpinLock { - } + #if SPIN + alignas(64) std::atomic reader_count; + alignas(64) std::atomic write; + #else + std::shared_mutex m; + #endif + + SpinLock() + #if SPIN + : reader_count(0) + , write(0) + #endif + { + } - inline void lock_shared() - { - reader_count.fetch_add(1); - } + inline void lock_shared() + { + reader_count.fetch_add(1); + } - inline void unlock_shared() - { - reader_count.fetch_sub(1); - } - - inline void lock() - { -#if SPIN - while( true ) + inline void unlock_shared() { - bool s = false; - if( write.compare_exchange_weak(s, true, std::memory_order_acquire) ) - if( s == false ) - return; + reader_count.fetch_sub(1); + } - while( state.load(std::memory_order_relaxed) ); + inline void lock() + { + #if SPIN + while( true ) + { + bool s = false; + if( write.compare_exchange_weak(s, true, std::memory_order_acquire) ) + if( s == false ) + return; + + while( state.load(std::memory_order_relaxed) ); + } + #else + m.lock(); + #endif } 
-#else - m.lock(); -#endif - } - inline void unlock() - { -#if SPIN - write.store(false, std::memory_order_release); -#else - m.unlock(); -#endif - } -}; -*/ + inline void unlock() + { + #if SPIN + write.store(false, std::memory_order_release); + #else + m.unlock(); + #endif + } + }; + */ } // namespace redGrapes - diff --git a/redGrapes/task/future.hpp b/redGrapes/task/future.hpp index a4248b2e..653d7206 100644 --- a/redGrapes/task/future.hpp +++ b/redGrapes/task/future.hpp @@ -16,119 +16,113 @@ namespace redGrapes { -void yield( scheduler::EventPtr event ); + void yield(scheduler::EventPtr event); -/*! - * Wrapper for std::future which consumes jobs - * instead of waiting in get() - */ -template -struct Future -{ - Future(Task & task) - : task( task ), - taken(false) - {} - - Future( Future && other ) - : task( other.task ), - taken( other.taken ) - { - SPDLOG_TRACE("MOVE future"); - other.taken = true; - } - - ~Future() + /*! + * Wrapper for std::future which consumes jobs + * instead of waiting in get() + */ + template + struct Future { - if(!taken) + Future(Task& task) : task(task), taken(false) { - SPDLOG_TRACE("notify in destruct of future"); - task.get_result_get_event().notify(); } - } - /*! - * yields until the task has a valid result - * and retrieves it. - * - * @return the result - */ - T get(void) - { - // wait until result is set - yield( task.get_result_set_event() ); - - // take result - T result = std::move(*reinterpret_cast(task.get_result_data())); - taken = true; - task.get_result_get_event().notify(); - - return std::move(result); - } - - /*! check if the result is already computed - */ - bool is_ready(void) const - { - return task.result_set_event.is_reached(); - } + Future(Future&& other) : task(other.task), taken(other.taken) + { + SPDLOG_TRACE("MOVE future"); + other.taken = true; + } -private: - bool taken; - Task & task; -}; // struct Future + ~Future() + { + if(!taken) + { + SPDLOG_TRACE("notify in destruct of future"); + task.get_result_get_event().notify(); + } + } -template<> -struct Future -{ - Future(Task & task) - : task( task ), - taken(false) - {} - - Future( Future && other ) - : task( other.task ), - taken( other.taken ) - { - SPDLOG_TRACE("MOVE future"); - other.taken = true; - } - - ~Future() - { - if(!taken) + /*! + * yields until the task has a valid result + * and retrieves it. + * + * @return the result + */ + T get(void) { - SPDLOG_TRACE("notify in destruct of future"); + // wait until result is set + yield(task.get_result_set_event()); + + // take result + T result = std::move(*reinterpret_cast(task.get_result_data())); + taken = true; task.get_result_get_event().notify(); + + return std::move(result); } - } - /*! - * yields until the task has a valid result - * and retrieves it. - * - * @return the result - */ - void get(void) - { - // wait until result is set - yield( task.get_result_set_event() ); + /*! check if the result is already computed + */ + bool is_ready(void) const + { + return task.result_set_event.is_reached(); + } - // take result - taken = true; - task.get_result_get_event().notify(); - } + private: + bool taken; + Task& task; + }; // struct Future - /*! 
check if the result is already computed - */ - bool is_ready(void) const + template<> + struct Future { - return task.result_set_event.is_reached(); - } + Future(Task& task) : task(task), taken(false) + { + } + + Future(Future&& other) : task(other.task), taken(other.taken) + { + SPDLOG_TRACE("MOVE future"); + other.taken = true; + } + + ~Future() + { + if(!taken) + { + SPDLOG_TRACE("notify in destruct of future"); + task.get_result_get_event().notify(); + } + } + + /*! + * yields until the task has a valid result + * and retrieves it. + * + * @return the result + */ + void get(void) + { + // wait until result is set + yield(task.get_result_set_event()); + + // take result + taken = true; + task.get_result_get_event().notify(); + } + + /*! check if the result is already computed + */ + bool is_ready(void) const + { + return task.result_set_event.is_reached(); + } -private: - bool taken; - Task & task; -}; + private: + bool taken; + Task& task; + }; } // namespace redGrapes diff --git a/redGrapes/task/property/graph.cpp b/redGrapes/task/property/graph.cpp index 8ce4b66f..3680c795 100644 --- a/redGrapes/task/property/graph.cpp +++ b/redGrapes/task/property/graph.cpp @@ -5,135 +5,131 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include - +#include +#include #include #include -#include #include -#include +#include #include -#include + +#include +#include namespace redGrapes { -/*! create a new (external) event which precedes the tasks post-event - */ -scheduler::EventPtr GraphProperty::make_event() -{ - auto event = memory::alloc_shared< scheduler::Event >(); - event->add_follower( get_post_event() ); - return scheduler::EventPtr{ scheduler::T_EVT_EXT, nullptr, event }; -} - -/*! - * Insert a new task and add the same dependencies as in the precedence graph. - * Note that tasks must be added in order, since only preceding tasks are considered! - * - * The precedence graph containing the task is assumed to be locked. - */ -void GraphProperty::init_graph() -{ - TRACE_EVENT("Graph", "init_graph"); - for( auto r = this->task->unique_resources.rbegin(); r != this->task->unique_resources.rend(); ++r ) + /*! create a new (external) event which precedes the tasks post-event + */ + scheduler::EventPtr GraphProperty::make_event() { - if( r->task_entry != r->resource->users.rend() ) + auto event = memory::alloc_shared(); + event->add_follower(get_post_event()); + return scheduler::EventPtr{scheduler::T_EVT_EXT, nullptr, event}; + } + + /*! + * Insert a new task and add the same dependencies as in the precedence graph. + * Note that tasks must be added in order, since only preceding tasks are considered! + * + * The precedence graph containing the task is assumed to be locked. + */ + void GraphProperty::init_graph() + { + TRACE_EVENT("Graph", "init_graph"); + for(auto r = this->task->unique_resources.rbegin(); r != this->task->unique_resources.rend(); ++r) { - // TODO: can this lock be avoided? - // - // even though the container supports - // lock free iteration and removal, - // with out this lock, its still possible, - // that the iterator points at an element, - // which will get removed AFTER iterating - // and BEFORE adding the dependency. 
-            std::unique_lock< SpinLock > lock( r->resource->users_mutex );
-
-            TRACE_EVENT("Graph", "CheckPredecessors");
-            auto it = r->task_entry;
-
-            ++it;
-            for(; it != r->resource->users.rend(); ++it )
+            if(r->task_entry != r->resource->users.rend())
             {
-                TRACE_EVENT("Graph", "Check Pred");
-                Task * preceding_task = *it;
-
-                if( preceding_task == this->space->parent )
-                    break;
-
-                if(
-                    preceding_task->space == this->space &&
-                    this->space->is_serial( *preceding_task, *this->task )
-                )
+                // TODO: can this lock be avoided?
+                //
+                // even though the container supports
+                // lock free iteration and removal,
+                // without this lock, it's still possible
+                // that the iterator points at an element,
+                // which will get removed AFTER iterating
+                // and BEFORE adding the dependency.
+                std::unique_lock<SpinLock> lock(r->resource->users_mutex);
+
+                TRACE_EVENT("Graph", "CheckPredecessors");
+                auto it = r->task_entry;
+
+                ++it;
+                for(; it != r->resource->users.rend(); ++it)
                 {
-                    add_dependency( *preceding_task );
-                    if( preceding_task->has_sync_access( r->resource ) )
+                    TRACE_EVENT("Graph", "Check Pred");
+                    Task* preceding_task = *it;
+
+                    if(preceding_task == this->space->parent)
                         break;
+
+                    if(preceding_task->space == this->space && this->space->is_serial(*preceding_task, *this->task))
+                    {
+                        add_dependency(*preceding_task);
+                        if(preceding_task->has_sync_access(r->resource))
+                            break;
+                    }
                 }
             }
         }
-    }

-    // add dependency to parent
-    if( auto parent = this->space->parent )
-    {
-        SPDLOG_TRACE("add event dep to parent");
-        this->post_event.add_follower( parent->get_post_event() );
+        // add dependency to parent
+        if(auto parent = this->space->parent)
+        {
+            SPDLOG_TRACE("add event dep to parent");
+            this->post_event.add_follower(parent->get_post_event());
+        }
     }
-}

-void GraphProperty::delete_from_resources()
-{
-    TRACE_EVENT("Graph", "delete_from_resources");
-    for( auto r = this->task->unique_resources.rbegin(); r != this->task->unique_resources.rend(); ++r )
+    void GraphProperty::delete_from_resources()
     {
-        // TODO: can this lock be avoided?
-        // corresponding lock to init_graph()
-        std::unique_lock< SpinLock > lock( r->resource->users_mutex );
+        TRACE_EVENT("Graph", "delete_from_resources");
+        for(auto r = this->task->unique_resources.rbegin(); r != this->task->unique_resources.rend(); ++r)
+        {
+            // TODO: can this lock be avoided?
+            // corresponding lock to init_graph()
+            std::unique_lock<SpinLock> lock(r->resource->users_mutex);

-        if( r->task_entry != r->resource->users.rend() )
-            r->resource->users.remove( r->task_entry );
+            if(r->task_entry != r->resource->users.rend())
+                r->resource->users.remove(r->task_entry);
+        }
     }
-}

-void GraphProperty::add_dependency( Task & preceding_task )
-{
-    // precedence graph
-    //in_edges.push_back(&preceding_task);
-
-    // scheduling graph
-    auto preceding_event =
-        SingletonContext::get().scheduler->task_dependency_type(preceding_task, *this->task)
-        ? preceding_task->get_pre_event() : preceding_task->get_post_event();
+    void GraphProperty::add_dependency(Task& preceding_task)
+    {
+        // precedence graph
+        // in_edges.push_back(&preceding_task);

-    if( ! preceding_event->is_reached() )
-        preceding_event->add_follower( this->get_pre_event() );
-}
+        // scheduling graph
+        auto preceding_event = SingletonContext::get().scheduler->task_dependency_type(preceding_task, *this->task)
+            ? 
preceding_task->get_pre_event() + : preceding_task->get_post_event(); -void GraphProperty::update_graph( ) -{ - //std::unique_lock< SpinLock > lock( post_event.followers_mutex ); + if(!preceding_event->is_reached()) + preceding_event->add_follower(this->get_pre_event()); + } - // for( auto follower : post_event.followers ) - for( auto it = post_event.followers.rbegin(); it != post_event.followers.rend(); ++it ) + void GraphProperty::update_graph() { - scheduler::EventPtr follower = *it; - if( follower.task ) + // std::unique_lock< SpinLock > lock( post_event.followers_mutex ); + + // for( auto follower : post_event.followers ) + for(auto it = post_event.followers.rbegin(); it != post_event.followers.rend(); ++it) { - if( ! space->is_serial(*this->task, *follower.task) ) + scheduler::EventPtr follower = *it; + if(follower.task) { - // remove dependency - //follower.task->in_edges.erase(std::find(std::begin(follower.task->in_edges), std::end(follower.task->in_edges), this)); - post_event.followers.erase( follower ); + if(!space->is_serial(*this->task, *follower.task)) + { + // remove dependency + // follower.task->in_edges.erase(std::find(std::begin(follower.task->in_edges), + // std::end(follower.task->in_edges), this)); + post_event.followers.erase(follower); - follower.notify(); + follower.notify(); + } } } } -} } // namespace redGrapes - - diff --git a/redGrapes/task/property/graph.hpp b/redGrapes/task/property/graph.hpp index a68f6e7e..d7d7cadc 100644 --- a/redGrapes/task/property/graph.hpp +++ b/redGrapes/task/property/graph.hpp @@ -7,193 +7,198 @@ #pragma once -#include -#include -#include -#include -#include #include +#include +#include #include +#include +#include +#include -//#include -#include +// #include #include +#include namespace redGrapes { -struct Task; -struct TaskSpace; - -/*! - * Each task associates with two events: - * A Pre-Event and a Post-Event. - \verbatim - +------+ - >>> / Pre- \ >>> | Task | >>> / Post- \ >>> - \ Event / +------+ \ Event / + struct Task; + struct TaskSpace; - \endverbatim - * - * Edges between Events determine in which order tasks - * can be scheduled. - * - * Data-dependencies between tasks are assured by - * edges from post-events to pre-events. - * - * With child-tasks, the post-event of the child task - * precedes the parent tasks post-event. - */ -struct GraphProperty -{ - Task & operator*() - { - return *task; - } - Task * operator->() + /*! + * Each task associates with two events: + * A Pre-Event and a Post-Event. + \verbatim + +------+ + >>> / Pre- \ >>> | Task | >>> / Post- \ >>> + \ Event / +------+ \ Event / + + \endverbatim + * + * Edges between Events determine in which order tasks + * can be scheduled. + * + * Data-dependencies between tasks are assured by + * edges from post-events to pre-events. + * + * With child-tasks, the post-event of the child task + * precedes the parent tasks post-event. + */ + struct GraphProperty { - return task; - } + Task& operator*() + { + return *task; + } - Task * task; + Task* operator->() + { + return task; + } - //! number of parents - uint8_t scope_depth; + Task* task; - //! task space that contains this task, must not be null - std::shared_ptr< TaskSpace > space; + //! number of parents + uint8_t scope_depth; - //! task space for children, may be null - std::shared_ptr< TaskSpace > children; + //! 
task space that contains this task, must not be null
+        std::shared_ptr<TaskSpace> space;
 
-    /*
-    // in edges dont need a mutex because they are initialized
-    // once by `init_dependencies()` and only read afterwards.
-    // expired pointers (null) must be ignored
-    std::vector<Task*> in_edges;
-    */
+        //! task space for children, may be null
+        std::shared_ptr<TaskSpace> children;
 
-    scheduler::Event pre_event;
-    scheduler::Event post_event;
-    scheduler::Event result_set_event;
-    scheduler::Event result_get_event;
+        /*
+        // in edges don't need a mutex because they are initialized
+        // once by `init_dependencies()` and only read afterwards.
+        // expired pointers (null) must be ignored
+        std::vector<Task*> in_edges;
+        */
 
-    inline scheduler::EventPtr get_pre_event()
-    {
-        return scheduler::EventPtr { scheduler::T_EVT_PRE, this->task };
-    }
-    inline scheduler::EventPtr get_post_event()
-    {
-        return scheduler::EventPtr { scheduler::T_EVT_POST, this->task };
-    }
-    inline scheduler::EventPtr get_result_set_event()
-    {
-        return scheduler::EventPtr { scheduler::T_EVT_RES_SET, this->task };
-    }
-    inline scheduler::EventPtr get_result_get_event()
-    {
-        return scheduler::EventPtr { scheduler::T_EVT_RES_GET, this->task };
-    }
+        scheduler::Event pre_event;
+        scheduler::Event post_event;
+        scheduler::Event result_set_event;
+        scheduler::Event result_get_event;
 
-    inline bool is_ready()
-    {
-        return pre_event.is_ready();
-    }
-    inline bool is_running()
-    {
-        return pre_event.is_reached();
-    }
-    inline bool is_finished()
-    {
-        return post_event.is_reached();
-    }
-    inline bool is_dead()
-    {
-        return post_event.is_reached() && result_get_event.is_reached();
-    }
+        inline scheduler::EventPtr get_pre_event()
+        {
+            return scheduler::EventPtr{scheduler::T_EVT_PRE, this->task};
+        }
 
-    /*! create a new event which precedes the tasks post-event
-     */
-    scheduler::EventPtr make_event();
+        inline scheduler::EventPtr get_post_event()
+        {
+            return scheduler::EventPtr{scheduler::T_EVT_POST, this->task};
+        }
 
-    /*!
-     * represent ›pausation of the task until event is reached‹
-     * in the scheduling graph
-     */
-    inline void sg_pause( scheduler::EventPtr event )
-    {
-        pre_event.state = 1;
-        event->add_follower( get_pre_event() );
-    }
+        inline scheduler::EventPtr get_result_set_event()
+        {
+            return scheduler::EventPtr{scheduler::T_EVT_RES_SET, this->task};
+        }
 
-    /*!
-     * Insert a new task and add the same dependencies as in the precedence graph.
-     * Note that tasks must be added in order, since only preceding tasks are considered!
-     *
-     * The precedence graph containing the task is assumed to be locked.
-     */
-    void init_graph();
+        inline scheduler::EventPtr get_result_get_event()
+        {
+            return scheduler::EventPtr{scheduler::T_EVT_RES_GET, this->task};
+        }
 
-    /*!
-     * Abstractly adds a dependeny from preceding task to this,
-     * by setting up an edge from the post-event of the
-     * preceding task to the pre-event of this task.
-     * Additionally, an edge to the post-event of the parent is added.
-     */
-    void add_dependency( Task & preceding_task );
+        inline bool is_ready()
+        {
+            return pre_event.is_ready();
+        }
 
-    /*!
-     * checks all incoming edges if they are still required and
-     * removes them if possible.
-     */
-    void update_graph();
+        inline bool is_running()
+        {
+            return pre_event.is_reached();
+        }
 
-    /*!
-     * removes this task from all resource-user-lists, so from now on
-     * no new dependencies to this task will be created.
-     */
-    void delete_from_resources();
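Taken together, the four events above give every task a small state machine, which the predicates moved in this hunk (`is_ready`, `is_running`, `is_finished`, `is_dead`) query. An illustrative summary, not part of the patch:

    // pre_event ready                          -> is_ready():    task may be scheduled
    // pre_event reached                        -> is_running():  task body has started
    // post_event reached                       -> is_finished(): task body is done
    // post_event and result_get_event reached  -> is_dead():     result consumed, task can be freed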
+        inline bool is_finished()
+        {
+            return post_event.is_reached();
+        }
 
-    template < typename PropertiesBuilder >
-    struct Builder
-    {
-        PropertiesBuilder & builder;
+        inline bool is_dead()
+        {
+            return post_event.is_reached() && result_get_event.is_reached();
+        }
+
+        /*! create a new event which precedes the task's post-event
+         */
+        scheduler::EventPtr make_event();
+
+        /*!
+         * represent ›pausation of the task until event is reached‹
+         * in the scheduling graph
+         */
+        inline void sg_pause(scheduler::EventPtr event)
+        {
+            pre_event.state = 1;
+            event->add_follower(get_pre_event());
+        }
+
+        /*!
+         * Insert a new task and add the same dependencies as in the precedence graph.
+         * Note that tasks must be added in order, since only preceding tasks are considered!
+         *
+         * The precedence graph containing the task is assumed to be locked.
+         */
+        void init_graph();
+
+        /*!
+         * Abstractly adds a dependency from preceding task to this,
+         * by setting up an edge from the post-event of the
+         * preceding task to the pre-event of this task.
+         * Additionally, an edge to the post-event of the parent is added.
+         */
+        void add_dependency(Task& preceding_task);
+
+        /*!
+         * checks all incoming edges if they are still required and
+         * removes them if possible.
+         */
+        void update_graph();
+
+        /*!
+         * removes this task from all resource-user-lists, so from now on
+         * no new dependencies to this task will be created.
+         */
+        void delete_from_resources();
+
+        template<typename PropertiesBuilder>
+        struct Builder
+        {
+            PropertiesBuilder& builder;
 
-        Builder( PropertiesBuilder & b )
-            : builder(b)
-        {}
-    };
+            Builder(PropertiesBuilder& b) : builder(b)
+            {
+            }
+        };
 
-    struct Patch
-    {
-        template <typename PatchBuilder>
-        struct Builder
+        struct Patch
         {
-            Builder( PatchBuilder & ) {}
+            template<typename PatchBuilder>
+            struct Builder
+            {
+                Builder(PatchBuilder&)
+                {
+                }
+            };
         };
-    };
-    void apply_patch( Patch const & ) {};
-};
+        void apply_patch(Patch const&){};
+    };
 } // namespace redGrapes
 
-template <>
-struct fmt::formatter< redGrapes::GraphProperty >
+template<>
+struct fmt::formatter<redGrapes::GraphProperty>
 {
-    constexpr auto parse( format_parse_context& ctx )
+    constexpr auto parse(format_parse_context& ctx)
     {
         return ctx.begin();
     }
 
-    template < typename FormatContext >
-    auto format(
-        redGrapes::GraphProperty const & sg_prop,
-        FormatContext & ctx
-    )
+    template<typename FormatContext>
+    auto format(redGrapes::GraphProperty const& sg_prop, FormatContext& ctx)
     {
         return ctx.out();
     }
 };
-
diff --git a/redGrapes/task/property/id.hpp b/redGrapes/task/property/id.hpp
index 74b268f2..f24f5c19 100644
--- a/redGrapes/task/property/id.hpp
+++ b/redGrapes/task/property/id.hpp
@@ -11,93 +11,86 @@
 
 #pragma once
 
-#include 
 #include 
 
-namespace redGrapes
-{
-
-using TaskID = unsigned int;
+#include 
 
-struct IDProperty
+namespace redGrapes
 {
-private:
-    static std::atomic_int & id_counter()
-    {
-        static std::atomic_int x;
-        return x;
-    }
-
-public:
-    TaskID task_id;
+    using TaskID = unsigned int;
 
-    IDProperty()
-        : task_id( -1)//id_counter().fetch_add( 1, std::memory_order_seq_cst ) )
+    struct IDProperty
     {
-    }
+    private:
+        static std::atomic_int& id_counter()
+        {
+            static std::atomic_int x;
+            return x;
+        }
 
-    IDProperty( IDProperty && other ) : task_id( other.task_id )
-    {
-    }
-    IDProperty( IDProperty const & other ) : task_id( other.task_id )
-    {
-    }
+    public:
+        TaskID task_id;
 
-    IDProperty & operator=(IDProperty const & other)
-    {
-        return *this;
-    }
+        IDProperty() : task_id(-1) // id_counter().fetch_add( 1, std::memory_order_seq_cst ) )
+        {
+        }
 
-    template < typename PropertiesBuilder >
-    struct Builder
-    {
-
PropertiesBuilder & b; + IDProperty(IDProperty&& other) : task_id(other.task_id) + { + } - Builder( PropertiesBuilder & b ) - : b(b) + IDProperty(IDProperty const& other) : task_id(other.task_id) { } - void init_id() + IDProperty& operator=(IDProperty const& other) { - b.task->task_id = id_counter().fetch_add( 1, std::memory_order_seq_cst ); + return *this; } - }; - struct Patch - { - template + template struct Builder { - Builder( PatchBuilder & ) {} + PropertiesBuilder& b; + + Builder(PropertiesBuilder& b) : b(b) + { + } + + void init_id() + { + b.task->task_id = id_counter().fetch_add(1, std::memory_order_seq_cst); + } }; - }; - void apply_patch( Patch const & ) {}; -}; + struct Patch + { + template + struct Builder + { + Builder(PatchBuilder&) + { + } + }; + }; + + void apply_patch(Patch const&){}; + }; } // namespace redGrapes -template <> -struct fmt::formatter< redGrapes::IDProperty > +template<> +struct fmt::formatter { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::IDProperty const & id_prop, - FormatContext & ctx - ) + template + auto format(redGrapes::IDProperty const& id_prop, FormatContext& ctx) { - return format_to( - ctx.out(), - "\"id\" : {}", - id_prop.task_id - ); + return format_to(ctx.out(), "\"id\" : {}", id_prop.task_id); } }; - diff --git a/redGrapes/task/property/inherit.hpp b/redGrapes/task/property/inherit.hpp index d7374d1d..fc277b4d 100644 --- a/redGrapes/task/property/inherit.hpp +++ b/redGrapes/task/property/inherit.hpp @@ -11,229 +11,198 @@ #pragma once -#include -#include #include -namespace redGrapes -{ +#include -struct Task; +#include -template < - typename T_Head, - typename... T_Tail -> -struct TaskPropertiesInherit - : T_Head - , TaskPropertiesInherit< T_Tail ... > +namespace redGrapes { - template < typename B > - struct Builder - : T_Head::template Builder< B > - , TaskPropertiesInherit< T_Tail ... >::template Builder< B > - { - Builder( B & b ) - : T_Head::template Builder< B >{ b } - , TaskPropertiesInherit< T_Tail ... >::template Builder< B >( b ) - {} - }; - struct Patch - : T_Head::Patch - , TaskPropertiesInherit< T_Tail ... >::Patch + struct Task; + + template + struct TaskPropertiesInherit + : T_Head + , TaskPropertiesInherit { - template < typename PatchBuilder > + template struct Builder - : T_Head::Patch::template Builder< PatchBuilder > - , TaskPropertiesInherit< T_Tail ... >::Patch::template Builder< PatchBuilder > + : T_Head::template Builder + , TaskPropertiesInherit::template Builder { - Builder( PatchBuilder & p ) - : T_Head::Patch::template Builder< PatchBuilder >{ p } - , TaskPropertiesInherit< T_Tail ... >::Patch::template Builder< PatchBuilder >( p ) - {} + Builder(B& b) : T_Head::template Builder{b}, TaskPropertiesInherit::template Builder(b) + { + } }; - }; - - void apply_patch( Patch const & patch ) - { - T_Head::apply_patch( patch ); - TaskPropertiesInherit< T_Tail ... 
>::apply_patch( patch ); - } -}; -struct PropEnd_t {}; + struct Patch + : T_Head::Patch + , TaskPropertiesInherit::Patch + { + template + struct Builder + : T_Head::Patch::template Builder + , TaskPropertiesInherit::Patch::template Builder + { + Builder(PatchBuilder& p) + : T_Head::Patch::template Builder{p} + , TaskPropertiesInherit::Patch::template Builder(p) + { + } + }; + }; -template<> -struct TaskPropertiesInherit< PropEnd_t > -{ - template < typename PropertiesBuilder > - struct Builder + void apply_patch(Patch const& patch) + { + T_Head::apply_patch(patch); + TaskPropertiesInherit::apply_patch(patch); + } + }; + + struct PropEnd_t { - Builder( PropertiesBuilder & ) {} }; - struct Patch + template<> + struct TaskPropertiesInherit { - template < typename PatchBuilder > + template struct Builder { - Builder( PatchBuilder & ) {} - }; - }; - - void apply_patch( Patch const & ) {} -}; + Builder(PropertiesBuilder&) + { + } + }; -template < typename... Policies > -struct TaskProperties1 - : public TaskPropertiesInherit< Policies..., PropEnd_t > -{ - template < typename B > - struct Builder - : TaskPropertiesInherit< Policies..., PropEnd_t >::template Builder< B > - { - Builder( B & b ) - : TaskPropertiesInherit< Policies..., PropEnd_t >::template Builder< B >( b ) - {} + struct Patch + { + template + struct Builder + { + Builder(PatchBuilder&) + { + } + }; + }; - template < typename T > - inline void add( T const & obj ) + void apply_patch(Patch const&) { - trait::BuildProperties::build( *this, obj ); } }; - struct Patch - : TaskPropertiesInherit< Policies..., PropEnd_t >::Patch + template + struct TaskProperties1 : public TaskPropertiesInherit { - struct Builder - : TaskPropertiesInherit< Policies..., PropEnd_t >::Patch::template Builder< Builder > + template + struct Builder : TaskPropertiesInherit::template Builder { - Patch patch; - - Builder() - : TaskPropertiesInherit< Policies..., PropEnd_t >::Patch::template Builder< Builder >( *this ) - {} - - Builder( Builder const & b ) - : patch( b.patch ) - , TaskPropertiesInherit< Policies..., PropEnd_t >::Patch::template Builder< Builder >( *this ) - {} + Builder(B& b) : TaskPropertiesInherit::template Builder(b) + { + } - operator Patch () const + template + inline void add(T const& obj) { - return patch; + trait::BuildProperties::build(*this, obj); } }; + + struct Patch : TaskPropertiesInherit::Patch + { + struct Builder : TaskPropertiesInherit::Patch::template Builder + { + Patch patch; + + Builder() : TaskPropertiesInherit::Patch::template Builder(*this) + { + } + + Builder(Builder const& b) + : patch(b.patch) + , TaskPropertiesInherit::Patch::template Builder(*this) + { + } + + operator Patch() const + { + return patch; + } + }; + }; + + void apply_patch(Patch const& patch) + { + TaskPropertiesInherit::apply_patch(patch); + } }; - - void apply_patch( Patch const & patch ) - { - TaskPropertiesInherit< Policies..., PropEnd_t >::apply_patch( patch ); - } -}; } // namespace redGrapes -template <> -struct fmt::formatter< - redGrapes::TaskPropertiesInherit< redGrapes::PropEnd_t > -> +template<> +struct fmt::formatter> { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - - template < typename FormatContext > - auto format( - redGrapes::TaskPropertiesInherit< redGrapes::PropEnd_t > const & prop, - FormatContext & ctx - ) + + template + auto format(redGrapes::TaskPropertiesInherit const& prop, FormatContext& ctx) { return ctx.out(); } }; -template < - typename T_Head -> 
-struct fmt::formatter< - redGrapes::TaskPropertiesInherit< T_Head, redGrapes::PropEnd_t > -> +template +struct fmt::formatter> { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::TaskPropertiesInherit< T_Head, redGrapes::PropEnd_t > const & prop, - FormatContext & ctx - ) + template + auto format(redGrapes::TaskPropertiesInherit const& prop, FormatContext& ctx) { - return fmt::format_to( - ctx.out(), - "{}", - (T_Head const &) prop - ); + return fmt::format_to(ctx.out(), "{}", (T_Head const&) prop); } }; -template < - typename T_Head, - typename... T_Tail -> -struct fmt::formatter< - redGrapes::TaskPropertiesInherit< T_Head, T_Tail... > -> +template +struct fmt::formatter> { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::TaskPropertiesInherit< T_Head, T_Tail... > const & prop, - FormatContext & ctx - ) + template + auto format(redGrapes::TaskPropertiesInherit const& prop, FormatContext& ctx) { return fmt::format_to( - ctx.out(), - "{}, {}", - (T_Head const &) prop, - (redGrapes::TaskPropertiesInherit< T_Tail... > const &) prop - ); + ctx.out(), + "{}, {}", + (T_Head const&) prop, + (redGrapes::TaskPropertiesInherit const&) prop); } - }; -template < - typename... Policies -> -struct fmt::formatter< - redGrapes::TaskProperties1< Policies... > -> +template +struct fmt::formatter> { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::TaskProperties1< Policies... 
> const & prop, - FormatContext & ctx - ) + template + auto format(redGrapes::TaskProperties1 const& prop, FormatContext& ctx) { return fmt::format_to( - ctx.out(), - "{{ {} }}", - ( typename redGrapes::TaskPropertiesInherit< Policies..., redGrapes::PropEnd_t > const & ) prop - ); + ctx.out(), + "{{ {} }}", + (typename redGrapes::TaskPropertiesInherit const&) prop); } }; - diff --git a/redGrapes/task/property/label.hpp b/redGrapes/task/property/label.hpp index b0729ff0..2c046585 100644 --- a/redGrapes/task/property/label.hpp +++ b/redGrapes/task/property/label.hpp @@ -11,68 +11,66 @@ #pragma once -#include #include + #include -namespace redGrapes -{ +#include -struct LabelProperty +namespace redGrapes { - using string = std::basic_string< char, std::char_traits, memory::StdAllocator>; - string label; - - template < typename TaskBuilder > - struct Builder + struct LabelProperty { - TaskBuilder & builder; + using string = std::basic_string, memory::StdAllocator>; - Builder( TaskBuilder & builder ) - : builder(builder) - {} + string label; - TaskBuilder & label( string const & l ) + template + struct Builder { - builder.task->label = l; - return builder; - } - }; + TaskBuilder& builder; - struct Patch - { - template - struct Builder + Builder(TaskBuilder& builder) : builder(builder) + { + } + + TaskBuilder& label(string const& l) + { + builder.task->label = l; + return builder; + } + }; + + struct Patch { - Builder( PatchBuilder & ) {} + template + struct Builder + { + Builder(PatchBuilder&) + { + } + }; }; - }; - void apply_patch( Patch const & ) {} -}; + void apply_patch(Patch const&) + { + } + }; } // namespace redGrapes -template <> -struct fmt::formatter< redGrapes::LabelProperty > +template<> +struct fmt::formatter { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::LabelProperty const & label_prop, - FormatContext & ctx - ) + template + auto format(redGrapes::LabelProperty const& label_prop, FormatContext& ctx) { - return format_to( - ctx.out(), - "\"label\" : \"{}\"", - label_prop.label - ); + return format_to(ctx.out(), "\"label\" : \"{}\"", label_prop.label); } }; - diff --git a/redGrapes/task/property/queue.hpp b/redGrapes/task/property/queue.hpp index 6b4ab8bf..d69d880a 100644 --- a/redGrapes/task/property/queue.hpp +++ b/redGrapes/task/property/queue.hpp @@ -10,54 +10,49 @@ namespace redGrapes { -struct Task; + struct Task; -struct QueueProperty -{ - //Task volatile * volatile next; - - template < typename PropertiesBuilder > - struct Builder + struct QueueProperty { - PropertiesBuilder & builder; - - Builder( PropertiesBuilder & b ) - : builder(b) - {} - }; + // Task volatile * volatile next; - struct Patch - { - template + template struct Builder { - Builder( PatchBuilder & ) {} + PropertiesBuilder& builder; + + Builder(PropertiesBuilder& b) : builder(b) + { + } }; - }; - void apply_patch( Patch const & ) {}; -}; + struct Patch + { + template + struct Builder + { + Builder(PatchBuilder&) + { + } + }; + }; + + void apply_patch(Patch const&){}; + }; } // namespace redGrapes -template <> -struct fmt::formatter< redGrapes::QueueProperty > +template<> +struct fmt::formatter { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::QueueProperty const & label_prop, - FormatContext & ctx - ) + template + auto 
format(redGrapes::QueueProperty const& label_prop, FormatContext& ctx) { - return format_to( - ctx.out(), - "" - ); + return format_to(ctx.out(), ""); } }; - diff --git a/redGrapes/task/property/resource.hpp b/redGrapes/task/property/resource.hpp index 3a19dff2..9a2f57e8 100644 --- a/redGrapes/task/property/resource.hpp +++ b/redGrapes/task/property/resource.hpp @@ -8,162 +8,163 @@ /** * @file redGrapes/property/resource.hpp */ - + #pragma once -#include -#include +#include +#include + #include #include -#include -#include + +#include +#include namespace redGrapes { -struct ResourceProperty : ResourceUser -{ - template < typename PropertiesBuilder > - struct Builder + struct ResourceProperty : ResourceUser { - PropertiesBuilder & builder; - Builder( PropertiesBuilder & b ) - : builder( b ) - {} - - PropertiesBuilder & resources( std::initializer_list list ) - { - for( ResourceAccess const & ra : list ) - builder.task->access_list.push(ra); - builder.task->build_unique_resource_list(); - - return builder; - } - - inline PropertiesBuilder & add_resource( ResourceAccess access ) - { - (*builder.task) += access; - return builder; - } - }; - - struct Patch - { - template + template struct Builder { - PatchBuilder & builder; - Builder( PatchBuilder & b ) - : builder( b ) - {} + PropertiesBuilder& builder; - PatchBuilder add_resources( std::initializer_list list ) + Builder(PropertiesBuilder& b) : builder(b) { - Patch & p = builder.patch; - for( auto const & acc : list ) - p += acc; + } + + PropertiesBuilder& resources(std::initializer_list list) + { + for(ResourceAccess const& ra : list) + builder.task->access_list.push(ra); + builder.task->build_unique_resource_list(); + return builder; } - PatchBuilder remove_resources( std::initializer_list list ) + + inline PropertiesBuilder& add_resource(ResourceAccess access) { - Patch & p = builder.patch; - for( auto const & acc : list ) - p -= acc; + (*builder.task) += access; return builder; } }; - enum DiffType { ADD, REMOVE }; - std::list> diff; - - void operator+= (Patch const& other) + struct Patch { - this->diff.insert(std::end(this->diff), std::begin(other.diff), std::end(other.diff)); - } + template + struct Builder + { + PatchBuilder& builder; + + Builder(PatchBuilder& b) : builder(b) + { + } + + PatchBuilder add_resources(std::initializer_list list) + { + Patch& p = builder.patch; + for(auto const& acc : list) + p += acc; + return builder; + } + + PatchBuilder remove_resources(std::initializer_list list) + { + Patch& p = builder.patch; + for(auto const& acc : list) + p -= acc; + return builder; + } + }; + + enum DiffType + { + ADD, + REMOVE + }; + + std::list> diff; + + void operator+=(Patch const& other) + { + this->diff.insert(std::end(this->diff), std::begin(other.diff), std::end(other.diff)); + } - void operator+= (ResourceAccess const & ra) + void operator+=(ResourceAccess const& ra) + { + this->diff.push_back(std::make_pair(DiffType::ADD, ra)); + } + + void operator-=(ResourceAccess const& ra) + { + this->diff.push_back(std::make_pair(DiffType::REMOVE, ra)); + } + }; + + inline void operator+=(ResourceAccess const& ra) { - this->diff.push_back(std::make_pair(DiffType::ADD, ra)); + this->add_resource_access(ra); } - void operator-= (ResourceAccess const & ra) + inline void operator-=(ResourceAccess const& ra) { - this->diff.push_back(std::make_pair(DiffType::REMOVE, ra)); + this->rm_resource_access(ra); } - }; - inline void operator+= (ResourceAccess const & ra) - { - this->add_resource_access(ra); - } - - inline void operator-= 
(ResourceAccess const & ra) - { - this->rm_resource_access(ra); - } - - void apply_patch(Patch const & patch) - { - ResourceUser before( *this ); - - for( auto x : patch.diff ) + void apply_patch(Patch const& patch) { - switch(x.first) + ResourceUser before(*this); + + for(auto x : patch.diff) { - case Patch::DiffType::ADD: - (*this) += x.second; - break; - case Patch::DiffType::REMOVE: - (*this) -= x.second; - break; + switch(x.first) + { + case Patch::DiffType::ADD: + (*this) += x.second; + break; + case Patch::DiffType::REMOVE: + (*this) -= x.second; + break; + } } - } - if( ! before.is_superset_of(*this) ) - throw std::runtime_error("redGrapes: ResourceUserPolicy: updated access list is no subset!"); - } -}; + if(!before.is_superset_of(*this)) + throw std::runtime_error("redGrapes: ResourceUserPolicy: updated access list is no subset!"); + } + }; -struct ResourcePrecedencePolicy -{ - static bool is_serial(ResourceProperty const & a, ResourceProperty const & b) + struct ResourcePrecedencePolicy { - return redGrapes::ResourceUser::is_serial( a, b ); - } + static bool is_serial(ResourceProperty const& a, ResourceProperty const& b) + { + return redGrapes::ResourceUser::is_serial(a, b); + } - static void assert_superset(ResourceProperty const & super, ResourceProperty const & sub) - { - if(! redGrapes::ResourceUser::is_superset( super, sub )) + static void assert_superset(ResourceProperty const& super, ResourceProperty const& sub) { - auto msg = fmt::format("Not allowed: {} is no superset of {}\n", super, sub); - spdlog::error(msg); - throw std::runtime_error(msg); + if(!redGrapes::ResourceUser::is_superset(super, sub)) + { + auto msg = fmt::format("Not allowed: {} is no superset of {}\n", super, sub); + spdlog::error(msg); + throw std::runtime_error(msg); + } } - } -}; + }; } // namespace redGrapes - -template <> -struct fmt::formatter< redGrapes::ResourceProperty > +template<> +struct fmt::formatter { - constexpr auto parse( format_parse_context& ctx ) + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - template < typename FormatContext > - auto format( - redGrapes::ResourceProperty const & label_prop, - FormatContext & ctx - ) + template + auto format(redGrapes::ResourceProperty const& label_prop, FormatContext& ctx) { - return format_to( - ctx.out(), - "\"resources\" : {}", - ( redGrapes::ResourceUser const & ) label_prop - ); + return format_to(ctx.out(), "\"resources\" : {}", (redGrapes::ResourceUser const&) label_prop); } }; - diff --git a/redGrapes/task/property/trait.hpp b/redGrapes/task/property/trait.hpp index eaaf6d1a..292d7ffc 100644 --- a/redGrapes/task/property/trait.hpp +++ b/redGrapes/task/property/trait.hpp @@ -2,84 +2,77 @@ #pragma once -#include #include - #include -namespace redGrapes -{ -namespace trait -{ +#include -template < - typename T, - typename Sfinae = void -> -struct BuildProperties +namespace redGrapes { - template - static void build(Builder & builder, T const & t) + namespace trait { - spdlog::warn("trait `redGrapes::BuildProperties` is not implemented for {}", boost::core::demangle(typeid(T).name())); - } -}; -template < - typename T -> -struct BuildProperties< std::reference_wrapper< T > > -{ - template - inline static void build(Builder & builder, std::reference_wrapper< T > const & t) - { - builder.add( t.get() ); - } -}; + template + struct BuildProperties + { + template + static void build(Builder& builder, T const& t) + { + spdlog::warn( + "trait `redGrapes::BuildProperties` is not implemented for {}", + 
boost::core::demangle(typeid(T).name())); + } + }; -template < - typename T -> -struct BuildProperties< T & > -{ - template - inline static void build(Builder & builder, T const & t) - { - builder.add( t ); - } -}; + template + struct BuildProperties> + { + template + static inline void build(Builder& builder, std::reference_wrapper const& t) + { + builder.add(t.get()); + } + }; -template < - typename T -> -struct BuildProperties< T const & > -{ - template - inline static void build(Builder & builder, T const & t) - { - builder.add( t ); - } -}; + template + struct BuildProperties + { + template + static inline void build(Builder& builder, T const& t) + { + builder.add(t); + } + }; + template + struct BuildProperties + { + template + static inline void build(Builder& builder, T const& t) + { + builder.add(t); + } + }; -// to avoid warnings -template <> -struct BuildProperties< int > -{ - template - inline static void build(Builder & builder, int const & t) - {} -}; + // to avoid warnings + template<> + struct BuildProperties + { + template + static inline void build(Builder& builder, int const& t) + { + } + }; -template <> -struct BuildProperties< unsigned int > -{ - template - inline static void build(Builder & builder, unsigned int const & t) - {} -}; + template<> + struct BuildProperties + { + template + static inline void build(Builder& builder, unsigned int const& t) + { + } + }; -} // namespace trait + } // namespace trait } // namespace redGrapes - diff --git a/redGrapes/task/queue.cpp b/redGrapes/task/queue.cpp index 096a370e..7b148fa8 100644 --- a/redGrapes/task/queue.cpp +++ b/redGrapes/task/queue.cpp @@ -9,58 +9,58 @@ namespace redGrapes { -namespace task -{ - - Queue::Queue() - { - } -/* - inline void Queue::push(Task* item) + namespace task { - item->next = nullptr; - Task * n = nullptr; - while(! tail->next.compare_exchange_weak(n, item)); - if( tail ) - tail.load()->next = item; - tail = item; + Queue::Queue() + { + } - //std::lock_guard lock(m); + /* + inline void Queue::push(Task* item) + { + item->next = nullptr; + Task * n = nullptr; + while(! 
tail->next.compare_exchange_weak(n, item)); - item->next = nullptr; + if( tail ) + tail.load()->next = item; + tail = item; - if(tail) - while(!__sync_bool_compare_and_swap(&(tail->next), nullptr, item)) - break; + //std::lock_guard lock(m); - tail = item; + item->next = nullptr; - __sync_bool_compare_and_swap(&head, 0, item); + if(tail) + while(!__sync_bool_compare_and_swap(&(tail->next), nullptr, item)) + break; - SPDLOG_TRACE("push: head = {}, tail = {}", (void*) head, (void*) tail); - } + tail = item; - inline Task * Queue::pop() - { - std::lock_guard lock(m); + __sync_bool_compare_and_swap(&head, 0, item); - while(Task * volatile t = head) - if(__sync_bool_compare_and_swap(&head, t, t->next)) + SPDLOG_TRACE("push: head = {}, tail = {}", (void*) head, (void*) tail); + } + + inline Task * Queue::pop() { - SPDLOG_TRACE("queue pop: item={}, new head = {}", (void*) t, (void*) t->next); + std::lock_guard lock(m); - if(t->next == nullptr) - tail = nullptr; - else - t->next = nullptr; - return t; - } + while(Task * volatile t = head) + if(__sync_bool_compare_and_swap(&head, t, t->next)) + { + SPDLOG_TRACE("queue pop: item={}, new head = {}", (void*) t, (void*) t->next); - SPDLOG_TRACE("pop: head = {}, tail = {}", (void*) head, (void*) tail); -, return nullptr; - } -*/ -} -} + if(t->next == nullptr) + tail = nullptr; + else + t->next = nullptr; + return t; + } + SPDLOG_TRACE("pop: head = {}, tail = {}", (void*) head, (void*) tail); + , return nullptr; + } + */ + } // namespace task +} // namespace redGrapes diff --git a/redGrapes/task/queue.hpp b/redGrapes/task/queue.hpp index e02189d2..edb25e58 100644 --- a/redGrapes/task/queue.hpp +++ b/redGrapes/task/queue.hpp @@ -7,135 +7,142 @@ #pragma once -#include -#include #include -#include +#include #include -namespace redGrapes -{ - -struct Task; +#include -namespace task -{ +#include -// Default traits for the ConcurrentQueue. To change some of the -// traits without re-implementing all of them, inherit from this -// struct and shadow the declarations you wish to be different; -// since the traits are used as a template type parameter, the -// shadowed declarations will be used where defined, and the defaults -// otherwise. -struct TaskQueueTraits -{ - // General-purpose size type. std::size_t is strongly recommended. - typedef std::size_t size_t; - - // The type used for the enqueue and dequeue indices. Must be at least as - // large as size_t. Should be significantly larger than the number of elements - // you expect to hold at once, especially if you have a high turnover rate; - // for example, on 32-bit x86, if you expect to have over a hundred million - // elements or pump several million elements through your queue in a very - // short space of time, using a 32-bit type *may* trigger a race condition. - // A 64-bit int type is recommended in that case, and in practice will - // prevent a race condition no matter the usage of the queue. Note that - // whether the queue is lock-free with a 64-int type depends on the whether - // std::atomic is lock-free, which is platform-specific. - typedef std::size_t index_t; - - // Internally, all elements are enqueued and dequeued from multi-element - // blocks; this is the smallest controllable unit. If you expect few elements - // but many producers, a smaller block size should be favoured. For few producers - // and/or many elements, a larger block size is preferred. A sane default - // is provided. Must be a power of 2. - static const size_t BLOCK_SIZE = 64; - - // For explicit producers (i.e. 
when using a producer token), the block is - // checked for being empty by iterating through a list of flags, one per element. - // For large block sizes, this is too inefficient, and switching to an atomic - // counter-based approach is faster. The switch is made for block sizes strictly - // larger than this threshold. - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; - - // How many full blocks can be expected for a single explicit producer? This should - // reflect that number's maximum for optimal performance. Must be a power of 2. - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 128; - - // How many full blocks can be expected for a single implicit producer? This should - // reflect that number's maximum for optimal performance. Must be a power of 2. - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 128; - - // The initial size of the hash table mapping thread IDs to implicit producers. - // Note that the hash is resized every time it becomes half full. - // Must be a power of two, and either 0 or at least 1. If 0, implicit production - // (using the enqueue methods without an explicit producer token) is disabled. - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; - - // Controls the number of items that an explicit consumer (i.e. one with a token) - // must consume before it causes all consumers to rotate and move on to the next - // internal queue. - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; - - // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. - // Enqueue operations that would cause this limit to be surpassed will fail. Note - // that this limit is enforced at the block level (for performance reasons), i.e. - // it's rounded up to the nearest block size. - static const size_t MAX_SUBQUEUE_SIZE = moodycamel::details::const_numeric_max::value; - - // The number of times to spin before sleeping when waiting on a semaphore. - // Recommended values are on the order of 1000-10000 unless the number of - // consumer threads exceeds the number of idle cores (in which case try 0-100). - // Only affects instances of the BlockingConcurrentQueue. - static const int MAX_SEMA_SPINS = 10000; - - // Whether to recycle dynamically-allocated blocks into an internal free list or - // not. If false, only pre-allocated blocks (controlled by the constructor - // arguments) will be recycled, and all others will be `free`d back to the heap. - // Note that blocks consumed by explicit producers are only freed on destruction - // of the queue (not following destruction of the token) regardless of this trait. 
- static const bool RECYCLE_ALLOCATED_BLOCKS = false; - - static inline void* malloc(size_t size) { -// return std::malloc(size); - return (void*) memory::Allocator().allocate( size ).ptr; - } - static inline void free(void* ptr) { -// std::free( ptr ); - memory::Allocator().deallocate( memory::Block{ (uintptr_t)ptr, 1 } ); - } -}; - -struct Queue +namespace redGrapes { - /* - std::atomic< Task * > head; - std::atomic< Task * > tail; - - std::mutex m; -*/ - moodycamel::ConcurrentQueue< Task*/*, TaskQueueTraits */> cq; - Queue(); - Queue( unsigned capacity ) :cq(capacity) { - } + struct Task; - inline void push(Task * task) + namespace task { - TRACE_EVENT("Task", "TaskQueue::push()"); - this->cq.enqueue(task); - } - inline Task * pop() - { - TRACE_EVENT("Task", "TaskQueue::pop()"); - Task * t = nullptr; - if( this->cq.try_dequeue( t ) ) - return t; - else - return nullptr; - } -}; - -} -} + // Default traits for the ConcurrentQueue. To change some of the + // traits without re-implementing all of them, inherit from this + // struct and shadow the declarations you wish to be different; + // since the traits are used as a template type parameter, the + // shadowed declarations will be used where defined, and the defaults + // otherwise. + struct TaskQueueTraits + { + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-int type depends on the whether + // std::atomic is lock-free, which is platform-specific. + typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few producers + // and/or many elements, a larger block size is preferred. A sane default + // is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 64; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per element. + // For large block sizes, this is too inefficient, and switching to an atomic + // counter-based approach is faster. The switch is made for block sizes strictly + // larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 128; + + // How many full blocks can be expected for a single implicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 128; + + // The initial size of the hash table mapping thread IDs to implicit producers. 
+
+            // Note that the hash is resized every time it becomes half full.
+            // Must be a power of two, and either 0 or at least 1. If 0, implicit production
+            // (using the enqueue methods without an explicit producer token) is disabled.
+            static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
+
+            // Controls the number of items that an explicit consumer (i.e. one with a token)
+            // must consume before it causes all consumers to rotate and move on to the next
+            // internal queue.
+            static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
+
+            // The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
+            // Enqueue operations that would cause this limit to be surpassed will fail. Note
+            // that this limit is enforced at the block level (for performance reasons), i.e.
+            // it's rounded up to the nearest block size.
+            static const size_t MAX_SUBQUEUE_SIZE = moodycamel::details::const_numeric_max<size_t>::value;
+
+            // The number of times to spin before sleeping when waiting on a semaphore.
+            // Recommended values are on the order of 1000-10000 unless the number of
+            // consumer threads exceeds the number of idle cores (in which case try 0-100).
+            // Only affects instances of the BlockingConcurrentQueue.
+            static int const MAX_SEMA_SPINS = 10000;
+
+            // Whether to recycle dynamically-allocated blocks into an internal free list or
+            // not. If false, only pre-allocated blocks (controlled by the constructor
+            // arguments) will be recycled, and all others will be `free`d back to the heap.
+            // Note that blocks consumed by explicit producers are only freed on destruction
+            // of the queue (not following destruction of the token) regardless of this trait.
+            static bool const RECYCLE_ALLOCATED_BLOCKS = false;
+
+            static inline void* malloc(size_t size)
+            {
+                // return std::malloc(size);
+                return (void*) memory::Allocator().allocate(size).ptr;
+            }
+
+            static inline void free(void* ptr)
+            {
+                // std::free( ptr );
+                memory::Allocator().deallocate(memory::Block{(uintptr_t) ptr, 1});
+            }
+        };
+
+        struct Queue
+        {
+            /*
+            std::atomic< Task * > head;
+            std::atomic< Task * > tail;
+
+            std::mutex m;
+            */
+            moodycamel::ConcurrentQueue<Task* /*, TaskQueueTraits */> cq;
+
+            Queue();
+
+            Queue(unsigned capacity) : cq(capacity)
+            {
+            }
+
+            inline void push(Task* task)
+            {
+                TRACE_EVENT("Task", "TaskQueue::push()");
+                this->cq.enqueue(task);
+            }
+
+            inline Task* pop()
+            {
+                TRACE_EVENT("Task", "TaskQueue::pop()");
+                Task* t = nullptr;
+                if(this->cq.try_dequeue(t))
+                    return t;
+                else
+                    return nullptr;
+            }
+        };
+
+    } // namespace task
+} // namespace redGrapes
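As the commented-out second template argument of `cq` above hints, the `TaskQueueTraits` defined in this file (including its `malloc`/`free` hooks into `memory::Allocator`) is currently not passed to the queue. Wiring it in would be a one-line change against moodycamel's public API; a possible form, an assumption rather than something this patch does:

    // hypothetical: route block allocation through TaskQueueTraits
    moodycamel::ConcurrentQueue<Task*, TaskQueueTraits> cq;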
diff --git a/redGrapes/task/task.hpp b/redGrapes/task/task.hpp
index 44338bc9..a6038877 100644
--- a/redGrapes/task/task.hpp
+++ b/redGrapes/task/task.hpp
@@ -6,15 +6,15 @@
  */
 #pragma once
 
-#include 
-#include 
-#include 
-#include 
+#include 
 #include 
-#include 
 #include 
-#include 
-#include 
+#include 
+#include 
+#include 
+
+#include 
 
 // defines REDGRAPES_TASK_PROPERTIES
 #include 
@@ -22,86 +22,95 @@
 
 namespace redGrapes
 {
-using TaskProperties = TaskProperties1<
-    IDProperty,
-    ResourceProperty,
-    QueueProperty,
-    GraphProperty
+    using TaskProperties = TaskProperties1<
+        IDProperty,
+        ResourceProperty,
+        QueueProperty,
+        GraphProperty
 #ifdef REDGRAPES_TASK_PROPERTIES
-    , REDGRAPES_TASK_PROPERTIES
+        ,
+        REDGRAPES_TASK_PROPERTIES
 #endif
->;
+        >;
 
-struct Task :
-    TaskBase,
-    TaskProperties
-{
-    virtual ~Task() {}
-
-    unsigned arena_id;
-    std::atomic_int removal_countdown;
-
-    Task()
-        : removal_countdown(2)
-    {}
-
-    virtual void * get_result_data()
+    struct Task
+        : TaskBase
+        , TaskProperties
     {
-        return nullptr;
-    }
-};
+        virtual ~Task()
+        {
+        }
 
-// TODO: fuse ResultTask and FunTask into one template
-// ---> removes one layer of virtual function calls
+        unsigned arena_id;
+        std::atomic_int removal_countdown;
 
-template < typename Result >
-struct ResultTask : Task
-{
-    Result result_data;
+        Task() : removal_countdown(2)
+        {
+        }
+
+        virtual void* get_result_data()
+        {
+            return nullptr;
+        }
+    };
 
-    virtual ~ResultTask() {}
+    // TODO: fuse ResultTask and FunTask into one template
+    // ---> removes one layer of virtual function calls
 
-    virtual void * get_result_data()
+    template<typename Result>
+    struct ResultTask : Task
     {
-        return &result_data;
-    }
+        Result result_data;
 
-    virtual Result run_result() = 0;
+        virtual ~ResultTask()
+        {
+        }
 
-    void run() final
-    {
-        result_data = run_result();
-        get_result_set_event().notify(); // result event now ready
-    }
-};
+        virtual void* get_result_data()
+        {
+            return &result_data;
+        }
 
-template<>
-struct ResultTask<void> : Task
-{
-    virtual ~ResultTask() {}
-
-    virtual void run_result() {}
+        virtual Result run_result() = 0;
+
+        void run() final
+        {
+            result_data = run_result();
+            get_result_set_event().notify(); // result event now ready
+        }
+    };
 
-    void run() final
+    template<>
+    struct ResultTask<void> : Task
     {
-        run_result();
-        get_result_set_event().notify();
-    }
-};
-
-template< typename F >
-struct FunTask
-    : ResultTask< typename std::result_of<F()>::type >
-{
-    std::optional< F > impl;
+        virtual ~ResultTask()
+        {
+        }
+
+        virtual void run_result()
+        {
+        }
+
+        void run() final
+        {
+            run_result();
+            get_result_set_event().notify();
+        }
+    };
+
+    template<typename F>
+    struct FunTask : ResultTask<typename std::result_of<F()>::type>
+    {
+        std::optional<F> impl;
 
-    virtual ~FunTask() {}
+        virtual ~FunTask()
+        {
+        }
 
-    typename std::result_of<F()>::type run_result()
-    {
-        return (*this->impl)();
-    }
-};
+        typename std::result_of<F()>::type run_result()
+        {
+            return (*this->impl)();
+        }
+    };
 } // namespace redGrapes
-
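For intuition on the deduction above: `FunTask<F>` stores the nullary closure produced by `TaskBuilder::BindArgs` (task_builder.hpp below), so the task's result type falls out of `std::result_of`. The same deduction in isolation, as a self-contained sketch with invented names:

    #include <type_traits>

    auto closure = [x = 20] { return x + 22; }; // nullary, like BindArgs' output
    using R = std::result_of<decltype(closure)()>::type; // R = int; a ResultTask<R> stores it
    static_assert(std::is_same<R, int>::value, "deduced the way FunTask deduces");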
diff --git a/redGrapes/task/task_base.hpp b/redGrapes/task/task_base.hpp
index 5c8aea90..e9f40e5f 100644
--- a/redGrapes/task/task_base.hpp
+++ b/redGrapes/task/task_base.hpp
@@ -7,94 +7,98 @@
 
 #pragma once
 
-#include 
-#include 
-#include 
-#include 
+#include 
 #include 
-#include 
-#include 
+#include 
+#include 
+#include 
+#include 
 
 namespace redGrapes
 {
-struct TaskBase
-{
-    bool finished;
-    bool enable_stack_switching;
-
-    virtual ~TaskBase() {}
-    TaskBase() : finished(false), enable_stack_switching(false) {}
-
-    virtual void run() = 0;
-
-    std::optional< scheduler::EventPtr > operator() ()
+    struct TaskBase
     {
-        if( enable_stack_switching )
+        bool finished;
+        bool enable_stack_switching;
+
+        virtual ~TaskBase()
         {
-            if(!resume_cont)
-                resume_cont = boost::context::callcc(
-                    [this](boost::context::continuation&& c)
-                    {
-                        {
-                            std::lock_guard< std::mutex > lock( yield_cont_mutex );
-                            this->yield_cont = std::move(c);
-                        }
-
-                        this->run();
-                        this->event = std::nullopt;
-
-                        std::optional< boost::context::continuation > yield_cont;
-
-                        {
-                            std::lock_guard< std::mutex > lock( yield_cont_mutex );
-                            this->yield_cont.swap(yield_cont);
-                        }
-
-                        return std::move(*yield_cont);
-                    });
-            else
-                resume_cont = resume_cont->resume();
         }
-        else
+
+        TaskBase() : finished(false), enable_stack_switching(false)
         {
-            this->run();
         }
 
-        return event;
-    }
+        virtual void run() = 0;
 
-    void yield( scheduler::EventPtr event )
-    {
-        this->event = event;
-
-        if( enable_stack_switching )
+        std::optional<scheduler::EventPtr> operator()()
         {
-            std::optional< boost::context::continuation > old_yield;
-            this->yield_cont.swap( old_yield );
-
-            boost::context::continuation new_yield = old_yield->resume();
+            if(enable_stack_switching)
+            {
+                if(!resume_cont)
+                    resume_cont = boost::context::callcc(
+                        [this](boost::context::continuation&& c)
+                        {
+                            {
+                                std::lock_guard<std::mutex> lock(yield_cont_mutex);
+                                this->yield_cont = std::move(c);
+                            }
+
+                            this->run();
+                            this->event = std::nullopt;
+
+                            std::optional<boost::context::continuation> yield_cont;
+
+                            {
+                                std::lock_guard<std::mutex> lock(yield_cont_mutex);
+                                this->yield_cont.swap(yield_cont);
+                            }
+
+                            return std::move(*yield_cont);
+                        });
+                else
+                    resume_cont = resume_cont->resume();
+            }
+            else
+            {
+                this->run();
+            }
 
-            std::lock_guard< std::mutex > lock( yield_cont_mutex );
-            if( ! yield_cont )
-                yield_cont = std::move(new_yield);
-            // else: yield_cont already been set by another thread running this task
+            return event;
         }
-        else
+
+        void yield(scheduler::EventPtr event)
         {
-            spdlog::error("called yield in task without stack switching!");
+            this->event = event;
+
+            if(enable_stack_switching)
+            {
+                std::optional<boost::context::continuation> old_yield;
+                this->yield_cont.swap(old_yield);
+
+                boost::context::continuation new_yield = old_yield->resume();
+
+                std::lock_guard<std::mutex> lock(yield_cont_mutex);
+                if(!yield_cont)
+                    yield_cont = std::move(new_yield);
+                // else: yield_cont has already been set by another thread running this task
+            }
+            else
+            {
+                spdlog::error("called yield in task without stack switching!");
+            }
         }
-    }
 
-    std::optional< scheduler::EventPtr > event;
+        std::optional<scheduler::EventPtr> event;
 
-private:
-    std::mutex yield_cont_mutex;
+    private:
+        std::mutex yield_cont_mutex;
 
-    std::optional< boost::context::continuation > yield_cont;
-    std::optional< boost::context::continuation > resume_cont;
-};
+        std::optional<boost::context::continuation> yield_cont;
+        std::optional<boost::context::continuation> resume_cont;
+    };
 } // namespace redGrapes
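`yield()` above is the suspension primitive that `Future::get()` (future.hpp) builds on: the task records the event it waits for and, with stack switching enabled, swaps back to the worker's continuation until that event is reached. The one call site in this patch reads:

    // from Future::get(): park the current task until the producing task sets its result
    yield(task.get_result_set_event());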
diff --git a/redGrapes/task/task_builder.hpp b/redGrapes/task/task_builder.hpp
index cc93f853..6f6af0c4 100644
--- a/redGrapes/task/task_builder.hpp
+++ b/redGrapes/task/task_builder.hpp
@@ -6,138 +6,135 @@
  */
 #pragma once
 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
-#include 
+
 #include 
-#include 
-#include 
+#include 
 
 namespace redGrapes
{
-/* HELPERS */
+    /* HELPERS */
 
-template <typename... Args>
-static inline void pass(Args&&...)
-{
-}
-
-template <typename B>
-struct PropBuildHelper
-{
-    typename TaskProperties::Builder<B>& builder;
-
-    template <typename T>
-    inline int build(T const& x)
+    template<typename... Args>
+    static inline void pass(Args&&...)
     {
-        trait::BuildProperties<T>::build(builder, x);
-        return 0;
     }
 
-    void foo()
+    template<typename B>
+    struct PropBuildHelper
     {
-    }
-};
+        typename TaskProperties::Builder<B>& builder;
 
-/* TASK BUILDER */
+        template<typename T>
+        inline int build(T const& x)
+        {
+            trait::BuildProperties<T>::build(builder, x);
+            return 0;
+        }
 
-template < typename Callable, typename... Args >
-struct TaskBuilder
-    : TaskProperties::Builder< TaskBuilder >
-{
-    struct BindArgs
-    {
-        inline auto operator() ( Callable&& f, Args&&... args )
+        void foo()
         {
-            return std::move([f=std::move(f), args...]() mutable {
-                return f(std::forward<Args>(args)...);
-            });
         }
     };
 
-    using Impl = typename std::result_of< BindArgs(Callable, Args...) >::type;
-    using Result = typename std::result_of< Callable(Args...)>::type;
-
-    std::shared_ptr< TaskSpace > space;
-    FunTask< Impl > * task;
+    /* TASK BUILDER */
 
-    TaskBuilder( Callable&& f, Args&&... args )
-        : TaskProperties::Builder< TaskBuilder >( *this )
-        , space( current_task_space() )
+    template<typename Callable, typename... Args>
+    struct TaskBuilder : TaskProperties::Builder<TaskBuilder>
     {
-        // allocate
-        redGrapes::memory::Allocator alloc;
-        memory::Block blk = alloc.allocate( sizeof(FunTask<Impl>) );
-        task = (FunTask<Impl>*)blk.ptr;
+        struct BindArgs
+        {
+            inline auto operator()(Callable&& f, Args&&... args)
+            {
+                return std::move([f = std::move(f), args...]() mutable { return f(std::forward<Args>(args)...); });
+            }
+        };
 
-        if( ! task )
-            throw std::runtime_error("out of memory");
+        using Impl = typename std::result_of<BindArgs(Callable, Args...)>::type;
+        using Result = typename std::result_of<Callable(Args...)>::type;
 
-        // construct task in-place
-        new (task) FunTask< Impl >();
+        std::shared_ptr<TaskSpace> space;
+        FunTask<Impl>* task;
 
-        task->arena_id = SingletonContext::get().current_arena;
+        TaskBuilder(Callable&& f, Args&&... args)
+            : TaskProperties::Builder<TaskBuilder>(*this)
+            , space(current_task_space())
+        {
+            // allocate
+            redGrapes::memory::Allocator alloc;
+            memory::Block blk = alloc.allocate(sizeof(FunTask<Impl>));
+            task = (FunTask<Impl>*) blk.ptr;
 
-        // init properties from args
-        PropBuildHelper<TaskBuilder> build_helper{ *this };
-        pass(build_helper.template build<Args>(std::forward<Args>(args))...);
-        build_helper.foo();
+            if(!task)
+                throw std::runtime_error("out of memory");
 
-        // init id
-        this->init_id();
+            // construct task in-place
+            new(task) FunTask<Impl>();
 
-        // set impl
-        task->impl.emplace(BindArgs{}( std::move(f), std::forward<Args>(args)... ));
-    }
+            task->arena_id = SingletonContext::get().current_arena;
 
-    TaskBuilder( TaskBuilder & other )
-        : TaskProperties::Builder< TaskBuilder >( *this )
-        , space( other.space )
-        , task( other.task )
-    {
-        other.task = nullptr;
-    }
+            // init properties from args
+            PropBuildHelper<TaskBuilder> build_helper{*this};
+            pass(build_helper.template build<Args>(std::forward<Args>(args))...);
+            build_helper.foo();
 
-    TaskBuilder( TaskBuilder && other )
-        : TaskProperties::Builder< TaskBuilder >( *this )
-        , space( std::move(other.space) )
-        , task( std::move(other.task) )
-    {
-        other.task = nullptr;
-    }
+            // init id
+            this->init_id();
 
-    ~TaskBuilder()
-    {
-        if( task )
-            submit();
-    }
+            // set impl
+            task->impl.emplace(BindArgs{}(std::move(f), std::forward<Args>(args)...));
+        }
 
-    TaskBuilder & enable_stack_switching()
-    {
-        task->enable_stack_switching = true;
-        return *this;
-    }
+        TaskBuilder(TaskBuilder& other)
+            : TaskProperties::Builder<TaskBuilder>(*this)
+            , space(other.space)
+            , task(other.task)
+        {
+            other.task = nullptr;
+        }
 
-    auto submit()
-    {
-        Task * t = task;
-        task = nullptr;
+        TaskBuilder(TaskBuilder&& other)
+            : TaskProperties::Builder<TaskBuilder>(*this)
+            , space(std::move(other.space))
+            , task(std::move(other.task))
+        {
+            other.task = nullptr;
+        }
 
-        SPDLOG_TRACE("submit task {}", (TaskProperties const &)*t);
-        space->submit( t );
-
-        return std::move(Future<Result>( *t ));
-    }
+        ~TaskBuilder()
+        {
+            if(task)
+                submit();
+        }
 
-    auto get()
-    {
-        return submit().get();
-    }
-};
+        TaskBuilder& enable_stack_switching()
+        {
+            task->enable_stack_switching = true;
+            return *this;
+        }
 
-} // namespace redGrapes
+        auto submit()
+        {
+            Task* t = task;
+            task = nullptr;
+
+            SPDLOG_TRACE("submit task {}", (TaskProperties const&) *t);
+            space->submit(t);
+
+            return std::move(Future<Result>(*t));
+        }
+
+        auto get()
+        {
+            return submit().get();
+        }
+    };
+} // namespace redGrapes
diff --git a/redGrapes/task/task_space.cpp b/redGrapes/task/task_space.cpp
index e1b6b65d..c889d539 100644
--- a/redGrapes/task/task_space.cpp
+++ b/redGrapes/task/task_space.cpp
@@ -5,14 +5,14 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include +#include #include -#include +#include +#include #include #include -#include -#include -#include -#include +#include namespace redGrapes { @@ -20,17 +20,13 @@ namespace redGrapes { } - TaskSpace::TaskSpace() - : depth(0) - , parent(nullptr) + TaskSpace::TaskSpace() : depth(0), parent(nullptr) { task_count = 0; } // sub space - TaskSpace::TaskSpace(Task * parent) - : depth(parent->space->depth + 1) - , parent(parent) + TaskSpace::TaskSpace(Task* parent) : depth(parent->space->depth + 1), parent(parent) { task_count = 0; } @@ -51,7 +47,7 @@ namespace redGrapes return tc == 0; } - void TaskSpace::free_task( Task * task ) + void TaskSpace::free_task(Task* task) { TRACE_EVENT("TaskSpace", "free_task()"); unsigned count = task_count.fetch_sub(1) - 1; @@ -60,33 +56,34 @@ namespace redGrapes task->~Task(); // FIXME: len of the Block is not correct since FunTask object is bigger than sizeof(Task) - SingletonContext::get().worker_pool->get_worker( arena_id ).alloc.deallocate( memory::Block{ (uintptr_t)task, sizeof(Task) } ); + SingletonContext::get().worker_pool->get_worker(arena_id).alloc.deallocate( + memory::Block{(uintptr_t) task, sizeof(Task)}); // TODO: implement this using post-event of root-task? // - event already has in_edge count // -> never have current_task = nullptr - //spdlog::info("kill task... {} remaining", count); - if( count == 0 ) + // spdlog::info("kill task... {} remaining", count); + if(count == 0) SingletonContext::get().scheduler->wake_all(); } - void TaskSpace::submit( Task * task ) + void TaskSpace::submit(Task* task) { TRACE_EVENT("TaskSpace", "submit()"); task->space = shared_from_this(); task->task = task; - ++ task_count; + ++task_count; - if( parent ) - assert( this->is_superset(*parent, *task) ); + if(parent) + assert(this->is_superset(*parent, *task)); - for( auto r = task->unique_resources.rbegin(); r != task->unique_resources.rend(); ++r ) + for(auto r = task->unique_resources.rbegin(); r != task->unique_resources.rend(); ++r) { - r->task_entry = r->resource->users.push( task ); + r->task_entry = r->resource->users.push(task); } - SingletonContext::get().scheduler->emplace_task( *task ); + SingletonContext::get().scheduler->emplace_task(*task); } } // namespace redGrapes diff --git a/redGrapes/task/task_space.hpp b/redGrapes/task/task_space.hpp index 8b134f73..71e7c987 100644 --- a/redGrapes/task/task_space.hpp +++ b/redGrapes/task/task_space.hpp @@ -7,48 +7,48 @@ #pragma once -#include -#include -#include - +#include #include -#include #include -#include +#include + +#include +#include +#include namespace redGrapes { -/*! TaskSpace handles sub-taskspaces of child tasks - */ -struct TaskSpace : std::enable_shared_from_this -{ - std::atomic< unsigned long > task_count; + /*! 
TaskSpace handles sub-taskspaces of child tasks + */ + struct TaskSpace : std::enable_shared_from_this + { + std::atomic task_count; + + unsigned depth; + Task* parent; - unsigned depth; - Task * parent; + std::shared_mutex active_child_spaces_mutex; + std::vector> active_child_spaces; - std::shared_mutex active_child_spaces_mutex; - std::vector< std::shared_ptr< TaskSpace > > active_child_spaces; + virtual ~TaskSpace(); - virtual ~TaskSpace(); - - // top space - TaskSpace(); + // top space + TaskSpace(); - // sub space - TaskSpace( Task * parent ); + // sub space + TaskSpace(Task* parent); - virtual bool is_serial( Task& a, Task& b ); - virtual bool is_superset( Task& a, Task& b ); + virtual bool is_serial(Task& a, Task& b); + virtual bool is_superset(Task& a, Task& b); - // add a new task to the task-space - void submit( Task * task ); + // add a new task to the task-space + void submit(Task* task); - // remove task from task-space - void free_task( Task * task ); + // remove task from task-space + void free_task(Task* task); - bool empty() const; -}; + bool empty() const; + }; } // namespace redGrapes diff --git a/redGrapes/util/atomic_list.hpp b/redGrapes/util/atomic_list.hpp index 37dbe98f..f67cc008 100644 --- a/redGrapes/util/atomic_list.hpp +++ b/redGrapes/util/atomic_list.hpp @@ -7,331 +7,315 @@ #pragma once +#include +#include + +#include +#include +#include + #include #include #include -#include #include #include -#include -#include - -#include - namespace redGrapes { -namespace memory -{ - -/* maintains a lockfree singly-linked list - * with the following allowed operations: - * - append new chunks at head - * - erase any chunk which is not current head - * - reversed iteration (starting at head) - * - * each chunk is managed through a `std::shared_ptr` which points to a - * contiguous block containing list-metadata, the chunk-control-object - * (`ChunkData`) and freely usable data. - * - * @tparam Item element type - * @tparam Allocator must satisfy `Allocator` concept - */ -template < - typename Item, - typename Allocator -> -struct AtomicList -{ -//private: - struct ItemControlBlock + namespace memory { - bool volatile deleted; - std::shared_ptr< ItemControlBlock > prev; - uintptr_t item_data_ptr; - - ItemControlBlock( memory::Block blk ) - : deleted( false ) - , item_data_ptr( blk.ptr ) - { - /* put Item at front and initialize it - * with the remaining memory region - */ - blk.ptr += sizeof(Item); - blk.len -= sizeof(Item); - new ( get() ) Item ( blk ); - } - ~ItemControlBlock() - { - get()->~Item(); - } - - /* flag this chunk as deleted and call ChunkData destructor - */ - void erase() - { - deleted = true; - } - - /* adjusts `prev` so that it points to a non-deleted chunk again - * this should free the shared_ptr of the original prev - * in case no iterators point to it + /* maintains a lockfree singly-linked list + * with the following allowed operations: + * - append new chunks at head + * - erase any chunk which is not current head + * - reversed iteration (starting at head) + * + * each chunk is managed through a `std::shared_ptr` which points to a + * contiguous block containing list-metadata, the chunk-control-object + * (`ChunkData`) and freely usable data. 
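+     *
+     * Usage sketch (illustrative only; `MyItem` stands for an element type
+     * constructible from the `memory::Block` it is handed, and `MyAlloc`
+     * for any type satisfying the `Allocator` concept assumed below):
+     *
+     *   AtomicList<MyItem, MyAlloc> list(MyAlloc{}, 4096);
+     *   list.try_allocate_first_item();      // only one thread can win this
+     *   list.allocate_item();                // append a fresh chunk at head
+     *   for(auto it = list.rbegin(); it != list.rend(); ++it)
+     *       it->foo();                       // visits newest to oldest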
+ * + * @tparam Item element type + * @tparam Allocator must satisfy `Allocator` concept */ - void skip_deleted_prev() + template + struct AtomicList { - std::shared_ptr p = std::atomic_load( &prev ); - while( p && p->deleted ) - p = std::atomic_load( &p->prev ); - - std::atomic_store( &prev, p ); - } - - Item * get() const - { - return (Item*)item_data_ptr; - } - }; - - Allocator alloc; - std::shared_ptr< ItemControlBlock > head; - size_t const chunk_capacity; - - /* keeps a single, predefined pointer - * and frees it on deallocate. - * used to spoof the allocated size to be bigger than requested. - */ - template - struct StaticAlloc - { - typedef T value_type; - - Allocator alloc; - T * ptr; - - StaticAlloc( Allocator alloc, size_t n_bytes ) - : alloc(alloc) - , ptr( (T*)alloc.allocate( n_bytes ) ) - {} - - template - constexpr StaticAlloc( StaticAlloc const & other ) noexcept - : alloc(other.alloc) - , ptr((T*)other.ptr) - {} - - T * allocate( size_t n ) noexcept - { - return ptr; - } - - void deallocate( T * p, std::size_t n ) noexcept - { - alloc.deallocate( Block{ .ptr=(uintptr_t)p, .len=sizeof(T)*n} ); - } - }; - -public: - AtomicList( Allocator && alloc, size_t chunk_capacity ) - : alloc( alloc ) - , head( nullptr ) - , chunk_capacity( chunk_capacity ) - { - } - - static constexpr size_t get_controlblock_size() - { - /* TODO: use sizeof( ...shared_ptr_inplace_something... ) - */ - size_t const shared_ptr_size = 512; - return sizeof(ItemControlBlock) + shared_ptr_size; - } - - constexpr size_t get_chunk_capacity() - { - return chunk_capacity; - } - - constexpr size_t get_chunk_allocsize() - { - return chunk_capacity + get_controlblock_size(); - } - - /* allocate a new item and add it to the list - * - * @{ - */ - auto allocate_item() - { - TRACE_EVENT("Allocator", "AtomicList::allocate_item()"); - - /* NOTE: we are relying on std::allocate_shared - * to do one *single* allocation which contains: - * - shared_ptr control block - * - chunk control block - * - chunk data - * whereby chunk data is not included by sizeof(ItemControlBlock), - * but reserved by StaticAlloc. - * This works because shared_ptr control block lies at lower address. 
- */ - StaticAlloc chunk_alloc( this->alloc, get_chunk_allocsize() ); - - // this block will contain the Item-data of ItemControlBlock - memory::Block blk{ - .ptr = (uintptr_t)chunk_alloc.ptr + get_controlblock_size(), - .len = chunk_capacity - get_controlblock_size() - }; - - return append_item( std::allocate_shared< ItemControlBlock >( chunk_alloc, blk ) ); - } + // private: + struct ItemControlBlock + { + bool volatile deleted; + std::shared_ptr prev; + uintptr_t item_data_ptr; + + ItemControlBlock(memory::Block blk) : deleted(false), item_data_ptr(blk.ptr) + { + /* put Item at front and initialize it + * with the remaining memory region + */ + blk.ptr += sizeof(Item); + blk.len -= sizeof(Item); + new(get()) Item(blk); + } + + ~ItemControlBlock() + { + get()->~Item(); + } + + /* flag this chunk as deleted and call ChunkData destructor + */ + void erase() + { + deleted = true; + } + + /* adjusts `prev` so that it points to a non-deleted chunk again + * this should free the shared_ptr of the original prev + * in case no iterators point to it + */ + void skip_deleted_prev() + { + std::shared_ptr p = std::atomic_load(&prev); + while(p && p->deleted) + p = std::atomic_load(&p->prev); + + std::atomic_store(&prev, p); + } + + Item* get() const + { + return (Item*) item_data_ptr; + } + }; + + Allocator alloc; + std::shared_ptr head; + size_t const chunk_capacity; + + /* keeps a single, predefined pointer + * and frees it on deallocate. + * used to spoof the allocated size to be bigger than requested. + */ + template + struct StaticAlloc + { + typedef T value_type; + + Allocator alloc; + T* ptr; + + StaticAlloc(Allocator alloc, size_t n_bytes) : alloc(alloc), ptr((T*) alloc.allocate(n_bytes)) + { + } + + template + constexpr StaticAlloc(StaticAlloc const& other) noexcept : alloc(other.alloc) + , ptr((T*) other.ptr) + { + } + + T* allocate(size_t n) noexcept + { + return ptr; + } + + void deallocate(T* p, std::size_t n) noexcept + { + alloc.deallocate(Block{.ptr = (uintptr_t) p, .len = sizeof(T) * n}); + } + }; + + public: + AtomicList(Allocator&& alloc, size_t chunk_capacity) + : alloc(alloc) + , head(nullptr) + , chunk_capacity(chunk_capacity) + { + } - /** allocate the first item if the list is empty - * - * If more than one thread tries to add the first item only one thread will successfully add an item. - */ - bool try_allocate_first_item() - { - TRACE_EVENT("Allocator", "AtomicList::allocate_first_item()"); - StaticAlloc chunk_alloc( this->alloc, get_chunk_allocsize() ); + static constexpr size_t get_controlblock_size() + { + /* TODO: use sizeof( ...shared_ptr_inplace_something... 
) + */ + size_t const shared_ptr_size = 512; + return sizeof(ItemControlBlock) + shared_ptr_size; + } - // this block will contain the Item-data of ItemControlBlock - memory::Block blk{ - .ptr = (uintptr_t)chunk_alloc.ptr + get_controlblock_size(), - .len = chunk_capacity - get_controlblock_size() - }; + constexpr size_t get_chunk_capacity() + { + return chunk_capacity; + } - auto sharedChunk = std::allocate_shared< ItemControlBlock >( chunk_alloc, blk ); - return try_append_first_item( std::move(sharedChunk) ); - } - /** @} */ + constexpr size_t get_chunk_allocsize() + { + return chunk_capacity + get_controlblock_size(); + } - template < bool is_const = false > - struct BackwardIterator - { - std::shared_ptr< ItemControlBlock > c; + /* allocate a new item and add it to the list + * + * @{ + */ + auto allocate_item() + { + TRACE_EVENT("Allocator", "AtomicList::allocate_item()"); + + /* NOTE: we are relying on std::allocate_shared + * to do one *single* allocation which contains: + * - shared_ptr control block + * - chunk control block + * - chunk data + * whereby chunk data is not included by sizeof(ItemControlBlock), + * but reserved by StaticAlloc. + * This works because shared_ptr control block lies at lower address. + */ + StaticAlloc chunk_alloc(this->alloc, get_chunk_allocsize()); + + // this block will contain the Item-data of ItemControlBlock + memory::Block blk{ + .ptr = (uintptr_t) chunk_alloc.ptr + get_controlblock_size(), + .len = chunk_capacity - get_controlblock_size()}; + + return append_item(std::allocate_shared(chunk_alloc, blk)); + } - void erase() - { - c->erase(); - } + /** allocate the first item if the list is empty + * + * If more than one thread tries to add the first item only one thread will successfully add an item. + */ + bool try_allocate_first_item() + { + TRACE_EVENT("Allocator", "AtomicList::allocate_first_item()"); + StaticAlloc chunk_alloc(this->alloc, get_chunk_allocsize()); - bool operator!=(BackwardIterator const & other) const - { - return c != other.c; - } + // this block will contain the Item-data of ItemControlBlock + memory::Block blk{ + .ptr = (uintptr_t) chunk_alloc.ptr + get_controlblock_size(), + .len = chunk_capacity - get_controlblock_size()}; - operator bool() const - { - return (bool)c; - } - - typename std::conditional< - is_const, - Item const *, - Item * - >::type - operator->() const - { - return c->get(); - } - - typename std::conditional< - is_const, - Item const &, - Item & - >::type - operator*() const - { - return *c->get(); - } + auto sharedChunk = std::allocate_shared(chunk_alloc, blk); + return try_append_first_item(std::move(sharedChunk)); + } - void optimize() - { - if(c) - c->skip_deleted_prev(); - } + /** @} */ - BackwardIterator& operator++() - { - if( c ) + template + struct BackwardIterator { - c->skip_deleted_prev(); - c = c->prev; + std::shared_ptr c; + + void erase() + { + c->erase(); + } + + bool operator!=(BackwardIterator const& other) const + { + return c != other.c; + } + + operator bool() const + { + return (bool) c; + } + + typename std::conditional::type operator->() const + { + return c->get(); + } + + typename std::conditional::type operator*() const + { + return *c->get(); + } + + void optimize() + { + if(c) + c->skip_deleted_prev(); + } + + BackwardIterator& operator++() + { + if(c) + { + c->skip_deleted_prev(); + c = c->prev; + } + + return *this; + } + }; + + using ConstBackwardIterator = BackwardIterator; + using MutBackwardIterator = BackwardIterator; + + /* get iterator starting at current head, 
iterating backwards from + * most recently added to least recently added + */ + MutBackwardIterator rbegin() const + { + return MutBackwardIterator{std::atomic_load(&head)}; } - return *this; - } - }; - - using ConstBackwardIterator = BackwardIterator< true >; - using MutBackwardIterator = BackwardIterator< false >; - - /* get iterator starting at current head, iterating backwards from - * most recently added to least recently added - */ - MutBackwardIterator rbegin() const - { - return MutBackwardIterator{ std::atomic_load(&head) }; - } - - MutBackwardIterator rend() const - { - return MutBackwardIterator{ std::shared_ptr() }; - } + MutBackwardIterator rend() const + { + return MutBackwardIterator{std::shared_ptr()}; + } - ConstBackwardIterator crbegin() const - { - return ConstBackwardIterator{ std::atomic_load(&head) }; - } + ConstBackwardIterator crbegin() const + { + return ConstBackwardIterator{std::atomic_load(&head)}; + } - ConstBackwardIterator crend() const - { - return ConstBackwardIterator{ std::shared_ptr() }; - } - - /* Flags chunk at `pos` as erased. Actual removal is delayed until - * iterator stumbles over it. - * - * Since we only append to the end and `chunk` is not `head`, - * there wont occur any inserts after this chunk. - */ - void erase( MutBackwardIterator pos ) - { - pos.erase(); - } - - /* atomically appends a floating chunk to this list - * and returns the previous head to which the new_head - * is now linked. - */ - auto append_item( std::shared_ptr< ItemControlBlock > new_head ) - { - TRACE_EVENT("Allocator", "AtomicList::append_item()"); - std::shared_ptr< ItemControlBlock > old_head; + ConstBackwardIterator crend() const + { + return ConstBackwardIterator{std::shared_ptr()}; + } - bool append_successful = false; - while( ! append_successful ) - { - old_head = std::atomic_load( &head ); - std::atomic_store( &new_head->prev, old_head ); - append_successful = std::atomic_compare_exchange_strong( &head, &old_head, new_head ); - } + /* Flags chunk at `pos` as erased. Actual removal is delayed until + * iterator stumbles over it. + * + * Since we only append to the end and `chunk` is not `head`, + * there wont occur any inserts after this chunk. + */ + void erase(MutBackwardIterator pos) + { + pos.erase(); + } - return MutBackwardIterator{ old_head }; - } + /* atomically appends a floating chunk to this list + * and returns the previous head to which the new_head + * is now linked. 
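+         *
+         * Conceptually, one successful append performs (a sketch; the loop
+         * in the implementation below retries until the compare-exchange on
+         * `head` succeeds):
+         *
+         *   new_head->prev = head;   // link floating chunk to current head
+         *   head = new_head;         // publish it as the new head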
+ */ + auto append_item(std::shared_ptr new_head) + { + TRACE_EVENT("Allocator", "AtomicList::append_item()"); + std::shared_ptr old_head; + + bool append_successful = false; + while(!append_successful) + { + old_head = std::atomic_load(&head); + std::atomic_store(&new_head->prev, old_head); + append_successful + = std::atomic_compare_exchange_strong(&head, &old_head, new_head); + } + + return MutBackwardIterator{old_head}; + } - // append the first head item if not already exists - bool try_append_first_item( std::shared_ptr< ItemControlBlock > new_head ) - { - TRACE_EVENT("Allocator", "AtomicList::append_first_item()"); + // append the first head item if not already exists + bool try_append_first_item(std::shared_ptr new_head) + { + TRACE_EVENT("Allocator", "AtomicList::append_first_item()"); - std::shared_ptr< ItemControlBlock > expected( nullptr ); - std::shared_ptr< ItemControlBlock > const & desired = new_head; - return std::atomic_compare_exchange_strong( &head, &expected, desired ); - } -}; + std::shared_ptr expected(nullptr); + std::shared_ptr const& desired = new_head; + return std::atomic_compare_exchange_strong(&head, &expected, desired); + } + }; -} // namespace memory + } // namespace memory } // namespace redGrapes - diff --git a/redGrapes/util/bitfield.hpp b/redGrapes/util/bitfield.hpp index cd06a90e..716f42ad 100644 --- a/redGrapes/util/bitfield.hpp +++ b/redGrapes/util/bitfield.hpp @@ -7,196 +7,190 @@ #pragma once -#include #include #include +#include namespace redGrapes { -struct AtomicBitfield -{ - AtomicBitfield( size_t m_size ) - : m_size(m_size) - , state( ceil_div(m_size, bitfield_len) ) + struct AtomicBitfield { - - } - - size_t size() - { - return m_size; - } - -#define __INDEX_CALC(i, j, k, m) \ - unsigned j = i / bitfield_len; \ - unsigned k = i % bitfield_len; \ - uint64_t m = (uint64_t)1 << k; - - /* atomically update bit at index `idx` - * - * @return previous value - */ - inline bool set( unsigned idx, bool new_value ) - { - __INDEX_CALC(idx, chunk_idx, k, mask) - unsigned old_val; - - switch(new_value) { - case false: - old_val = state[chunk_idx].fetch_and(~mask, std::memory_order_acquire); - break; + AtomicBitfield(size_t m_size) : m_size(m_size), state(ceil_div(m_size, bitfield_len)) + { + } - case true: - old_val = state[chunk_idx].fetch_or(mask, std::memory_order_release); - break; + size_t size() + { + return m_size; } - return old_val & mask; - } +#define __INDEX_CALC(i, j, k, m) \ + unsigned j = i / bitfield_len; \ + unsigned k = i % bitfield_len; \ + uint64_t m = (uint64_t) 1 << k; - /* get current value of bit at `idx` - */ - inline bool get( unsigned idx ) - { - __INDEX_CALC(idx,chunk_idx,k,mask) - return state[chunk_idx] & mask; - } - - /* searches for a bit which is of state `expected_value` - * and suffices the condition given by `f`. 
- * - * @param start_idx gives a initial position where - * elements in the same chunk as `start_idx` are preferred over - * elements from differening chunks - * and elements following `start_idx` are preferred over preceding ones - * - * @return element given by `f(idx)` where `state[idx] == expected_value` - */ - template - inline std::optional< T > - probe_by_value( - F && f, - bool expected_value, - unsigned start_idx, - bool exclude_start = true) - { - uint64_t start_field_idx = start_idx / bitfield_len; - uint64_t first_mask = (uint64_t(-1) << (start_idx%bitfield_len)); - uint64_t second_mask = ~first_mask; - - /* probe second-half of current chunk + /* atomically update bit at index `idx` + * + * @return previous value */ - if( start_field_idx == state.size() - 1 && size() % bitfield_len != 0 ) - second_mask &= (uint64_t(1) << (size() % bitfield_len)) - 1; + inline bool set(unsigned idx, bool new_value) + { + __INDEX_CALC(idx, chunk_idx, k, mask) + unsigned old_val; - if( exclude_start ) - second_mask &= ~(uint64_t(1) << (start_idx%bitfield_len)); + switch(new_value) + { + case false: + old_val = state[chunk_idx].fetch_and(~mask, std::memory_order_acquire); + break; - if( auto x = probe_chunk_by_value( start_field_idx, second_mask, expected_value, f ) ) - return x; + case true: + old_val = state[chunk_idx].fetch_or(mask, std::memory_order_release); + break; + } - /* probe first half of current chunk + return old_val & mask; + } + + /* get current value of bit at `idx` */ - if( start_field_idx == state.size() - 1 && size() % bitfield_len != 0 ) - first_mask &= (uint64_t(1) << (size() % bitfield_len)) - 1; - if( auto x = probe_chunk_by_value( start_field_idx, first_mask, expected_value, f ) ) - return x; + inline bool get(unsigned idx) + { + __INDEX_CALC(idx, chunk_idx, k, mask) + return state[chunk_idx] & mask; + } - /* probe remaining chunks + /* searches for a bit which is of state `expected_value` + * and suffices the condition given by `f`. 
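+         *
+         * Illustrative call (a sketch; `claim_worker` stands for any functor
+         * mapping a bit index to a std::optional<unsigned> result):
+         *
+         *   std::optional<unsigned> idx
+         *       = free_workers.probe_by_value<unsigned>(claim_worker, true, start_idx);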
+ * + * @param start_idx gives a initial position where + * elements in the same chunk as `start_idx` are preferred over + * elements from differening chunks + * and elements following `start_idx` are preferred over preceding ones + * + * @return element given by `f(idx)` where `state[idx] == expected_value` */ - for( - uint64_t b = 1; - b < ceil_div(size(), bitfield_len); - ++b - ) { - uint64_t field_idx = (start_field_idx + b) % state.size(); - uint64_t mask = ~0; - - if( field_idx == state.size() - 1 && size() % bitfield_len != 0 ) - mask &= (uint64_t(1) << (size() % bitfield_len)) - 1; - - if( auto x = probe_chunk_by_value( field_idx, mask, expected_value, f ) ) + template + inline std::optional probe_by_value( + F&& f, + bool expected_value, + unsigned start_idx, + bool exclude_start = true) + { + uint64_t start_field_idx = start_idx / bitfield_len; + uint64_t first_mask = (uint64_t(-1) << (start_idx % bitfield_len)); + uint64_t second_mask = ~first_mask; + + /* probe second-half of current chunk + */ + if(start_field_idx == state.size() - 1 && size() % bitfield_len != 0) + second_mask &= (uint64_t(1) << (size() % bitfield_len)) - 1; + + if(exclude_start) + second_mask &= ~(uint64_t(1) << (start_idx % bitfield_len)); + + if(auto x = probe_chunk_by_value(start_field_idx, second_mask, expected_value, f)) return x; - } - return std::nullopt; - } + /* probe first half of current chunk + */ + if(start_field_idx == state.size() - 1 && size() % bitfield_len != 0) + first_mask &= (uint64_t(1) << (size() % bitfield_len)) - 1; + if(auto x = probe_chunk_by_value(start_field_idx, first_mask, expected_value, f)) + return x; + /* probe remaining chunks + */ + for(uint64_t b = 1; b < ceil_div(size(), bitfield_len); ++b) + { + uint64_t field_idx = (start_field_idx + b) % state.size(); + uint64_t mask = ~0; + if(field_idx == state.size() - 1 && size() % bitfield_len != 0) + mask &= (uint64_t(1) << (size() % bitfield_len)) - 1; -private: - // TODO: try different values, e.g. 8 - // to add hierarchy matching the NUMA architecture - static constexpr uint64_t bitfield_len = 64; + if(auto x = probe_chunk_by_value(field_idx, mask, expected_value, f)) + return x; + } - size_t m_size; - std::vector< std::atomic< uint64_t > > state; + return std::nullopt; + } - - /*! calculates ceil( a / b ) - */ - static inline uint64_t ceil_div( uint64_t a, uint64_t b ) - { - return (a+b-1)/b; - } - // find index of first set bit - // taken from https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel - static inline unsigned int first_one_idx( uint64_t v ) - { - unsigned int c = 64; // c will be the number of zero bits on the right - v &= -int64_t(v); - if (v) c--; - if (v & 0x00000000FFFFFFFF) c -= 32; - if (v & 0x0000FFFF0000FFFF) c -= 16; - if (v & 0x00FF00FF00FF00FF) c -= 8; - if (v & 0x0F0F0F0F0F0F0F0F) c -= 4; - if (v & 0x3333333333333333) c -= 2; - if (v & 0x5555555555555555) c -= 1; - - return c; - } - - - /* searches for a bit which is of state `expected_value` - * and suffices the condition given by `f` in the chunk `j`. - * - * @return element given by `f(idx)` where `state[idx] == expected_value` - */ - template - inline std::optional< T > probe_chunk_by_value( unsigned j, uint64_t mask, bool expected_value, F && f ) - { - while( true ) - { - uint64_t field = expected_value ? - uint64_t(state[j]) : ~uint64_t(state[j]); + private: + // TODO: try different values, e.g. 
8 + // to add hierarchy matching the NUMA architecture + static constexpr uint64_t bitfield_len = 64; - uint64_t masked_field = field & mask; - if( masked_field == 0 ) - break; + size_t m_size; + std::vector> state; + + /*! calculates ceil( a / b ) + */ + static inline uint64_t ceil_div(uint64_t a, uint64_t b) + { + return (a + b - 1) / b; + } - // find index of first worker - unsigned int k = first_one_idx( masked_field ); + // find index of first set bit + // taken from https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel + static inline unsigned int first_one_idx(uint64_t v) + { + unsigned int c = 64; // c will be the number of zero bits on the right + v &= -int64_t(v); + if(v) + c--; + if(v & 0x0000'0000'FFFF'FFFF) + c -= 32; + if(v & 0x0000'FFFF'0000'FFFF) + c -= 16; + if(v & 0x00FF'00FF'00FF'00FF) + c -= 8; + if(v & 0x0F0F'0F0F'0F0F'0F0F) + c -= 4; + if(v & 0x3333'3333'3333'3333) + c -= 2; + if(v & 0x5555'5555'5555'5555) + c -= 1; + + return c; + } - if( k < bitfield_len ) + /* searches for a bit which is of state `expected_value` + * and suffices the condition given by `f` in the chunk `j`. + * + * @return element given by `f(idx)` where `state[idx] == expected_value` + */ + template + inline std::optional probe_chunk_by_value(unsigned j, uint64_t mask, bool expected_value, F&& f) + { + while(true) { - unsigned int idx = j * bitfield_len + k; - //spdlog::info("find worker: j = {}, k = {}, idx= {}", j , k, idx); + uint64_t field = expected_value ? uint64_t(state[j]) : ~uint64_t(state[j]); - if( std::optional x = f( idx ) ) - return x; + uint64_t masked_field = field & mask; + if(masked_field == 0) + break; - // dont check this worker again - mask &= ~(uint64_t(1) << k); - } - } + // find index of first worker + unsigned int k = first_one_idx(masked_field); - return std::nullopt; - } - + if(k < bitfield_len) + { + unsigned int idx = j * bitfield_len + k; + // spdlog::info("find worker: j = {}, k = {}, idx= {}", j , k, idx); -}; + if(std::optional x = f(idx)) + return x; -} // namespace redGrapes + // dont check this worker again + mask &= ~(uint64_t(1) << k); + } + } + return std::nullopt; + } + }; + +} // namespace redGrapes diff --git a/redGrapes/util/chunked_list.hpp b/redGrapes/util/chunked_list.hpp index fe431749..84629df5 100644 --- a/redGrapes/util/chunked_list.hpp +++ b/redGrapes/util/chunked_list.hpp @@ -11,638 +11,624 @@ #pragma once -#include +#include +#include +#include + +#include + #include #include +#include #include #include #include #include #include -#include -#include -#include -#include namespace redGrapes { -/*! - * This container class supports two basic mutable, iterator-stable operations, - * both of which can be performed **concurrently** an in nearly **constant time**: - * - * - *push(item)*: append an element at the end, returns its index - * - *remove(idx)*: deletes the element given its index - * - * It is implemented as atomic linked list of chunks, - * which are fixed size arrays of elements. - * - * New elements can only be `push`ed to the end. - * They can not be inserted at random position. - * Depending on `chunk_size` , adding new elements - * is performed in constant time and without allocations, - * as long as the chunk still has capacity. - * Only one of chunk_size many calls of push() require - * memory allocation. - * - * Elements are removed in constant time. - * Removed elements are skipped by the iterators, - * however their memory is still occupied - * until all elements of the chunk are removed. 
- * The instances of items are kept alive until all - * iterators referencing that item have released the - * ownership. Then the element-destructor is called. - * - * Iteration can begin at a specific position that was - * returned by `push`. - * - * ## Example Usecases: - * - * - **Resource User List** (exist per resource) is used concurrently with: - * - push() from mostly one but potentially many task-creator-threads through emplace_task(), - * - reversed iteration and remove() from Worker threads - * from task.init_graph() and task.remove_from_resources(). - * - * - **Event Follower List** (exists per event): - * - push() concurrently by multiple Workers initializing new task dependenies, - * - remove() concurrently my mutliple Workers through update_graph(). - * - * - **Access List** (exists per task): - * - push() from only one single thread that is initializing the task - * and after that finished, - * - remove() from only one single thread. - * - concurrently to the first two, all Worker threads may iterate read-only. - */ -template < - typename T, - size_t T_chunk_size = 32, - class Allocator = memory::Allocator -> -struct ChunkedList -{ - using iter_offset_t = uint16_t; - using refcount_t = int16_t; - - struct Item + /*! + * This container class supports two basic mutable, iterator-stable operations, + * both of which can be performed **concurrently** an in nearly **constant time**: + * + * - *push(item)*: append an element at the end, returns its index + * - *remove(idx)*: deletes the element given its index + * + * It is implemented as atomic linked list of chunks, + * which are fixed size arrays of elements. + * + * New elements can only be `push`ed to the end. + * They can not be inserted at random position. + * Depending on `chunk_size` , adding new elements + * is performed in constant time and without allocations, + * as long as the chunk still has capacity. + * Only one of chunk_size many calls of push() require + * memory allocation. + * + * Elements are removed in constant time. + * Removed elements are skipped by the iterators, + * however their memory is still occupied + * until all elements of the chunk are removed. + * The instances of items are kept alive until all + * iterators referencing that item have released the + * ownership. Then the element-destructor is called. + * + * Iteration can begin at a specific position that was + * returned by `push`. + * + * ## Example Usecases: + * + * - **Resource User List** (exist per resource) is used concurrently with: + * - push() from mostly one but potentially many task-creator-threads through emplace_task(), + * - reversed iteration and remove() from Worker threads + * from task.init_graph() and task.remove_from_resources(). + * + * - **Event Follower List** (exists per event): + * - push() concurrently by multiple Workers initializing new task dependenies, + * - remove() concurrently my mutliple Workers through update_graph(). + * + * - **Access List** (exists per task): + * - push() from only one single thread that is initializing the task + * and after that finished, + * - remove() from only one single thread. + * - concurrently to the first two, all Worker threads may iterate read-only. + */ + template + struct ChunkedList { - struct TrivialInit_t{}; - union ItemStorage - { - char dummy; - T value; + using iter_offset_t = uint16_t; + using refcount_t = int16_t; - ItemStorage( TrivialInit_t ) noexcept - : dummy() - {} + struct Item + { + struct TrivialInit_t + { + }; - template < typename... 
Args > - ItemStorage( Args&&... args ) - : value(std::forward(args)...) - {} + union ItemStorage + { + char dummy; + T value; - ~ItemStorage() {} - }; + ItemStorage(TrivialInit_t) noexcept : dummy() + { + } - /* actual data - */ - ItemStorage storage; - - /* this variable tells the distance to the next initialized - * and not already deleted item, where a distance of `0` - * means that this item exists. - * In case this item is deleted, `iter_offset` gives an - * offset by which we can safely jump to find the next - * existing item. - * - * iter_offset = 0 means this item exists - * iter_offset = 1 means previous item exists - * ... - */ - std::atomic< iter_offset_t > iter_offset; - - /* counts the number of iterators pointing - * at this item currently. - * It is possible that iterators keep their - * reference to an item while this item is being - * deleted. In this case, iter_offset will already - * be set, so any new iterators will now skip - * this item but as long as some iterators referencing - * the already deleted item exist, the item data will - * not be destructed. - */ - std::atomic< refcount_t > refcount; + template + ItemStorage(Args&&... args) : value(std::forward(args)...) + { + } - Item() - // any item starts uninitialized - : iter_offset( 1 ) - , refcount( 0 ) - , storage( TrivialInit_t{} ) - {} + ~ItemStorage() + { + } + }; - ~Item() - { - release(); - } + /* actual data + */ + ItemStorage storage; + + /* this variable tells the distance to the next initialized + * and not already deleted item, where a distance of `0` + * means that this item exists. + * In case this item is deleted, `iter_offset` gives an + * offset by which we can safely jump to find the next + * existing item. + * + * iter_offset = 0 means this item exists + * iter_offset = 1 means previous item exists + * ... + */ + std::atomic iter_offset; + + /* counts the number of iterators pointing + * at this item currently. + * It is possible that iterators keep their + * reference to an item while this item is being + * deleted. In this case, iter_offset will already + * be set, so any new iterators will now skip + * this item but as long as some iterators referencing + * the already deleted item exist, the item data will + * not be destructed. + */ + std::atomic refcount; - /* initialize value of this item. - * only intended for new elements, - * re-assigning is not allowed. - * Per Item, only one thread is allowed to - * call the assignment operator once. - */ - T & operator=(T const & value) - { - assert( iter_offset != 0 ); - assert( refcount == 0 ); + Item() + // any item starts uninitialized + : iter_offset(1) + , refcount(0) + , storage(TrivialInit_t{}) + { + } - storage.value = value; + ~Item() + { + release(); + } - /* here, item.value is now fully initalized, - * so allow iterators to access this item now. + /* initialize value of this item. + * only intended for new elements, + * re-assigning is not allowed. + * Per Item, only one thread is allowed to + * call the assignment operator once. */ - iter_offset = 0; + T& operator=(T const& value) + { + assert(iter_offset != 0); + assert(refcount == 0); - return storage.value; - } + storage.value = value; - /* Try to increment `refcount` and check if this - * item is still alive. 
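+             *
+             * Typical caller pattern (a sketch; mirrors what ItemAccess
+             * further down does internally):
+             *
+             *   iter_offset_t off = item.acquire();
+             *   if(off == 0)
+             *   {
+             *       use(item.storage.value); // refcount now guards the value
+             *       item.release();
+             *   }
+             *   // else: skip `off` positions to reach the next valid item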
- * - * @return 0 if acquisition was successful, - * otherwise return iterator distance to the next - * valid item - */ - iter_offset_t acquire() - { - iter_offset_t off = iter_offset.load(); - refcount_t old_refcount = refcount.load(); + /* here, item.value is now fully initalized, + * so allow iterators to access this item now. + */ + iter_offset = 0; - if( off == 0 && old_refcount >= 0 ) + return storage.value; + } + + /* Try to increment `refcount` and check if this + * item is still alive. + * + * @return 0 if acquisition was successful, + * otherwise return iterator distance to the next + * valid item + */ + iter_offset_t acquire() { - old_refcount = refcount.fetch_add(1); - off = iter_offset.load(); + iter_offset_t off = iter_offset.load(); + refcount_t old_refcount = refcount.load(); - if( old_refcount >= 0 ) + if(off == 0 && old_refcount >= 0) { - /* The item data is not already destructed, - * but only when `iter_offset` is still set to `0` - * as initialized by `operator=`, the item still exists. - * In case `off > 0`, some thread already called `remove()` - * on this iterator position. - */ + old_refcount = refcount.fetch_add(1); + off = iter_offset.load(); - /* keep others from falsely trying to acquire this item - * if it is deleted already. - */ - if( off != 0 ) + if(old_refcount >= 0) + { + /* The item data is not already destructed, + * but only when `iter_offset` is still set to `0` + * as initialized by `operator=`, the item still exists. + * In case `off > 0`, some thread already called `remove()` + * on this iterator position. + */ + + /* keep others from falsely trying to acquire this item + * if it is deleted already. + */ + if(off != 0) + --refcount; + } + else + /* item data is already destructed. + * just decrement refcount to keep others from trying to + * acquire this item. + */ --refcount; } - else - /* item data is already destructed. - * just decrement refcount to keep others from trying to - * acquire this item. - */ - --refcount; + + return off; } - return off; - } + /* decrement refcount and in case this + * was the last reference, deconstruct the element + * @tparam fail_on_invalid if true, this function will + * throw if the item was already deleted + */ + template + void release() + { + refcount_t old_refcount = refcount.fetch_sub(1); + if(old_refcount == 0) + { + // item is now deleted, and refcount set to -1 + storage.value.~T(); + } + else if(old_refcount < 0) + { + if(fail_on_invalid) + throw std::runtime_error("ChunkedList: try to remove invalid item!"); + } + } + }; - /* decrement refcount and in case this - * was the last reference, deconstruct the element - * @tparam fail_on_invalid if true, this function will - * throw if the item was already deleted - */ - template < bool fail_on_invalid = true > - void release() + struct Chunk { - refcount_t old_refcount = refcount.fetch_sub(1); - if( old_refcount == 0 ) + /* beginning of the chunk + */ + std::atomic first_item; + + /* points to the next free storage slot, + * if available. 
Will be used to add a new element + */ + std::atomic next_item; + + std::atomic freed_items{0}; + + Chunk(memory::Block blk) : first_item((Item*) blk.ptr), next_item((Item*) blk.ptr) { - // item is now deleted, and refcount set to -1 - storage.value.~T(); + for(Item* item = this->first_item; item < (this->first_item + T_chunk_size); item++) + new(item) Item(); } - else if( old_refcount < 0 ) + + ~Chunk() { - if( fail_on_invalid ) - throw std::runtime_error("ChunkedList: try to remove invalid item!"); + for(Item* item = first_item; item < (this->first_item + T_chunk_size); item++) + item->~Item(); } - } - }; - - struct Chunk - { - /* beginning of the chunk - */ - std::atomic< Item * > first_item; - - /* points to the next free storage slot, - * if available. Will be used to add a new element - */ - std::atomic< Item * > next_item; - std::atomic< size_t > freed_items{ 0 }; + Item* items() + { + return first_item; + } - Chunk( memory::Block blk ) - : first_item( (Item*) blk.ptr ) - , next_item( (Item*) blk.ptr ) - { - for(Item * item = this->first_item; item < ( this->first_item + T_chunk_size ); item++ ) - new (item) Item(); - } + /* returns the latest item which was inserted + */ + Item* get_last_item() + { + Item* limit = first_item + T_chunk_size; + Item* last_item = next_item - 1; - ~Chunk() - { - for( Item * item = first_item; item < ( this->first_item + T_chunk_size ); item++ ) - item->~Item(); - } + if(last_item >= limit) + last_item = limit - 1; - Item * items() - { - return first_item; - } + return last_item; + } + }; - /* returns the latest item which was inserted - */ - Item * get_last_item() + template + struct ItemAccess { - Item * limit = first_item + T_chunk_size; - Item * last_item = next_item - 1; - - if( last_item >= limit ) - last_item = limit - 1; + private: + friend class ChunkedList; + typename memory::AtomicList::MutBackwardIterator chunk; + + /* this pointer packs the address of the current element + * and the `has_element` bit in its MSB (most significant bit). + * Pointers where the MSB is zero indicate an existing storage location + * but with uninitialized element. Pointers where MSB is set + * point to an existing element. + */ + uintptr_t cur_item; - return last_item; - } - }; + inline Item* get_item_ptr() const + { + return (Item*) (cur_item & (~(uintptr_t) 0 >> 1)); + } - template < bool is_const > - struct ItemAccess - { - private: - friend class ChunkedList; - typename memory::AtomicList< Chunk, Allocator >::MutBackwardIterator chunk; - - /* this pointer packs the address of the current element - * and the `has_element` bit in its MSB (most significant bit). - * Pointers where the MSB is zero indicate an existing storage location - * but with uninitialized element. Pointers where MSB is set - * point to an existing element. - */ - uintptr_t cur_item; - - inline Item * get_item_ptr() const { return (Item *) (cur_item & (~(uintptr_t)0 >> 1)); } - inline bool has_item() const { return cur_item & ~(~(uintptr_t)0 >> 1); } - inline void set_item() { cur_item |= ~( ~(uintptr_t) 0 >> 1 ); } - inline void unset_item() { cur_item &= ~(uintptr_t)0 >> 1; } - - protected: - /*! - * checks whether the iterator points to an existing storage location. - * This storage location can be used, free or deleted. - * Only by `rend()`, and if the container is empty also `rbegin()` shall - * return an iterator with invalid idx. 
- */ - bool is_valid_idx() const - { - return ((bool)chunk) - && ( get_item_ptr() >= chunk->first_item ) - && ( get_item_ptr() < chunk->first_item+T_chunk_size ); - } + inline bool has_item() const + { + return cur_item & ~(~(uintptr_t) 0 >> 1); + } - /*! - * tries to acquire the element this iterator points to - * by incrementing the reference count, so it will not be - * deleted concurrently to the usage of this iterator. - * @return 0 if acquisition was successful, - * otherwise return iterator distance to the next - * valid item - */ - iter_offset_t try_acquire() - { - if( is_valid_idx() ) + inline void set_item() { - iter_offset_t off = item().acquire(); - if( off == 0 ) - set_item(); + cur_item |= ~(~(uintptr_t) 0 >> 1); + } - return off; + inline void unset_item() + { + cur_item &= ~(uintptr_t) 0 >> 1; } - else - return 1; - } - /*! - * release the storage location - */ - void release() - { - if( has_item() ) + protected: + /*! + * checks whether the iterator points to an existing storage location. + * This storage location can be used, free or deleted. + * Only by `rend()`, and if the container is empty also `rbegin()` shall + * return an iterator with invalid idx. + */ + bool is_valid_idx() const { - unset_item(); - item().release(); + return ((bool) chunk) && (get_item_ptr() >= chunk->first_item) + && (get_item_ptr() < chunk->first_item + T_chunk_size); } - } - /*! - * advance the position until we find a un-deleted item - * that is acquired successfully. - */ - void acquire_next_item() - { - while( is_valid_idx() ) + /*! + * tries to acquire the element this iterator points to + * by incrementing the reference count, so it will not be + * deleted concurrently to the usage of this iterator. + * @return 0 if acquisition was successful, + * otherwise return iterator distance to the next + * valid item + */ + iter_offset_t try_acquire() { - iter_offset_t step = try_acquire(); - if( step == 0 ) + if(is_valid_idx()) { - // item was successfully acquired. - assert( has_item() ); - return; + iter_offset_t off = item().acquire(); + if(off == 0) + set_item(); + + return off; } else - { - // item is not existent - assert( ! has_item() ); + return 1; + } - // jump to next valid item - cur_item = (uintptr_t) (get_item_ptr() - step); + /*! + * release the storage location + */ + void release() + { + if(has_item()) + { + unset_item(); + item().release(); + } + } - // goto next chunk if necessary - if( ! is_valid_idx() ) + /*! + * advance the position until we find a un-deleted item + * that is acquired successfully. + */ + void acquire_next_item() + { + while(is_valid_idx()) + { + iter_offset_t step = try_acquire(); + if(step == 0) { - ++chunk; - if( chunk ) - cur_item = (uintptr_t) chunk->get_last_item(); - else - cur_item = 0; + // item was successfully acquired. 
+ assert(has_item()); + return; + } + else + { + // item is not existent + assert(!has_item()); + + // jump to next valid item + cur_item = (uintptr_t) (get_item_ptr() - step); + + // goto next chunk if necessary + if(!is_valid_idx()) + { + ++chunk; + if(chunk) + cur_item = (uintptr_t) chunk->get_last_item(); + else + cur_item = 0; + } } } - } - - // reached the end here - cur_item = 0; - } - public: - ItemAccess( ItemAccess const & other ) - : ItemAccess( other.chunk, other.get_item_ptr() ) - { - } + // reached the end here + cur_item = 0; + } - ItemAccess( - typename memory::AtomicList< Chunk, Allocator >::MutBackwardIterator chunk, - Item * item_ptr - ) - : chunk(chunk) - , cur_item( (uintptr_t)item_ptr ) - { - acquire_next_item(); - } + public: + ItemAccess(ItemAccess const& other) : ItemAccess(other.chunk, other.get_item_ptr()) + { + } - inline ~ItemAccess() - { - release(); - } + ItemAccess(typename memory::AtomicList::MutBackwardIterator chunk, Item* item_ptr) + : chunk(chunk) + , cur_item((uintptr_t) item_ptr) + { + acquire_next_item(); + } - /*! True if the iterator points to a valid storage location, - * and the item was successfuly locked such that it will not - * be deleted until this iterator is released. - */ - inline bool is_valid() const - { - return has_item(); - } + inline ~ItemAccess() + { + release(); + } - inline Item & item() const - { - assert( is_valid_idx() ); - return *get_item_ptr(); - } + /*! True if the iterator points to a valid storage location, + * and the item was successfuly locked such that it will not + * be deleted until this iterator is released. + */ + inline bool is_valid() const + { + return has_item(); + } - /*! Access item value - */ - inline - typename std::conditional< - is_const, - T const *, - T * - >::type - operator->() const - { - return &item().storage.value; - } + inline Item& item() const + { + assert(is_valid_idx()); + return *get_item_ptr(); + } - inline - typename std::conditional< - is_const, - T const &, - T & - >::type - operator* () const - { - return item().storage.value; - } - }; + /*! 
Access item value
+             */
+            inline typename std::conditional<is_const, T const*, T*>::type operator->() const
+            {
+                return &item().storage.value;
+            }

-    template < bool is_const >
-    struct BackwardIterator : ItemAccess< is_const >
-    {
-        BackwardIterator(
-            typename memory::AtomicList< Chunk, Allocator >::MutBackwardIterator chunk,
-            Item * start_item
-        )
-            : ItemAccess< is_const >( chunk, start_item )
-        {
-        }
+            inline typename std::conditional<is_const, T const&, T&>::type operator*() const
+            {
+                return item().storage.value;
+            }
+        };

-        inline bool operator!=(BackwardIterator< is_const > const& other) const
+        template<bool is_const>
+        struct BackwardIterator : ItemAccess<is_const>
         {
-            return this->get_item_ptr() != other.get_item_ptr();
-        }
+            BackwardIterator(
+                typename memory::AtomicList<Chunk, Allocator>::MutBackwardIterator chunk,
+                Item* start_item)
+                : ItemAccess<is_const>(chunk, start_item)
+            {
+            }

-        BackwardIterator< is_const > & operator=( BackwardIterator< is_const > const & other )
-        {
-            this->release();
-            this->cur_item = (uintptr_t) other.get_item_ptr();
-            this->chunk = other.chunk;
-            this->try_acquire();
-            return *this;
-        }
+            inline bool operator!=(BackwardIterator<is_const> const& other) const
+            {
+                return this->get_item_ptr() != other.get_item_ptr();
+            }

-        BackwardIterator & operator++()
-        {
-            this->release();
+            BackwardIterator<is_const>& operator=(BackwardIterator<is_const> const& other)
+            {
+                this->release();
+                this->cur_item = (uintptr_t) other.get_item_ptr();
+                this->chunk = other.chunk;
+                this->try_acquire();
+                return *this;
+            }

-            if( (uintptr_t)(this->get_item_ptr() - 1u) >= (uintptr_t)this->chunk->first_item.load() )
-                this->cur_item = (uintptr_t) (this->get_item_ptr() - 1);
-            else
+            BackwardIterator& operator++()
             {
-                ++ this->chunk;
-                if( this->chunk )
-                    this->cur_item = (uintptr_t) this->chunk->get_last_item();
-                else
-                    this->cur_item = 0;
-            }
+                this->release();
+
+                if((uintptr_t) (this->get_item_ptr() - 1u) >= (uintptr_t) this->chunk->first_item.load())
+                    this->cur_item = (uintptr_t) (this->get_item_ptr() - 1);
+                else
+                {
+                    ++this->chunk;
+                    if(this->chunk)
+                        this->cur_item = (uintptr_t) this->chunk->get_last_item();
+                    else
+                        this->cur_item = 0;
+                }

-            this->acquire_next_item();
-            return *this;
-        }
-    };
+                this->acquire_next_item();
+                return *this;
+            }
+        };

-    using ConstBackwardIterator = BackwardIterator< true >;
-    using MutBackwardIterator = BackwardIterator< false >;
+        using ConstBackwardIterator = BackwardIterator<true>;
+        using MutBackwardIterator = BackwardIterator<false>;

-private:
-    memory::AtomicList< Chunk, Allocator > chunks;
+    private:
+        memory::AtomicList<Chunk, Allocator> chunks;

-public:
-    ChunkedList( Allocator && alloc )
-        : chunks( std::move(alloc), T_chunk_size * sizeof(Item) + sizeof(Chunk) )
-    {}
+    public:
+        ChunkedList(Allocator&& alloc) : chunks(std::move(alloc), T_chunk_size * sizeof(Item) + sizeof(Chunk))
+        {
+        }

-    ChunkedList( ChunkedList && other ) = default;
-    ChunkedList( Allocator && alloc, ChunkedList const & other )
-        : ChunkedList( std::move(alloc) )
-    {
-        spdlog::error("copy construct ChunkedList!!");
-    }
+        ChunkedList(ChunkedList&& other) = default;
+
+        ChunkedList(Allocator&& alloc, ChunkedList const& other) : ChunkedList(std::move(alloc))
+        {
+            spdlog::error("copy construct ChunkedList!!");
+        }
+
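+        /* Usage sketch (illustrative; `T = Task*` mirrors the resource-user
+         * list case from the class comment, `visit` is a placeholder):
+         *
+         *   ChunkedList<Task*> users(memory::Allocator{});
+         *   auto pos = users.push(task);          // concurrent append
+         *   for(auto it = users.rbegin(); it != users.rend(); ++it)
+         *       visit(*it);                       // skips removed items
+         *   users.remove(pos);                    // O(1), iterator-stable
+         */
+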
/* decrement item_count and in case all items of this chunk are deleted, + * and this chunk is not `head`, delete the chunk too + */ + void release_chunk(typename memory::AtomicList::MutBackwardIterator chunk) + { + if(chunk->freed_items.fetch_add(1) == T_chunk_size - 1u) + chunks.erase(chunk); + } - while( true ) + MutBackwardIterator push(T const& item) { - auto chunk = chunks.rbegin(); - if( chunk != chunks.rend() ) - { - Item * chunk_begin = chunk->first_item; - Item * chunk_end = chunk_begin + T_chunk_size; + TRACE_EVENT("ChunkedList", "push"); - /* check if there is a chance to get a slot in this chunk - */ - if( (uintptr_t)chunk->next_item.load() <= (uintptr_t)chunk_end ) + while(true) + { + auto chunk = chunks.rbegin(); + if(chunk != chunks.rend()) { - Item * next_item = chunk->next_item.fetch_add(1); + Item* chunk_begin = chunk->first_item; + Item* chunk_end = chunk_begin + T_chunk_size; - if( (uintptr_t)next_item < (uintptr_t)chunk_end ) - { - /* successfully allocated a slot in the current chunk - */ - *next_item = item; - return MutBackwardIterator( chunk, next_item ); - } - else if ( (uintptr_t)next_item == (uintptr_t)chunk_end ) + /* check if there is a chance to get a slot in this chunk + */ + if((uintptr_t) chunk->next_item.load() <= (uintptr_t) chunk_end) { - /* here we are the first thread that overflows - * the current chunk, so allocate a new chunk here - */ - chunks.allocate_item(); + Item* next_item = chunk->next_item.fetch_add(1); + + if((uintptr_t) next_item < (uintptr_t) chunk_end) + { + /* successfully allocated a slot in the current chunk + */ + *next_item = item; + return MutBackwardIterator(chunk, next_item); + } + else if((uintptr_t) next_item == (uintptr_t) chunk_end) + { + /* here we are the first thread that overflows + * the current chunk, so allocate a new chunk here + */ + chunks.allocate_item(); + } + else + { + /* another one, but not the first thread that overflowed + * this chunk. wait for the allocation now. + */ + } } else { - /* another one, but not the first thread that overflowed - * this chunk. wait for the allocation now. + /* `chunk` is already full, + * don't even attempt to increment `next_item` + * just wait for the allocation of the new chunk to happen... */ } } else { - /* `chunk` is already full, - * don't even attempt to increment `next_item` - * just wait for the allocation of the new chunk to happen... 
- */ + chunks.try_allocate_first_item(); } } - else - { - chunks.try_allocate_first_item(); - } } - } - void remove( MutBackwardIterator const & pos ) - { - if( pos.is_valid_idx() ) + void remove(MutBackwardIterator const& pos) { - /* first, set iter_offset, so that any iterator - * will skip this element from now on - */ + if(pos.is_valid_idx()) + { + /* first, set iter_offset, so that any iterator + * will skip this element from now on + */ - // first elements just goes back one step to reach last element of previous chunk - if( pos.get_item_ptr() == pos.chunk->first_item ) - pos.item().iter_offset = 1; + // first elements just goes back one step to reach last element of previous chunk + if(pos.get_item_ptr() == pos.chunk->first_item) + pos.item().iter_offset = 1; - // if we have a predecessor in this chunk, reuse their offset - else - pos.item().iter_offset = (pos.get_item_ptr() - 1)->iter_offset + 1; + // if we have a predecessor in this chunk, reuse their offset + else + pos.item().iter_offset = (pos.get_item_ptr() - 1)->iter_offset + 1; - /* TODO: scan in other direction for deleted items too, - and update their `iter_offset` - */ + /* TODO: scan in other direction for deleted items too, + and update their `iter_offset` + */ - /* decrement refcount once so the item will be deconstructed - * eventually, when all iterators drop their references - */ - pos.item().release(); + /* decrement refcount once so the item will be deconstructed + * eventually, when all iterators drop their references + */ + pos.item().release(); - release_chunk( pos.chunk ); + release_chunk(pos.chunk); + } + else + throw std::runtime_error("remove invalid position"); } - else - throw std::runtime_error("remove invalid position"); - } - void erase( T item ) - { - for( auto it = rbegin(); it != rend(); ++it ) - if( *it == item ) - remove( it ); - } + void erase(T item) + { + for(auto it = rbegin(); it != rend(); ++it) + if(*it == item) + remove(it); + } - MutBackwardIterator rbegin() const - { - auto c = chunks.rbegin(); - return MutBackwardIterator( - c, - ( c != chunks.rend() ) ? c->get_last_item() : nullptr - ); - } - - MutBackwardIterator rend() const - { - return MutBackwardIterator( - chunks.rend(), - nullptr - ); - } + MutBackwardIterator rbegin() const + { + auto c = chunks.rbegin(); + return MutBackwardIterator(c, (c != chunks.rend()) ? c->get_last_item() : nullptr); + } - ConstBackwardIterator crbegin() const - { - auto c = chunks.rbegin(); - return ConstBackwardIterator( - c, - ( c != chunks.rend() ) ? c->get_last_item() : nullptr - ); - } - - ConstBackwardIterator crend() const - { - return ConstBackwardIterator( - chunks.rend(), - nullptr - ); - } -}; + MutBackwardIterator rend() const + { + return MutBackwardIterator(chunks.rend(), nullptr); + } -} // namespace redGrapes + ConstBackwardIterator crbegin() const + { + auto c = chunks.rbegin(); + return ConstBackwardIterator(c, (c != chunks.rend()) ? c->get_last_item() : nullptr); + } + + ConstBackwardIterator crend() const + { + return ConstBackwardIterator(chunks.rend(), nullptr); + } + }; +} // namespace redGrapes diff --git a/redGrapes/util/trace.cpp b/redGrapes/util/trace.cpp index 8eb644eb..2586e75c 100644 --- a/redGrapes/util/trace.cpp +++ b/redGrapes/util/trace.cpp @@ -2,41 +2,42 @@ #if REDGRAPES_ENABLE_TRACE -#include -#include -#include - -std::shared_ptr StartTracing() { - // The trace config defines which types of data sources are enabled for - // recording. 
In this example we just need the "track_event" data source,
-  // which corresponds to the TRACE_EVENT trace points.
-  perfetto::TraceConfig cfg;
-  cfg.add_buffers()->set_size_kb(8192);
-  auto* ds_cfg = cfg.add_data_sources()->mutable_config();
-  ds_cfg->set_name("track_event");
-
-  auto tracing_session = perfetto::Tracing::NewTrace();
-  tracing_session->Setup(cfg);
-  tracing_session->StartBlocking();
-  return tracing_session;
+# include
+# include
+# include
+
+std::shared_ptr<perfetto::TracingSession> StartTracing()
+{
+    // The trace config defines which types of data sources are enabled for
+    // recording. In this example we just need the "track_event" data source,
+    // which corresponds to the TRACE_EVENT trace points.
+    perfetto::TraceConfig cfg;
+    cfg.add_buffers()->set_size_kb(8192);
+    auto* ds_cfg = cfg.add_data_sources()->mutable_config();
+    ds_cfg->set_name("track_event");
+
+    auto tracing_session = perfetto::Tracing::NewTrace();
+    tracing_session->Setup(cfg);
+    tracing_session->StartBlocking();
+    return tracing_session;
 }

-void StopTracing(std::shared_ptr<perfetto::TracingSession> tracing_session) {
-  // Make sure the last event is closed for this example.
-  perfetto::TrackEvent::Flush();
-
-  // Stop tracing and read the trace data.
-  tracing_session->StopBlocking();
-  std::vector<char> trace_data(tracing_session->ReadTraceBlocking());
-
-  // Write the result into a file.
-  // Note: To save memory with longer traces, you can tell Perfetto to write
-  // directly into a file by passing a file descriptor into Setup() above.
-  std::ofstream output;
-  output.open("redGrapes.pftrace", std::ios::out | std::ios::binary);
-  output.write(&trace_data[0], std::streamsize(trace_data.size()));
-  output.close();
+void StopTracing(std::shared_ptr<perfetto::TracingSession> tracing_session)
+{
+    // Make sure the last event is closed for this example.
+    perfetto::TrackEvent::Flush();
+
+    // Stop tracing and read the trace data.
+    tracing_session->StopBlocking();
+    std::vector<char> trace_data(tracing_session->ReadTraceBlocking());
+
+    // Write the result into a file.
+    // Note: To save memory with longer traces, you can tell Perfetto to write
+    // directly into a file by passing a file descriptor into Setup() above.
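+    // A sketch of that file-descriptor variant (not used here; error
+    // handling omitted, POSIX open()/close() assumed):
+    //   int fd = open("redGrapes.pftrace", O_RDWR | O_CREAT | O_TRUNC, 0600);
+    //   tracing_session->Setup(cfg, fd);  // Perfetto then streams into fd
+    //   ... Start/StopBlocking() as usual, then close(fd);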
+ std::ofstream output; + output.open("redGrapes.pftrace", std::ios::out | std::ios::binary); + output.write(&trace_data[0], std::streamsize(trace_data.size())); + output.close(); } #endif - diff --git a/redGrapes/util/trace.hpp b/redGrapes/util/trace.hpp index 7947c816..0d6e6424 100644 --- a/redGrapes/util/trace.hpp +++ b/redGrapes/util/trace.hpp @@ -1,46 +1,42 @@ #pragma once -//#include +// #include #ifndef REDGRAPES_ENABLE_TRACE -#define REDGRAPES_ENABLE_TRACE 0 +# define REDGRAPES_ENABLE_TRACE 0 #endif #if REDGRAPES_ENABLE_TRACE -#include -#include +# include + +# include PERFETTO_DEFINE_CATEGORIES( - perfetto::Category("Worker"), - perfetto::Category("Scheduler"), - perfetto::Category("Event"), - perfetto::Category("TaskSpace"), - perfetto::Category("Graph"), - perfetto::Category("Task"), - perfetto::Category("Allocator"), - perfetto::Category("CondVar"), - perfetto::Category("ChunkedList"), - perfetto::Category("ResourceUser") -); + perfetto::Category("Worker"), + perfetto::Category("Scheduler"), + perfetto::Category("Event"), + perfetto::Category("TaskSpace"), + perfetto::Category("Graph"), + perfetto::Category("Task"), + perfetto::Category("Allocator"), + perfetto::Category("CondVar"), + perfetto::Category("ChunkedList"), + perfetto::Category("ResourceUser")); std::shared_ptr StartTracing(); void StopTracing(std::shared_ptr tracing_session); #else -#undef TRACE_EVENT -#define TRACE_EVENT +# undef TRACE_EVENT +# define TRACE_EVENT -#undef TRACE_EVENT_BEGIN -#define TRACE_EVENT_BEGIN +# undef TRACE_EVENT_BEGIN +# define TRACE_EVENT_BEGIN -#undef TRACE_EVENT_END -#define TRACE_EVENT_END +# undef TRACE_EVENT_END +# define TRACE_EVENT_END #endif - - - - diff --git a/redGrapes/version.hpp b/redGrapes/version.hpp index b90128fb..bfc4a3a0 100644 --- a/redGrapes/version.hpp +++ b/redGrapes/version.hpp @@ -11,4 +11,3 @@ #define REDGRAPES_VERSION_MINOR 1 #define REDGRAPES_VERSION_PATCH 0 #define REDGRAPES_VERSION_LABEL "" - diff --git a/redGrapes_config.hpp b/redGrapes_config.hpp index 45dcbb04..6f70f09b 100644 --- a/redGrapes_config.hpp +++ b/redGrapes_config.hpp @@ -1,3 +1 @@ #pragma once - - diff --git a/test/access.cpp b/test/access.cpp index da7d76ce..352a1346 100644 --- a/test/access.cpp +++ b/test/access.cpp @@ -1,229 +1,214 @@ -#include - -#include #include #include #include +#include + +#include using namespace redGrapes::access; TEST_CASE("IOAccess") { - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::read}, IOAccess{IOAccess::read} ) == false ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::read}, IOAccess{IOAccess::write} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::read}, IOAccess{IOAccess::aadd} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::read}, IOAccess{IOAccess::amul} ) == true ); - - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::write}, IOAccess{IOAccess::read} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::write}, IOAccess{IOAccess::write} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::write}, IOAccess{IOAccess::aadd} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::write}, IOAccess{IOAccess::amul} ) == true ); - - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::aadd}, IOAccess{IOAccess::read} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::aadd}, IOAccess{IOAccess::write} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::aadd}, IOAccess{IOAccess::aadd} ) == false ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::aadd}, IOAccess{IOAccess::amul} 
) == true ); - - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::amul}, IOAccess{IOAccess::read} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::amul}, IOAccess{IOAccess::write} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::amul}, IOAccess{IOAccess::aadd} ) == true ); - REQUIRE( IOAccess::is_serial( IOAccess{IOAccess::amul}, IOAccess{IOAccess::amul} ) == false ); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::read}, IOAccess{IOAccess::read}) == false); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::read}, IOAccess{IOAccess::write}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::read}, IOAccess{IOAccess::aadd}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::read}, IOAccess{IOAccess::amul}) == true); + + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::write}, IOAccess{IOAccess::read}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::write}, IOAccess{IOAccess::write}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::write}, IOAccess{IOAccess::aadd}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::write}, IOAccess{IOAccess::amul}) == true); + + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::aadd}, IOAccess{IOAccess::read}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::aadd}, IOAccess{IOAccess::write}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::aadd}, IOAccess{IOAccess::aadd}) == false); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::aadd}, IOAccess{IOAccess::amul}) == true); + + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::amul}, IOAccess{IOAccess::read}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::amul}, IOAccess{IOAccess::write}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::amul}, IOAccess{IOAccess::aadd}) == true); + REQUIRE(IOAccess::is_serial(IOAccess{IOAccess::amul}, IOAccess{IOAccess::amul}) == false); // subsets - REQUIRE( IOAccess{IOAccess::read}.is_superset_of( IOAccess{IOAccess::read} ) == true ); - REQUIRE( IOAccess{IOAccess::read}.is_superset_of( IOAccess{IOAccess::write} ) == false ); - REQUIRE( IOAccess{IOAccess::read}.is_superset_of( IOAccess{IOAccess::aadd} ) == false ); - REQUIRE( IOAccess{IOAccess::read}.is_superset_of( IOAccess{IOAccess::amul} ) == false ); - - REQUIRE( IOAccess{IOAccess::write}.is_superset_of( IOAccess{IOAccess::read} ) == true ); - REQUIRE( IOAccess{IOAccess::write}.is_superset_of( IOAccess{IOAccess::write} ) == true ); - REQUIRE( IOAccess{IOAccess::write}.is_superset_of( IOAccess{IOAccess::aadd} ) == true ); - REQUIRE( IOAccess{IOAccess::write}.is_superset_of( IOAccess{IOAccess::amul} ) == true ); - - REQUIRE( IOAccess{IOAccess::aadd}.is_superset_of( IOAccess{IOAccess::read} ) == false ); - REQUIRE( IOAccess{IOAccess::aadd}.is_superset_of( IOAccess{IOAccess::write} ) == false ); - REQUIRE( IOAccess{IOAccess::aadd}.is_superset_of( IOAccess{IOAccess::aadd} ) == true ); - REQUIRE( IOAccess{IOAccess::aadd}.is_superset_of( IOAccess{IOAccess::amul} ) == false ); - - REQUIRE( IOAccess{IOAccess::amul}.is_superset_of( IOAccess{IOAccess::read} ) == false ); - REQUIRE( IOAccess{IOAccess::amul}.is_superset_of( IOAccess{IOAccess::write} ) == false ); - REQUIRE( IOAccess{IOAccess::amul}.is_superset_of( IOAccess{IOAccess::aadd} ) == false ); - REQUIRE( IOAccess{IOAccess::amul}.is_superset_of( IOAccess{IOAccess::amul} ) == true ); + REQUIRE(IOAccess{IOAccess::read}.is_superset_of(IOAccess{IOAccess::read}) == true); + REQUIRE(IOAccess{IOAccess::read}.is_superset_of(IOAccess{IOAccess::write}) == false); + 
REQUIRE(IOAccess{IOAccess::read}.is_superset_of(IOAccess{IOAccess::aadd}) == false); + REQUIRE(IOAccess{IOAccess::read}.is_superset_of(IOAccess{IOAccess::amul}) == false); + + REQUIRE(IOAccess{IOAccess::write}.is_superset_of(IOAccess{IOAccess::read}) == true); + REQUIRE(IOAccess{IOAccess::write}.is_superset_of(IOAccess{IOAccess::write}) == true); + REQUIRE(IOAccess{IOAccess::write}.is_superset_of(IOAccess{IOAccess::aadd}) == true); + REQUIRE(IOAccess{IOAccess::write}.is_superset_of(IOAccess{IOAccess::amul}) == true); + + REQUIRE(IOAccess{IOAccess::aadd}.is_superset_of(IOAccess{IOAccess::read}) == false); + REQUIRE(IOAccess{IOAccess::aadd}.is_superset_of(IOAccess{IOAccess::write}) == false); + REQUIRE(IOAccess{IOAccess::aadd}.is_superset_of(IOAccess{IOAccess::aadd}) == true); + REQUIRE(IOAccess{IOAccess::aadd}.is_superset_of(IOAccess{IOAccess::amul}) == false); + + REQUIRE(IOAccess{IOAccess::amul}.is_superset_of(IOAccess{IOAccess::read}) == false); + REQUIRE(IOAccess{IOAccess::amul}.is_superset_of(IOAccess{IOAccess::write}) == false); + REQUIRE(IOAccess{IOAccess::amul}.is_superset_of(IOAccess{IOAccess::aadd}) == false); + REQUIRE(IOAccess{IOAccess::amul}.is_superset_of(IOAccess{IOAccess::amul}) == true); } TEST_CASE("AreaAccess") { // --[-----]--(-----)-- - REQUIRE( AreaAccess::is_serial( AreaAccess({10, 20}), AreaAccess({30, 40}) ) == false ); - REQUIRE( AreaAccess({10, 20}).is_superset_of( AreaAccess({30, 40}) ) == false ); + REQUIRE(AreaAccess::is_serial(AreaAccess({10, 20}), AreaAccess({30, 40})) == false); + REQUIRE(AreaAccess({10, 20}).is_superset_of(AreaAccess({30, 40})) == false); // --(-----)--[-----]-- - REQUIRE( AreaAccess::is_serial( AreaAccess({30, 40}), AreaAccess({10, 20}) ) == false ); - REQUIRE( AreaAccess({30, 40}).is_superset_of( AreaAccess({10, 20}) ) == false ); + REQUIRE(AreaAccess::is_serial(AreaAccess({30, 40}), AreaAccess({10, 20})) == false); + REQUIRE(AreaAccess({30, 40}).is_superset_of(AreaAccess({10, 20})) == false); // --[--(--]--)-- - REQUIRE( AreaAccess::is_serial( AreaAccess({10, 20}), AreaAccess({15, 25}) ) == true ); - REQUIRE( AreaAccess({10, 20}).is_superset_of( AreaAccess({15, 25}) ) == false ); + REQUIRE(AreaAccess::is_serial(AreaAccess({10, 20}), AreaAccess({15, 25})) == true); + REQUIRE(AreaAccess({10, 20}).is_superset_of(AreaAccess({15, 25})) == false); // --(--[--)--]-- - REQUIRE( AreaAccess::is_serial( AreaAccess({15, 25}), AreaAccess({10, 20}) ) == true ); - REQUIRE( AreaAccess({15, 15}).is_superset_of( AreaAccess({10, 20}) ) == false ); + REQUIRE(AreaAccess::is_serial(AreaAccess({15, 25}), AreaAccess({10, 20})) == true); + REQUIRE(AreaAccess({15, 15}).is_superset_of(AreaAccess({10, 20})) == false); // --[--(--)--]-- - REQUIRE( AreaAccess::is_serial( AreaAccess({10, 30}), AreaAccess({15, 25}) ) == true ); - REQUIRE( AreaAccess({10, 30}).is_superset_of( AreaAccess({15, 25}) ) == true ); + REQUIRE(AreaAccess::is_serial(AreaAccess({10, 30}), AreaAccess({15, 25})) == true); + REQUIRE(AreaAccess({10, 30}).is_superset_of(AreaAccess({15, 25})) == true); // --(--[--]--)-- - REQUIRE( AreaAccess::is_serial( AreaAccess({15, 25}), AreaAccess({10, 30}) ) == true ); - REQUIRE( AreaAccess({15, 25}).is_superset_of( AreaAccess({10, 30}) ) == false ); + REQUIRE(AreaAccess::is_serial(AreaAccess({15, 25}), AreaAccess({10, 30})) == true); + REQUIRE(AreaAccess({15, 25}).is_superset_of(AreaAccess({10, 30})) == false); } TEST_CASE("CombineAccess") { - using A = CombineAccess< - IOAccess, - AreaAccess, - And_t - >; - - REQUIRE(A::is_serial( - A(IOAccess{IOAccess::read}, 
AreaAccess({10, 20})),
-        A(IOAccess{IOAccess::read}, AreaAccess({15, 25})))
-        == false);
-
-    REQUIRE(A::is_serial(
-        A(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
-        A(IOAccess{IOAccess::write}, AreaAccess({15, 25})))
-        == true);
-
-    REQUIRE(A::is_serial(
-        A(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
-        A(IOAccess{IOAccess::write}, AreaAccess({30, 40})))
-        == false);
-
-    REQUIRE(A(IOAccess{IOAccess::read}, AreaAccess({10, 20}))
-        .is_superset_of(
-            A(IOAccess{IOAccess::read}, AreaAccess({15, 25})))
-        == false );
+    using A = CombineAccess<IOAccess, AreaAccess, And_t>;
 
-    REQUIRE(A(IOAccess{IOAccess::write}, AreaAccess({10, 30}))
-        .is_superset_of(
+    REQUIRE(
+        A::is_serial(
+            A(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
             A(IOAccess{IOAccess::read}, AreaAccess({15, 25})))
-        == true );
-
-    using B = CombineAccess<
-        IOAccess,
-        AreaAccess,
-        Or_t
-    >;
-
-    REQUIRE(B::is_serial(
-        B(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
-        B(IOAccess{IOAccess::read}, AreaAccess({30, 40})))
-        == false);
-
-    REQUIRE(B::is_serial(
-        B(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
-        B(IOAccess{IOAccess::read}, AreaAccess({15, 25})))
-        == true);
-
-    REQUIRE(B::is_serial(
-        B(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
-        B(IOAccess{IOAccess::write}, AreaAccess({15, 25})))
-        == true);
-
-    REQUIRE(B::is_serial(
-        B(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
-        B(IOAccess{IOAccess::write}, AreaAccess({30, 40})))
-        == true);
-
+        == false);
+
+    REQUIRE(
+        A::is_serial(
+            A(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
+            A(IOAccess{IOAccess::write}, AreaAccess({15, 25})))
+        == true);
+
+    REQUIRE(
+        A::is_serial(
+            A(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
+            A(IOAccess{IOAccess::write}, AreaAccess({30, 40})))
+        == false);
+
+    REQUIRE(
+        A(IOAccess{IOAccess::read}, AreaAccess({10, 20}))
+            .is_superset_of(A(IOAccess{IOAccess::read}, AreaAccess({15, 25})))
+        == false);
+
+    REQUIRE(
+        A(IOAccess{IOAccess::write}, AreaAccess({10, 30}))
+            .is_superset_of(A(IOAccess{IOAccess::read}, AreaAccess({15, 25})))
+        == true);
+
+    using B = CombineAccess<IOAccess, AreaAccess, Or_t>;
+
+    REQUIRE(
+        B::is_serial(
+            B(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
+            B(IOAccess{IOAccess::read}, AreaAccess({30, 40})))
+        == false);
+
+    REQUIRE(
+        B::is_serial(
+            B(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
+            B(IOAccess{IOAccess::read}, AreaAccess({15, 25})))
+        == true);
+
+    REQUIRE(
+        B::is_serial(
+            B(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
+            B(IOAccess{IOAccess::write}, AreaAccess({15, 25})))
+        == true);
+
+    REQUIRE(
+        B::is_serial(
+            B(IOAccess{IOAccess::read}, AreaAccess({10, 20})),
+            B(IOAccess{IOAccess::write}, AreaAccess({30, 40})))
+        == true);
 }
 
 TEST_CASE("ArrayAccess")
 {
-    using A = ArrayAccess<
-        IOAccess,
-        2,
-        And_t
-    >;
-
-    REQUIRE(A::is_serial(
-        A({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }),
-        A({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }))
-        == false);
-
-    REQUIRE(A::is_serial(
-        A({ IOAccess{IOAccess::read}, IOAccess{IOAccess::write} }),
-        A({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }))
-        == false);
-
-    REQUIRE(A::is_serial(
-        A({ IOAccess{IOAccess::write}, IOAccess{IOAccess::write} }),
-        A({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }))
-        == true);
-
-    REQUIRE(A({ IOAccess{IOAccess::read}, IOAccess{IOAccess::write} })
-        .is_superset_of(
-            A({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }))
-        == true );
-
-    REQUIRE(A({ IOAccess{IOAccess::read}, IOAccess{IOAccess::write} })
-        .is_superset_of(
-            A({ IOAccess{IOAccess::write}, IOAccess{IOAccess::read} }))
-        == false );
-
-    using B = ArrayAccess<
-        IOAccess,
-        2,
-        Or_t
-    >;
-
-
-    REQUIRE(B::is_serial(
-        B({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }),
-        B({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }))
-        == false);
-
-    REQUIRE(B::is_serial(
-        B({ IOAccess{IOAccess::read}, IOAccess{IOAccess::write} }),
-        B({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }))
-        == true);
-
-    REQUIRE(B::is_serial(
-        B({ IOAccess{IOAccess::write}, IOAccess{IOAccess::write} }),
-        B({ IOAccess{IOAccess::read}, IOAccess{IOAccess::read} }))
-        == true);
+    using A = ArrayAccess<IOAccess, 2, And_t>;
+
+    REQUIRE(
+        A::is_serial(
+            A({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}),
+            A({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}))
+        == false);
+
+    REQUIRE(
+        A::is_serial(
+            A({IOAccess{IOAccess::read}, IOAccess{IOAccess::write}}),
+            A({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}))
+        == false);
+
+    REQUIRE(
+        A::is_serial(
+            A({IOAccess{IOAccess::write}, IOAccess{IOAccess::write}}),
+            A({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}))
+        == true);
+
+    REQUIRE(
+        A({IOAccess{IOAccess::read}, IOAccess{IOAccess::write}})
+            .is_superset_of(A({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}))
+        == true);
+
+    REQUIRE(
+        A({IOAccess{IOAccess::read}, IOAccess{IOAccess::write}})
+            .is_superset_of(A({IOAccess{IOAccess::write}, IOAccess{IOAccess::read}}))
+        == false);
+
+    using B = ArrayAccess<IOAccess, 2, Or_t>;
+
+
+    REQUIRE(
+        B::is_serial(
+            B({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}),
+            B({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}))
+        == false);
+
+    REQUIRE(
+        B::is_serial(
+            B({IOAccess{IOAccess::read}, IOAccess{IOAccess::write}}),
+            B({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}))
+        == true);
+
+    REQUIRE(
+        B::is_serial(
+            B({IOAccess{IOAccess::write}, IOAccess{IOAccess::write}}),
+            B({IOAccess{IOAccess::read}, IOAccess{IOAccess::read}}))
+        == true);
 }
 
 TEST_CASE("FieldAccess")
 {
     using Arr = ArrayAccess<AreaAccess, 3>;
 
-    REQUIRE(FieldAccess<3>::is_serial(
-        FieldAccess<3>(
-            IOAccess{IOAccess::read},
-            Arr({
-                AreaAccess({0,10}),
-                AreaAccess({0,10}),
-                AreaAccess({0,10})})),
-        FieldAccess<3>(
-            IOAccess{IOAccess::read},
-            Arr({
-                AreaAccess({0,10}),
-                AreaAccess({0,10}),
-                AreaAccess({0,10})})))
-        == false);
-
-    REQUIRE(FieldAccess<3>::is_serial(
-        FieldAccess<3>(
-            IOAccess{IOAccess::write},
-            Arr({
-                AreaAccess({0,10}),
-                AreaAccess({0,10}),
-                AreaAccess({0,10})})),
-        FieldAccess<3>(
-            IOAccess{IOAccess::read},
-            Arr({
-                AreaAccess({0,10}),
-                AreaAccess({0,10}),
-                AreaAccess({0,10})})))
-        == true);
+    REQUIRE(
+        FieldAccess<3>::is_serial(
+            FieldAccess<3>(
+                IOAccess{IOAccess::read},
+                Arr({AreaAccess({0, 10}), AreaAccess({0, 10}), AreaAccess({0, 10})})),
+            FieldAccess<3>(
+                IOAccess{IOAccess::read},
+                Arr({AreaAccess({0, 10}), AreaAccess({0, 10}), AreaAccess({0, 10})})))
+        == false);
+
+    REQUIRE(
+        FieldAccess<3>::is_serial(
+            FieldAccess<3>(
+                IOAccess{IOAccess::write},
+                Arr({AreaAccess({0, 10}), AreaAccess({0, 10}), AreaAccess({0, 10})})),
+            FieldAccess<3>(
+                IOAccess{IOAccess::read},
+                Arr({AreaAccess({0, 10}), AreaAccess({0, 10}), AreaAccess({0, 10})})))
+        == true);
 }
-
diff --git a/test/chunked_list.cpp b/test/chunked_list.cpp
index 3efaee53..99d01f58 100644
--- a/test/chunked_list.cpp
+++ b/test/chunked_list.cpp
@@ -1,15 +1,19 @@
-#include
-#include
-#include
-
 #include
 #include
+#include
+#include
+
+#include
+
 struct TestItem
 {
     int id;
 
-    TestItem(int id):id(id){}
+    TestItem(int id) : id(id)
+    {
+    }
+
     ~TestItem()
     {
         spdlog::info("destroy {}", this->id);
@@ -50,7 +54,7 @@ TEST_CASE("Chunked List")
     for( auto it = l.rbegin(); it != l.rend(); ++it )
         fmt::print("v = {}\n", it->id);
-
+
     fmt::print("--\n");
     l.remove(p1);
     l.remove(p2);
@@ -109,12 +113,12 @@ TEST_CASE("ChunkedList singlethreaded")
     unsigned r2 = 48;
     unsigned r3 = 49;
     unsigned r4 = 1023;
-
+
     l.remove(r1);
     l.remove(r2);
     l.remove(r3);
     l.remove(r4);
-
+
     // check that backward iterator skips removed elements
     unsigned i = 4096;
     for( auto it = l.rbegin(); it != l.rend(); ++it )
diff --git a/test/cv.cpp b/test/cv.cpp
index 7e53bce1..da05bfd1 100644
--- a/test/cv.cpp
+++ b/test/cv.cpp
@@ -1,35 +1,38 @@
-#include
-#include
 #include
+
+#include
+
 #include
+#include
 
 TEST_CASE("CV")
 {
-    for(int i=0; i< 5000; ++i)
+    for(int i = 0; i < 5000; ++i)
     {
         std::atomic<bool> finished = {false};
-        volatile bool start = false;
+        bool volatile start = false;
 
         redGrapes::CondVar cv;
 
-        std::thread t([&] {
-            /* wait should run through without waiting,
-             * because notify came before wait
-             */
-            cv.wait();
-            finished = true;
-        });
+        std::thread t(
+            [&]
+            {
+                /* wait should run through without waiting,
+                 * because notify came before wait
+                 */
+                cv.wait();
+                finished = true;
+            });
 
         cv.notify();
-
+
         auto end = std::chrono::steady_clock::now() + std::chrono::seconds(10);
-        while( std::chrono::steady_clock::now() < end )
-            if( finished )
+        while(std::chrono::steady_clock::now() < end)
+            if(finished)
                 break;
-
-        REQUIRE( finished );
+
+        REQUIRE(finished);
 
         t.join();
     }
-
 }
diff --git a/test/random_graph.cpp b/test/random_graph.cpp
index f028e0b7..3a06da77 100644
--- a/test/random_graph.cpp
+++ b/test/random_graph.cpp
@@ -1,39 +1,37 @@
-#include
+#include "sha256.c"
+
+#include
+#include
+
 #include
+#include
+#include
 
-#include
+#include
+#include
 #include
 #include
-#include
 #include
+#include
 #include
-#include
-#include
-#include
-#include
-#include
-#include "sha256.c"
 
 namespace rg = redGrapes;
 
 using namespace std::chrono;
 
-
 void sleep(std::chrono::microseconds d)
 {
-    std::this_thread::sleep_for( d );
+    std::this_thread::sleep_for(d);
 }
 
-void hash(unsigned task_id,
-          std::array<uint64_t, 8> & val)
+void hash(unsigned task_id, std::array<uint64_t, 8>& val)
 {
     val[0] += task_id;
 
-    uint32_t state[8] = {
-        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
-        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-    };
+    uint32_t state[8]
+        = {0x6a09'e667, 0xbb67'ae85, 0x3c6e'f372, 0xa54f'f53a, 0x510e'527f, 0x9b05'688c, 0x1f83'd9ab, 0x5be0'cd19};
 
-    sha256_process(state, (uint8_t*)&val[0], sizeof(val));
+    sha256_process(state, (uint8_t*) &val[0], sizeof(val));
 }
 
 std::chrono::microseconds task_duration(2);
 
@@ -72,21 +70,21 @@ void generate_access_pattern()
             access_pattern[i].push_back(resource_id);
             hash(i, expected_hash[resource_id]);
 
-            if( path_length[resource_id] > max_path_length )
+            if(path_length[resource_id] > max_path_length)
                 max_path_length = path_length[resource_id];
 
             break;
         }
     }
 
-        for( unsigned rid : access_pattern[i] )
+        for(unsigned rid : access_pattern[i])
             path_length[rid] = max_path_length + 1;
     }
 }
 
     unsigned max_path_length = 1;
-    for( unsigned pl : path_length )
-        if( pl > max_path_length )
+    for(unsigned pl : path_length)
+        if(pl > max_path_length)
             max_path_length = pl;
 
     std::cout << "max path length = " << max_path_length << std::endl;
@@ -99,9 +97,9 @@ TEST_CASE("RandomGraph")
     generate_access_pattern();
 
     rg::init(n_threads);
-
+
     {
-        std::vector< rg::IOResource<std::array<uint64_t, 8>> > resources(n_resources);
+        std::vector<rg::IOResource<std::array<uint64_t, 8>>> resources(n_resources);
 
         for(int i = 0; i < n_tasks; ++i)
             switch(access_pattern[i].size())
@@ -112,80 +110,79 @@ TEST_CASE("RandomGraph")
             case 1:
                 rg::emplace_task(
-                [i](auto ra1)
-                {
-                    sleep(task_duration);
-                    hash(i, *ra1);
-                },
-                resources[access_pattern[i][0]].write());
+                    [i](auto ra1)
+                    {
+                        sleep(task_duration);
+                        hash(i, *ra1);
+                    },
+                    resources[access_pattern[i][0]].write());
                 break;
 
             case 2:
                 rg::emplace_task(
-                [i](auto ra1, auto ra2)
-                {
-                    sleep(task_duration);
-                    hash(i, *ra1);
-                    hash(i, *ra2);
-                },
-                resources[access_pattern[i][0]].write(),
-                resources[access_pattern[i][1]].write());
+                    [i](auto ra1, auto ra2)
+                    {
+                        sleep(task_duration);
+                        hash(i, *ra1);
+                        hash(i, *ra2);
+                    },
+                    resources[access_pattern[i][0]].write(),
+                    resources[access_pattern[i][1]].write());
                 break;
 
             case 3:
                 rg::emplace_task(
-                [i](auto ra1, auto ra2, auto ra3)
-                {
-                    sleep(task_duration);
-                    hash(i, *ra1);
-                    hash(i, *ra2);
-                    hash(i, *ra3);
-                },
-                resources[access_pattern[i][0]].write(),
-                resources[access_pattern[i][1]].write(),
-                resources[access_pattern[i][2]].write());
+                    [i](auto ra1, auto ra2, auto ra3)
+                    {
+                        sleep(task_duration);
+                        hash(i, *ra1);
+                        hash(i, *ra2);
+                        hash(i, *ra3);
+                    },
+                    resources[access_pattern[i][0]].write(),
+                    resources[access_pattern[i][1]].write(),
+                    resources[access_pattern[i][2]].write());
                 break;
 
             case 4:
                 rg::emplace_task(
-                [i](auto ra1, auto ra2, auto ra3, auto ra4)
-                {
-                    sleep(task_duration);
-                    hash(i, *ra1);
-                    hash(i, *ra2);
-                    hash(i, *ra3);
-                    hash(i, *ra4);
-                },
-                resources[access_pattern[i][0]].write(),
-                resources[access_pattern[i][1]].write(),
-                resources[access_pattern[i][2]].write(),
-                resources[access_pattern[i][3]].write());
+                    [i](auto ra1, auto ra2, auto ra3, auto ra4)
+                    {
+                        sleep(task_duration);
+                        hash(i, *ra1);
+                        hash(i, *ra2);
+                        hash(i, *ra3);
+                        hash(i, *ra4);
+                    },
+                    resources[access_pattern[i][0]].write(),
+                    resources[access_pattern[i][1]].write(),
+                    resources[access_pattern[i][2]].write(),
+                    resources[access_pattern[i][3]].write());
                 break;
 
             case 5:
                 rg::emplace_task(
-                [i](auto ra1, auto ra2, auto ra3, auto ra4, auto ra5)
-                {
-                    sleep(task_duration);
-                    hash(i, *ra1);
-                    hash(i, *ra2);
-                    hash(i, *ra3);
-                    hash(i, *ra4);
-                    hash(i, *ra5);
-                },
-                resources[access_pattern[i][0]].write(),
-                resources[access_pattern[i][1]].write(),
-                resources[access_pattern[i][2]].write(),
-                resources[access_pattern[i][3]].write(),
-                resources[access_pattern[i][4]].write());
+                    [i](auto ra1, auto ra2, auto ra3, auto ra4, auto ra5)
+                    {
+                        sleep(task_duration);
+                        hash(i, *ra1);
+                        hash(i, *ra2);
+                        hash(i, *ra3);
+                        hash(i, *ra4);
+                        hash(i, *ra5);
+                    },
+                    resources[access_pattern[i][0]].write(),
+                    resources[access_pattern[i][1]].write(),
+                    resources[access_pattern[i][2]].write(),
+                    resources[access_pattern[i][3]].write(),
+                    resources[access_pattern[i][4]].write());
                 break;
             }
 
         rg::barrier();
 
         for(int i = 0; i < n_resources; ++i)
-            REQUIRE( *resources[i] == expected_hash[i] );
+            REQUIRE(*resources[i] == expected_hash[i]);
     }
 
-    rg::finalize();
+    rg::finalize();
 }
-
diff --git a/test/resource.cpp b/test/resource.cpp
index 921d9761..cfe1d494 100644
--- a/test/resource.cpp
+++ b/test/resource.cpp
@@ -1,24 +1,30 @@
-#include
-
-#include
-#include
 #include
+#include
+#include
+
+#include
 
 struct Access
 {
     static bool is_serial(Access a, Access b)
-    { return true; }
+    {
+        return true;
+    }
 
     bool is_synchronizing() const
     {
         return true;
     }
-
+
     bool is_superset_of(Access a) const
-    { return true; }
+    {
+        return true;
+    }
 
-    bool operator==(Access const & other) const
-    { return false; }
+    bool operator==(Access const& other) const
+    {
+        return false;
+    }
 };
 
 template<>
@@ -39,19 +45,19 @@ struct fmt::formatter<Access>
 TEST_CASE("Resource ID")
 {
     redGrapes::init(1);
-    redGrapes::Resource< Access > a, b;
+    redGrapes::Resource<Access> a, b;
 
     // same resource
-    REQUIRE( redGrapes::ResourceAccess::is_serial( a.make_access(Access{}), a.make_access(Access{}) ) == true );
-    REQUIRE( redGrapes::ResourceAccess::is_serial( b.make_access(Access{}), b.make_access(Access{}) ) == true );
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.make_access(Access{}), a.make_access(Access{})) == true);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(b.make_access(Access{}), b.make_access(Access{})) == true);
 
     // same resource, but copied
-    redGrapes::Resource< Access > a2(a);
-    REQUIRE( redGrapes::ResourceAccess::is_serial( a.make_access(Access{}), a2.make_access(Access{}) ) == true );
+    redGrapes::Resource<Access> a2(a);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.make_access(Access{}), a2.make_access(Access{})) == true);
 
     // different resource
-    REQUIRE( redGrapes::ResourceAccess::is_serial( a.make_access(Access{}), b.make_access(Access{}) ) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial( b.make_access(Access{}), a.make_access(Access{}) ) == false );
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.make_access(Access{}), b.make_access(Access{})) == false);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(b.make_access(Access{}), a.make_access(Access{})) == false);
 
     redGrapes::finalize();
 }
 
@@ -60,26 +66,25 @@ TEST_CASE("IOResource")
     redGrapes::init(1);
     redGrapes::IOResource<int> a, b;
 
-    REQUIRE( redGrapes::ResourceAccess::is_serial(a.read(), a.read()) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(a.read(), a.write()) == true );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(a.write(), a.read()) == true );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(a.write(), a.write()) == true );
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.read(), a.read()) == false);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.read(), a.write()) == true);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.write(), a.read()) == true);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.write(), a.write()) == true);
 
-    REQUIRE( redGrapes::ResourceAccess::is_serial(b.read(), b.read()) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(b.read(), b.write()) == true );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(b.write(), b.read()) == true );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(b.write(), b.write()) == true );
+    REQUIRE(redGrapes::ResourceAccess::is_serial(b.read(), b.read()) == false);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(b.read(), b.write()) == true);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(b.write(), b.read()) == true);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(b.write(), b.write()) == true);
 
-    REQUIRE( redGrapes::ResourceAccess::is_serial(a.read(), b.read()) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(a.read(), b.write()) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(a.write(), b.read()) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(a.write(), b.write()) == false );
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.read(), b.read()) == false);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.read(), b.write()) == false);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.write(), b.read()) == false);
+    REQUIRE(redGrapes::ResourceAccess::is_serial(a.write(), b.write()) == false);
 
-    REQUIRE( redGrapes::ResourceAccess::is_serial(b.read(), a.read()) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(b.read(), a.write()) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(b.write(), a.read()) == false );
-    REQUIRE( redGrapes::ResourceAccess::is_serial(b.write(), a.write()) ==
false ); + REQUIRE(redGrapes::ResourceAccess::is_serial(b.read(), a.read()) == false); + REQUIRE(redGrapes::ResourceAccess::is_serial(b.read(), a.write()) == false); + REQUIRE(redGrapes::ResourceAccess::is_serial(b.write(), a.read()) == false); + REQUIRE(redGrapes::ResourceAccess::is_serial(b.write(), a.write()) == false); redGrapes::finalize(); } - diff --git a/test/resource_user.cpp b/test/resource_user.cpp index 5168ac8f..94a4b993 100644 --- a/test/resource_user.cpp +++ b/test/resource_user.cpp @@ -1,10 +1,10 @@ -#include - #include #include #include +#include + TEST_CASE("Resource User") { redGrapes::init(); @@ -17,49 +17,48 @@ TEST_CASE("Resource User") redGrapes::ResourceUser f4({b.read(), b.write()}); redGrapes::ResourceUser f5({a.read(), a.write(), b.read(), b.write()}); - REQUIRE( redGrapes::ResourceUser::is_serial(f1, f1) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f1, f2) == true ); - REQUIRE( redGrapes::ResourceUser::is_serial(f1, f3) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f1, f4) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f1, f5) == true ); + REQUIRE(redGrapes::ResourceUser::is_serial(f1, f1) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f1, f2) == true); + REQUIRE(redGrapes::ResourceUser::is_serial(f1, f3) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f1, f4) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f1, f5) == true); - REQUIRE( redGrapes::ResourceUser::is_serial(f2, f1) == true ); - REQUIRE( redGrapes::ResourceUser::is_serial(f2, f2) == true ); - REQUIRE( redGrapes::ResourceUser::is_serial(f2, f3) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f2, f4) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f2, f5) == true ); + REQUIRE(redGrapes::ResourceUser::is_serial(f2, f1) == true); + REQUIRE(redGrapes::ResourceUser::is_serial(f2, f2) == true); + REQUIRE(redGrapes::ResourceUser::is_serial(f2, f3) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f2, f4) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f2, f5) == true); - REQUIRE( redGrapes::ResourceUser::is_serial(f3, f1) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f3, f2) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f3, f3) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f3, f4) == true ); - REQUIRE( redGrapes::ResourceUser::is_serial(f3, f5) == true ); + REQUIRE(redGrapes::ResourceUser::is_serial(f3, f1) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f3, f2) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f3, f3) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f3, f4) == true); + REQUIRE(redGrapes::ResourceUser::is_serial(f3, f5) == true); - REQUIRE( redGrapes::ResourceUser::is_serial(f4, f1) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f4, f2) == false ); - REQUIRE( redGrapes::ResourceUser::is_serial(f4, f3) == true ); - REQUIRE( redGrapes::ResourceUser::is_serial(f4, f4) == true ); - REQUIRE( redGrapes::ResourceUser::is_serial(f4, f5) == true ); + REQUIRE(redGrapes::ResourceUser::is_serial(f4, f1) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f4, f2) == false); + REQUIRE(redGrapes::ResourceUser::is_serial(f4, f3) == true); + REQUIRE(redGrapes::ResourceUser::is_serial(f4, f4) == true); + REQUIRE(redGrapes::ResourceUser::is_serial(f4, f5) == true); - REQUIRE( redGrapes::ResourceUser::is_serial(f5, f1) == true ); - REQUIRE( redGrapes::ResourceUser::is_serial(f5, f2) == true ); - REQUIRE( redGrapes::ResourceUser::is_serial(f5, f3) 
== true );
-    REQUIRE( redGrapes::ResourceUser::is_serial(f5, f4) == true );
-    REQUIRE( redGrapes::ResourceUser::is_serial(f5, f5) == true );
+    REQUIRE(redGrapes::ResourceUser::is_serial(f5, f1) == true);
+    REQUIRE(redGrapes::ResourceUser::is_serial(f5, f2) == true);
+    REQUIRE(redGrapes::ResourceUser::is_serial(f5, f3) == true);
+    REQUIRE(redGrapes::ResourceUser::is_serial(f5, f4) == true);
+    REQUIRE(redGrapes::ResourceUser::is_serial(f5, f5) == true);
 
-    REQUIRE( f1.is_superset_of(f1) == true );
-    REQUIRE( f1.is_superset_of(f2) == false );
-    REQUIRE( f1.is_superset_of(f3) == false );
-    REQUIRE( f1.is_superset_of(f4) == false );
-    REQUIRE( f1.is_superset_of(f5) == false );
+    REQUIRE(f1.is_superset_of(f1) == true);
+    REQUIRE(f1.is_superset_of(f2) == false);
+    REQUIRE(f1.is_superset_of(f3) == false);
+    REQUIRE(f1.is_superset_of(f4) == false);
+    REQUIRE(f1.is_superset_of(f5) == false);
 
-    REQUIRE( f2.is_superset_of(f1) == true );
-    REQUIRE( f2.is_superset_of(f2) == true );
-    REQUIRE( f2.is_superset_of(f3) == false );
-    REQUIRE( f2.is_superset_of(f4) == false );
-    REQUIRE( f2.is_superset_of(f5) == false );
+    REQUIRE(f2.is_superset_of(f1) == true);
+    REQUIRE(f2.is_superset_of(f2) == true);
+    REQUIRE(f2.is_superset_of(f3) == false);
+    REQUIRE(f2.is_superset_of(f4) == false);
+    REQUIRE(f2.is_superset_of(f5) == false);
 
     redGrapes::finalize();
 }
-
diff --git a/test/scheduler.cpp b/test/scheduler.cpp
index 0a1e516f..09413586 100644
--- a/test/scheduler.cpp
+++ b/test/scheduler.cpp
@@ -1,45 +1,46 @@
+#include
+#include
+
 #include
+#include
 
-#include
+#include
+#include
 #include
 #include
-#include
 #include
+#include
 #include
-#include
-#include
-#include
-#include
-#include
 
 namespace rg = redGrapes;
 
 using namespace std::chrono;
 
-void test_worker_utilization( unsigned n_workers )
+void test_worker_utilization(unsigned n_workers)
 {
     rg::init(n_workers);
     spdlog::set_pattern("[thread %t] %^[%l]%$ %v");
 
-    std::atomic< unsigned > count(0);
+    std::atomic<unsigned> count(0);
 
-    for( unsigned i=0; i < n_workers; ++i )
+    for(unsigned i = 0; i < n_workers; ++i)
     {
         rg::emplace_task(
-            [&count] {
-                count ++;
-                std::this_thread::sleep_for(milliseconds(300));
-            }
-        );
+            [&count]
+            {
+                count++;
+                std::this_thread::sleep_for(milliseconds(300));
+            });
     }
 
     auto end = std::chrono::steady_clock::now() + std::chrono::seconds(10);
-    while( std::chrono::steady_clock::now() < end )
-        if( count == n_workers )
+    while(std::chrono::steady_clock::now() < end)
+        if(count == n_workers)
             break;
 
-    REQUIRE( count == n_workers );
+    REQUIRE(count == n_workers);
 
-    rg::finalize();
+    rg::finalize();
 }
 
 /*
@@ -48,10 +49,8 @@
  */
 TEST_CASE("WorkerUtilization")
 {
-    for( int i = 1; i < std::thread::hardware_concurrency(); i += 5)
+    for(int i = 1; i < std::thread::hardware_concurrency(); i += 5)
         test_worker_utilization(i);
 
     test_worker_utilization(std::thread::hardware_concurrency());
 }
-
-
diff --git a/test/sha256.c b/test/sha256.c
index 16c93cde..cbc78d24 100644
--- a/test/sha256.c
+++ b/test/sha256.c
@@ -4,44 +4,34 @@
 /* xlc -DTEST_MAIN sha256.c -o sha256.exe */
 /* gcc -DTEST_MAIN -std=c99 sha256.c -o sha256.exe */
 
+#include
 #include
 #include
-#include
-
-static const uint32_t K256[] =
-{
-    0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
-    0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
-    0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
-    0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
-    0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
-    0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
-    0x983E5152,
0xA831C66D, 0xB00327C8, 0xBF597FC7, - 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, - 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, - 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, - 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, - 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, - 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, - 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, - 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, - 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2 -}; - -#define ROTATE(x,y) (((x)>>(y)) | ((x)<<(32-(y)))) -#define Sigma0(x) (ROTATE((x), 2) ^ ROTATE((x),13) ^ ROTATE((x),22)) -#define Sigma1(x) (ROTATE((x), 6) ^ ROTATE((x),11) ^ ROTATE((x),25)) -#define sigma0(x) (ROTATE((x), 7) ^ ROTATE((x),18) ^ ((x)>> 3)) -#define sigma1(x) (ROTATE((x),17) ^ ROTATE((x),19) ^ ((x)>>10)) -#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) -#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +static const uint32_t K256[] + = {0x428A'2F98, 0x7137'4491, 0xB5C0'FBCF, 0xE9B5'DBA5, 0x3956'C25B, 0x59F1'11F1, 0x923F'82A4, 0xAB1C'5ED5, + 0xD807'AA98, 0x1283'5B01, 0x2431'85BE, 0x550C'7DC3, 0x72BE'5D74, 0x80DE'B1FE, 0x9BDC'06A7, 0xC19B'F174, + 0xE49B'69C1, 0xEFBE'4786, 0x0FC1'9DC6, 0x240C'A1CC, 0x2DE9'2C6F, 0x4A74'84AA, 0x5CB0'A9DC, 0x76F9'88DA, + 0x983E'5152, 0xA831'C66D, 0xB003'27C8, 0xBF59'7FC7, 0xC6E0'0BF3, 0xD5A7'9147, 0x06CA'6351, 0x1429'2967, + 0x27B7'0A85, 0x2E1B'2138, 0x4D2C'6DFC, 0x5338'0D13, 0x650A'7354, 0x766A'0ABB, 0x81C2'C92E, 0x9272'2C85, + 0xA2BF'E8A1, 0xA81A'664B, 0xC24B'8B70, 0xC76C'51A3, 0xD192'E819, 0xD699'0624, 0xF40E'3585, 0x106A'A070, + 0x19A4'C116, 0x1E37'6C08, 0x2748'774C, 0x34B0'BCB5, 0x391C'0CB3, 0x4ED8'AA4A, 0x5B9C'CA4F, 0x682E'6FF3, + 0x748F'82EE, 0x78A5'636F, 0x84C8'7814, 0x8CC7'0208, 0x90BE'FFFA, 0xA450'6CEB, 0xBEF9'A3F7, 0xC671'78F2}; + +#define ROTATE(x, y) (((x) >> (y)) | ((x) << (32 - (y)))) +#define Sigma0(x) (ROTATE((x), 2) ^ ROTATE((x), 13) ^ ROTATE((x), 22)) +#define Sigma1(x) (ROTATE((x), 6) ^ ROTATE((x), 11) ^ ROTATE((x), 25)) +#define sigma0(x) (ROTATE((x), 7) ^ ROTATE((x), 18) ^ ((x) >> 3)) +#define sigma1(x) (ROTATE((x), 17) ^ ROTATE((x), 19) ^ ((x) >> 10)) + +#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) /* Avoid undefined behavior */ /* https://stackoverflow.com/q/29538935/608639 */ uint32_t B2U32(uint8_t val, uint8_t sh) { - return ((uint32_t)val) << sh; + return ((uint32_t) val) << sh; } /* Process multiple blocks. 
The caller is responsible for setting the initial */ @@ -52,7 +42,7 @@ void sha256_process(uint32_t state[8], const uint8_t data[], uint32_t length) uint32_t X[16], i; size_t blocks = length / 64; - while (blocks--) + while(blocks--) { a = state[0]; b = state[1]; @@ -63,7 +53,7 @@ void sha256_process(uint32_t state[8], const uint8_t data[], uint32_t length) g = state[6]; h = state[7]; - for (i = 0; i < 16; i++) + for(i = 0; i < 16; i++) { X[i] = B2U32(data[0], 24) | B2U32(data[1], 16) | B2U32(data[2], 8) | B2U32(data[3], 0); data += 4; @@ -87,7 +77,7 @@ void sha256_process(uint32_t state[8], const uint8_t data[], uint32_t length) a = T1 + T2; } - for (; i < 64; i++) + for(; i < 64; i++) { s0 = X[(i + 1) & 0x0f]; s0 = sigma0(s0); @@ -117,4 +107,3 @@ void sha256_process(uint32_t state[8], const uint8_t data[], uint32_t length) state[7] += h; } } - diff --git a/test/task_space.cpp b/test/task_space.cpp index a5ccddcc..14588be8 100644 --- a/test/task_space.cpp +++ b/test/task_space.cpp @@ -1,16 +1,16 @@ -#include - #include +#include + struct TestTask { unsigned id; unsigned task_id; - //using VertexPtr = std::shared_ptr>; - //using WeakVertexPtr = std::weak_ptr>; - + // using VertexPtr = std::shared_ptr>; + // using WeakVertexPtr = std::weak_ptr>; + /* * Create the following graph: * 0 2 @@ -19,15 +19,11 @@ struct TestTask * | / \ / * 6 5 7 */ - static bool is_serial(TestTask const & a, TestTask const & b) + static bool is_serial(TestTask const& a, TestTask const& b) { - return (a.id == 0 && b.id == 1) - || (a.id == 0 && b.id == 3) - || (a.id == 2 && b.id == 4) - || (a.id == 1 && b.id == 6) - || (a.id == 3 && b.id == 5) - || (a.id == 3 && b.id == 7) - || (a.id == 4 && b.id == 7); + return (a.id == 0 && b.id == 1) || (a.id == 0 && b.id == 3) || (a.id == 2 && b.id == 4) + || (a.id == 1 && b.id == 6) || (a.id == 3 && b.id == 5) || (a.id == 3 && b.id == 7) + || (a.id == 4 && b.id == 7); } }; @@ -81,4 +77,3 @@ TEST_CASE("precedence graph") } */ } -
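
Reviewer note (not part of the patch): the reformatted test/access.cpp hunks above exercise the access combinators but never state the rule they implement. The sketch below spells it out for CombineAccess with And_t, using only types and calls that appear verbatim in those tests. It is an illustrative sketch, not part of the test suite, and the two redGrapes include paths are assumptions, since the original #include targets are elided in this diff.

// Sketch: CombineAccess<IOAccess, AreaAccess, And_t> pairs an IO mode with an
// index interval. With And_t, two accesses must be serialized only when BOTH
// parts conflict: the IO modes are serial (e.g. write vs. read) AND the
// intervals overlap. Disjoint intervals never create a dependency, whatever
// the mode, and two reads never do.
#include <redGrapes/resource/access/area.hpp>    // assumed header for AreaAccess
#include <redGrapes/resource/access/combine.hpp> // assumed header for CombineAccess, And_t
#include <redGrapes/resource/access/io.hpp>      // assumed header for IOAccess

#include <iostream>

using namespace redGrapes::access;

int main()
{
    using A = CombineAccess<IOAccess, AreaAccess, And_t>;

    A read_low(IOAccess{IOAccess::read}, AreaAccess({10, 20}));
    A write_overlap(IOAccess{IOAccess::write}, AreaAccess({15, 25}));
    A write_disjoint(IOAccess{IOAccess::write}, AreaAccess({30, 40}));

    std::cout << A::is_serial(read_low, write_overlap) << '\n';  // 1: modes conflict and ranges overlap
    std::cout << A::is_serial(read_low, write_disjoint) << '\n'; // 0: ranges disjoint, no dependency
    std::cout << A::is_serial(read_low, read_low) << '\n';       // 0: two reads never serialize
}

The same outputs follow directly from the REQUIRE lines in TEST_CASE("CombineAccess") above; swapping And_t for Or_t makes a conflict in either component sufficient, which is why the B cases with disjoint ranges but a write mode still serialize.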