From e8b5f08392ede52bc992e4ed91bd31a8161cf2c8 Mon Sep 17 00:00:00 2001 From: Ilya Yaroshenko Date: Mon, 17 Oct 2016 16:54:44 +0200 Subject: [PATCH] switch to native complex numbers (#367) * rework glas * add const * use native complex numbers * remove complex and update sum * fix style --- benchmarks/glas/gemm_bench.d | 29 +++-- benchmarks/glas/gemm_report.d | 28 +++-- benchmarks/glas/symm_bench.d | 29 +++-- source/mir/glas/common.d | 52 +-------- source/mir/glas/internal/blocking.d | 32 +++--- source/mir/glas/internal/config.d | 1 - source/mir/glas/internal/context.d | 66 ++++++++---- source/mir/glas/internal/copy.d | 24 ++--- source/mir/glas/internal/gemm.d | 15 ++- source/mir/glas/internal/symm.d | 10 +- source/mir/glas/internal/test.d | 1 - source/mir/glas/internal/trmm.d | 1 - source/mir/glas/internal/trsm.d | 1 - source/mir/glas/l1.d | 85 +++++++-------- source/mir/glas/l2.d | 12 +-- source/mir/glas/l3.d | 65 +++++------- source/mir/internal/utility.d | 11 +- source/mir/sum.d | 157 ++++++++++++++++------------ 18 files changed, 287 insertions(+), 332 deletions(-) diff --git a/benchmarks/glas/gemm_bench.d b/benchmarks/glas/gemm_bench.d index 67d1f41c..50702483 100644 --- a/benchmarks/glas/gemm_bench.d +++ b/benchmarks/glas/gemm_bench.d @@ -2,8 +2,9 @@ /+ dub.json: { "name": "gemm_bench", - "dependencies": {"mir": {"path": "../.."}, "cblas": "~>0.1.0"}, + "dependencies": {"mir": {"path": "../.."}, "cblas": "~>1.0.0"}, "dflags-ldc": ["-mcpu=native"], + "libs": ["blas"], "lflags": ["-L./"] } +/ @@ -11,24 +12,23 @@ import std.math; import std.traits; import std.datetime; import std.conv; -import std.complex; import std.algorithm.comparison; import std.stdio; import std.exception; import std.getopt; import mir.ndslice; import mir.glas; +import mir.internal.utility : isComplex; alias C = float; //alias C = double; -//alias C = Complex!float; -//alias C = Complex!double; +//alias C = cfloat; +//alias C = cdouble; alias A = C; alias B = C; void main(string[] args) { - auto glas = new GlasContext; size_t m = 1000; size_t n = size_t.max; size_t k = size_t.max; @@ -60,10 +60,10 @@ void main(string[] args) d[] = c[]; - static if(is(C : Complex!F, F)) + static if(isComplex!C) { - C alpha = C(3, 7); - C beta = C(2, 5); + C alpha = 3 + 7i; + C beta = 2 + 5i; } else { @@ -71,17 +71,15 @@ void main(string[] args) C beta = 2; } - auto nsecsBLAS = double.max; - foreach(_; 0..count) { StopWatch sw; sw.start; - static if(!(is(C == real) || is(C : Complex!real) || is(C : long))) + static if(!(is(C == real) || is(C == creal) || is(C : long))) { static import cblas; - static if(is(C : Complex!E, E)) + static if(isComplex!C) cblas.gemm( cblas.Order.RowMajor, cblas.Transpose.NoTrans, @@ -128,7 +126,7 @@ void main(string[] args) { StopWatch sw; sw.start; - glas.gemm(alpha, a, b, beta, c); + gemm(alpha, a, b, beta, c); sw.stop; auto newns = sw.peek.to!Duration.total!"nsecs".to!double; //writefln("_GLAS (single thread) : %5s GFLOPS", (m * n * k * 2) / newns); @@ -147,10 +145,9 @@ void fillRNG(T)(Slice!(2, T*) sl) import std.random; foreach(ref e; sl.byElement) { - static if(is(T : Complex!F, F)) + static if(isComplex!T) { - e.re = cast(F) uniform(-100, 100); - e.im = cast(F) uniform(-100, 100); + e = uniform(-100, 100) + uniform(-100, 100) * 1i; } else { diff --git a/benchmarks/glas/gemm_report.d b/benchmarks/glas/gemm_report.d index ad27b6f6..004735a7 100644 --- a/benchmarks/glas/gemm_report.d +++ b/benchmarks/glas/gemm_report.d @@ -2,10 +2,10 @@ /+ dub.json: { "name": "gemm_report", - "dependencies": {"mir": {"path": "../.."}, "cblas": "~>0.2.0"}, + "dependencies": {"mir": {"path": "../.."}, "cblas": "~>1.0.0"}, "lflags": ["-L./"], "libs": ["blas"], - "dflags-ldc": ["-mcpu=native", "-singleobj"], + "dflags-ldc": ["-mcpu=native"], } +/ //"lflags": ["-L/opt/intel/mkl/lib"], @@ -19,18 +19,18 @@ import std.math; import std.traits; import std.datetime; import std.conv; -import std.complex; import std.algorithm.comparison; import std.stdio; import std.exception; import std.getopt; import mir.ndslice; import mir.glas; +import mir.internal.utility : isComplex; alias C = float; //alias C = double; -//alias C = Complex!float; -//alias C = Complex!double; +//alias C = cfloat; +//alias C = cdouble; alias A = C; alias B = C; @@ -41,7 +41,6 @@ size_t[] reportValues = [ void main(string[] args) { - auto glas = new GlasContext; size_t count = 6; auto helpInformation = getopt(args, @@ -69,10 +68,10 @@ void main(string[] args) d[] = c[]; - static if(is(C : Complex!F, F)) + static if(isComplex!C) { - C alpha = C(3, 7); - C beta = C(2, 5); + C alpha = 3 + 7i; + C beta = 2 + 5i; } else { @@ -85,10 +84,10 @@ void main(string[] args) foreach(_; 0..count) { StopWatch sw; sw.start; - static if(!(is(C == real) || is(C : Complex!real) || is(C : long))) + static if(!(is(C == real) || is(C == creal) || is(C : long))) { static import cblas; - static if(is(C : Complex!E, E)) + static if(isComplex!C) cblas.gemm( cblas.Order.RowMajor, cblas.Transpose.NoTrans, @@ -137,7 +136,7 @@ void main(string[] args) { StopWatch sw; sw.start; - glas.gemm(alpha, a, b, beta, c); + gemm(alpha, a, b, beta, c); sw.stop; auto newns = sw.peek.to!Duration.total!"nsecs".to!double; //writefln("_GLAS (single thread) : %5s GFLOPS", (m * n * k * 2) / newns); @@ -154,10 +153,9 @@ void fillRNG(T)(Slice!(2, T*) sl) import std.random; foreach(ref e; sl.byElement) { - static if(is(T : Complex!F, F)) + static if(isComplex!T) { - e.re = cast(F) uniform(-100, 100); - e.im = cast(F) uniform(-100, 100); + e = uniform(-100, 100) + uniform(-100, 100) * 1i; } else { diff --git a/benchmarks/glas/symm_bench.d b/benchmarks/glas/symm_bench.d index 33479d84..8a04938e 100644 --- a/benchmarks/glas/symm_bench.d +++ b/benchmarks/glas/symm_bench.d @@ -2,8 +2,9 @@ /+ dub.json: { "name": "symm_bench", - "dependencies": {"mir": {"path": "../.."}, "cblas": "~>0.1.0"}, + "dependencies": {"mir": {"path": "../.."}, "cblas": "~>1.0.0"}, "dflags-ldc": ["-mcpu=native"], + "libs": ["blas"], "lflags": ["-L./"] } +/ @@ -11,24 +12,23 @@ import std.math; import std.traits; import std.datetime; import std.conv; -import std.complex; import std.algorithm.comparison; import std.stdio; import std.exception; import std.getopt; import mir.ndslice; import mir.glas; +import mir.internal.utility : isComplex; alias C = float; //alias C = double; -//alias C = Complex!float; -//alias C = Complex!double; +//alias C = cfloat; +//alias C = cdouble; alias A = C; alias B = C; void main(string[] args) { - auto glas = new GlasContext; size_t m = 1000; size_t n = size_t.max; size_t k = size_t.max; @@ -59,10 +59,10 @@ void main(string[] args) d[] = c[]; - static if(is(C : Complex!F, F)) + static if(isComplex!C) { - C alpha = C(3, 7); - C beta = C(2, 5); + C alpha = 3 + 7i; + C beta = 2 + 5i; } else { @@ -70,17 +70,15 @@ void main(string[] args) C beta = 2; } - auto nsecsBLAS = double.max; - foreach(_; 0..count) { StopWatch sw; sw.start; - static if(!(is(C == real) || is(C : Complex!real) || is(C : long))) + static if(!(is(C == real) || is(C == creal) || is(C : long))) { static import cblas; - static if(is(C : Complex!E, E)) + static if(isComplex!C) cblas.symm( cblas.Order.RowMajor, cblas.Side.Left, @@ -125,7 +123,7 @@ void main(string[] args) { StopWatch sw; sw.start; - glas.symm(Side.left, Uplo.lower, alpha, a, b, beta, c); + symm(Side.left, Uplo.lower, alpha, a, b, beta, c); sw.stop; auto newns = sw.peek.to!Duration.total!"nsecs".to!double; //writefln("_GLAS (single thread) : %5s GFLOPS", (m * n * m * 2) / newns); @@ -148,10 +146,9 @@ void fillRNG(T)(Slice!(2, T*) sl) import std.random; foreach(ref e; sl.byElement) { - static if(is(T : Complex!F, F)) + static if(isComplex!T) { - e.re = cast(F) uniform(-100, 100); - e.im = cast(F) uniform(-100, 100); + e = uniform(-100, 100) + uniform(-100, 100) * 1i; } else { diff --git a/source/mir/glas/common.d b/source/mir/glas/common.d index 485831c3..d646d483 100644 --- a/source/mir/glas/common.d +++ b/source/mir/glas/common.d @@ -9,53 +9,6 @@ Authors: Ilya Yaroshenko +/ module mir.glas.common; -/++ -GLAS Context - -Note: `GlasContext` is single thread for now. -+/ -struct GlasContext -{ - import mir.internal.memory; - import mir.glas.internal.context; - static import cpuid.unified; - import core.sync.mutex; - - private - { - void[] _memory; - } - -nothrow @nogc: - - ~this() - { - release; - } - - /// Returns: reused unaligned memory chunk - nothrow @nogc void[] memory(size_t size) - { - if (_memory.length < size) - { - auto f = _memory.length << 1; - if (f > size) - size = f; - if (_memory !is null) - deallocate(_memory); - _memory = alignedAllocate(size, 4096); - } - return _memory[0 .. size]; - } - - /// Releases memory. - void release() - { - if (_memory !is null) - deallocate(_memory); - } -} - /++ Uplo specifies whether a matrix is an upper or lower triangular matrix. +/ @@ -153,10 +106,7 @@ package mixin template prefix3() enum PB = CB ? 2 : 1; enum PC = CC ? 2 : 1; - static if (is(C : Complex!F, F)) - alias T = F; - else - alias T = C; + alias T = realType!C; static assert(!isComplex!T); } diff --git a/source/mir/glas/internal/blocking.d b/source/mir/glas/internal/blocking.d index 6c39bc9a..00edad47 100644 --- a/source/mir/glas/internal/blocking.d +++ b/source/mir/glas/internal/blocking.d @@ -1,11 +1,13 @@ module mir.glas.internal.blocking; -import std.traits; +import core.sync.mutex; import std.meta; -import std.complex : Complex; -import mir.internal.utility; -import mir.glas.internal.config; +import std.traits; import mir.glas.common; +import mir.glas.internal.config; +import mir.glas.internal.context; +import mir.internal.utility; +static import cpuid.unified; import ldc.attributes : fastmath; @fastmath: @@ -20,20 +22,20 @@ struct BlockInfo(T) T* b; } -BlockInfo!T blocking(size_t PA, size_t PB, size_t PC, T)(GlasContext* ctx, size_t m, size_t n, size_t k) +BlockInfo!T blocking(size_t PA, size_t PB, size_t PC, T)(size_t m, size_t n, size_t k) { import mir.glas.internal.context; mixin RegisterConfig!(PC, PA, PB, T); BlockInfo!T ret = void; - sizediff_t l2 = c2.size << 9; // half cache + sizediff_t l2 = c2 >> 1; // half cache ret.kc = (l2 - m * T[PC][main_nr].sizeof) / (m * T[PA].sizeof + T[PB][main_nr].sizeof); ret.mc = m; enum minKc = 320 / PC; - auto a = 2 * (T[PC][main_nr][main_mr].sizeof + main_nr * c1.line) + 512; - if (ret.kc < minKc || ret.kc * (T[PA][main_mr].sizeof + T[PB][main_nr].sizeof) + a > c1.size << 10) + auto a = 2 * (T[PC][main_nr][main_mr].sizeof + main_nr * line) + 512; + if (ret.kc < minKc || ret.kc * (T[PA][main_mr].sizeof + T[PB][main_nr].sizeof) + a > c1) { - ret.kc = ((c1.size << 10) - a) / (T[PA][main_mr].sizeof + T[PB][main_nr].sizeof); - assert(c1.size << 10 > main_mr); + ret.kc = (c1 - a) / (T[PA][main_mr].sizeof + T[PB][main_nr].sizeof); + assert(c1 > main_mr); assert(ret.kc > main_mr); ret.kc.normalizeChunkSize!main_mr(k); assert(ret.kc > 0); @@ -48,20 +50,20 @@ BlockInfo!T blocking(size_t PA, size_t PB, size_t PC, T)(GlasContext* ctx, size_ auto a_length = ret.kc * ret.mc * T[PA].sizeof; auto b_length = ret.kc * T[PB].sizeof * (ret.mc == m && false ? main_nr : n); auto buffLength = a_length + b_length; - auto _mem = ctx.memory(a_length + b_length + prefetchShift); + auto _mem = memory(a_length + b_length + prefetchShift); ret.a = cast(T*) _mem.ptr; ret.b = cast(T*) (_mem.ptr + a_length); return ret; } -BlockInfo!T blocking_triangular(size_t PA, size_t PB, T)(GlasContext* ctx, size_t m, size_t n) +BlockInfo!T blocking_triangular(size_t PA, size_t PB, T)(size_t m, size_t n) { import mir.glas.internal.context; mixin RegisterConfig!(PB, PA, PB, T); BlockInfo!T ret = void; - sizediff_t l2 = c2.size << 10; // half matrix - //ret.kc = ((c1.size << 10) - 2 * (T[PB][main_nr][main_mr].sizeof + main_nr * c1.line) - 512) / (T[PA][main_nr].sizeof + T[PB][main_mr].sizeof); + sizediff_t l2 = c2; // half matrix + //ret.kc = (c1 - 2 * (T[PB][main_nr][main_mr].sizeof + main_nr * line) - 512) / (T[PA][main_nr].sizeof + T[PB][main_mr].sizeof); import std.stdio; if (l2 >= (m * ((m + main_nr) * PA + PB * main_mr * 2)) * T.sizeof) @@ -83,7 +85,7 @@ BlockInfo!T blocking_triangular(size_t PA, size_t PB, T)(GlasContext* ctx, size_ auto a_length = ret.kc * ret.mc * T[PA].sizeof; auto b_length = ret.kc * T[PB].sizeof * (ret.mc == m && false ? main_mr : n); auto buffLength = a_length + b_length; - auto _mem = ctx.memory(a_length + b_length + prefetchShift); + auto _mem = memory(a_length + b_length + prefetchShift); ret.b = cast(T*) _mem.ptr; ret.a = cast(T*) (_mem.ptr + b_length); diff --git a/source/mir/glas/internal/config.d b/source/mir/glas/internal/config.d index f3f4d649..e944745d 100644 --- a/source/mir/glas/internal/config.d +++ b/source/mir/glas/internal/config.d @@ -2,7 +2,6 @@ module mir.glas.internal.config; import std.traits; import std.meta; -import std.complex: Complex; import mir.internal.utility; template RegisterConfig(size_t PS, size_t PB, size_t PR, T) diff --git a/source/mir/glas/internal/context.d b/source/mir/glas/internal/context.d index 7f461afe..06e756c8 100644 --- a/source/mir/glas/internal/context.d +++ b/source/mir/glas/internal/context.d @@ -2,10 +2,38 @@ module mir.glas.internal.context; import std.range.primitives; import cpuid.unified; +import mir.internal.memory; -__gshared immutable Cache c1; -__gshared immutable Cache c2; -__gshared immutable Tlb tlb; +__gshared uint c1; +__gshared uint c2; +__gshared uint line; +__gshared void[] _memory; + + +import ldc.attributes : fastmath; +@fastmath: + +/// Returns: reused unaligned memory chunk +nothrow @nogc void[] memory(size_t size) +{ + if (_memory.length < size) + { + auto f = _memory.length << 1; + if (f > size) + size = f; + if (_memory !is null) + deallocate(_memory); + _memory = alignedAllocate(size, 4096); + } + return _memory[0 .. size]; +} + +/// Releases memory. +nothrow @nogc void release() +{ + if (_memory !is null) + deallocate(_memory); +} nothrow @nogc shared static this() @@ -19,43 +47,45 @@ shared static this() enum msg = "MIR: failed to get CPUID information"; - while (!uc.empty && uc.back.size > (1024 * 32)) // > 32 MB is CPU memory + while (!uc.empty && uc.back.size > (1024 * 64)) // > 64 MB is CPU memory { uc.popBack; } if (dc.length) { - c1 = dc.front; + c1 = dc.front.size; + line = dc.front.line; dc.popFront; } else if (uc.length) { - c1 = uc.front; + c1 = uc.front.size; + line = uc.front.line; uc.popFront; } - else assert(0, msg); + else + { + c1 = 16; + } if (uc.length) { - c2 = uc.back; + c2 = uc.back.size; } else if (dc.length) { - c2 = dc.back; - } - else assert(0, msg); - - if (uTlb.length) - { - tlb = uTlb.back; + c2 = dc.back.size; } else - if (dTlb.length) { - tlb = dTlb.back; + c1 = 256; } - else assert(0, msg); + + c1 <<= 10; + c2 <<= 10; + if (line == 0) + line = 64; } diff --git a/source/mir/glas/internal/copy.d b/source/mir/glas/internal/copy.d index abb86214..d5e66461 100644 --- a/source/mir/glas/internal/copy.d +++ b/source/mir/glas/internal/copy.d @@ -2,7 +2,6 @@ module mir.glas.internal.copy; import std.traits; import std.meta; -import std.complex : Complex; import mir.ndslice.slice : Slice; import mir.internal.utility; import mir.glas.internal.config; @@ -12,7 +11,7 @@ import ldc.attributes : fastmath; @fastmath: pragma(inline, true) -T* pack_b_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, sizediff_t elemStride, F* from, T* to) +T* pack_b_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, sizediff_t elemStride, const(F)* from, T* to) { if (elemStride == 1) return pack_b_dense_nano!(n, P, conj)(length, stride, from, to); @@ -21,7 +20,7 @@ T* pack_b_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sized } pragma(inline, false) -T* pack_b_sym_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, Slice!(2, F*) sl, size_t j, size_t i, T* to) +T* pack_b_sym_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, Slice!(2, const(F)*) sl, size_t j, size_t i, T* to) { { sizediff_t diff = i - j; @@ -90,7 +89,7 @@ T* pack_b_sym_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, S } pragma(inline, false) -T* pack_b_strided_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, sizediff_t elemStride, F* from, T* to) +T* pack_b_strided_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, sizediff_t elemStride, const(F)* from, T* to) { enum s = n * P; do @@ -121,7 +120,7 @@ T* pack_b_strided_nano(size_t n, size_t P, bool conj = false, F, T)(size_t lengt } //pragma(inline, false) -T* pack_b_dense_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, F* from, T* to) +T* pack_b_dense_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, const(F)* from, T* to) { enum s = n * P; do @@ -164,7 +163,7 @@ T* pack_b_dense_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, } pragma(inline, true) -T* pack_a_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, sizediff_t elemStride, F* from, T* to) +T* pack_a_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, sizediff_t elemStride, const(F)* from, T* to) { if (elemStride == 1) return pack_a_dense_nano!(n, P, conj)(length, stride, from, to); @@ -173,7 +172,7 @@ T* pack_a_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sized } //pragma(inline, false) -T* pack_a_strided_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, sizediff_t elemStride, F* from, T* to) +T* pack_a_strided_nano(size_t n, size_t P, bool conj = false, F, T)(size_t length, sizediff_t stride, sizediff_t elemStride, const(F)* from, T* to) { static if (P == 1) { @@ -201,7 +200,7 @@ T* pack_a_strided_nano(size_t n, size_t P, bool conj = false, F, T)(size_t lengt } //pragma(inline, false) -T* pack_a_dense_nano(size_t mr, size_t P, bool conj = false, T, F)(size_t length, sizediff_t stride, F* from, T* to) +T* pack_a_dense_nano(size_t mr, size_t P, bool conj = false, T, F)(size_t length, sizediff_t stride, const(F)* from, T* to) { do { @@ -254,7 +253,7 @@ T* pack_a_dense_nano(size_t mr, size_t P, bool conj = false, T, F)(size_t length } pragma(inline, false) -void pack_a(size_t PA, size_t PB, size_t PC, bool conj = false, T, C)(Slice!(2, C*) sl, T* a) +void pack_a(size_t PA, size_t PB, size_t PC, bool conj = false, T, C)(Slice!(2, const(C)*) sl, T* a) { import mir.ndslice.iteration: transposed; mixin RegisterConfig!(PC, PA, PB, T); @@ -281,7 +280,7 @@ void pack_a(size_t PA, size_t PB, size_t PC, bool conj = false, T, C)(Slice!(2, } pragma(inline, false) -void pack_a_sym(size_t PA, size_t PB, size_t PC, bool conj = false, T, F)(Slice!(2, F*) sl, size_t i, size_t t, size_t mc, size_t kc, T* to) +void pack_a_sym(size_t PA, size_t PB, size_t PC, bool conj = false, T, F)(Slice!(2, const(F)*) sl, size_t i, size_t t, size_t mc, size_t kc, T* to) { import mir.ndslice.iteration: transposed, reversed; mixin RegisterConfig!(PC, PA, PB, T); @@ -375,7 +374,7 @@ void pack_a_sym(size_t PA, size_t PB, size_t PC, bool conj = false, T, F)(Slice! //pragma(inline, false) -void pack_b_triangular(Uplo uplo, bool inverseDiagonal, size_t PA, size_t PB, size_t PC, T, C)(Slice!(2, C*) sl, T* b) +void pack_b_triangular(Uplo uplo, bool inverseDiagonal, size_t PA, size_t PB, size_t PC, T, C)(Slice!(2, const(C)*) sl, T* b) { assert(sl.length!0 == sl.length!1); import mir.ndslice.iteration: transposed; @@ -424,7 +423,7 @@ void pack_b_triangular(Uplo uplo, bool inverseDiagonal, size_t PA, size_t PB, si while (!nri && sl.length >= nr); } -void load_simd(size_t mr, size_t P, T)(T* to, T[P]* from) +void load_simd(size_t mr, size_t P, T)(T* to, const(T[P])* from) { static if (mr > 1 && !is(T == real)) { @@ -455,7 +454,6 @@ void load_simd(size_t mr, size_t P, T)(T* to, T[P]* from) //void pack_b(size_t PA, size_t PB, size_t PC, T, C)(Slice!(2, C*) sl, T* b) //{ // import mir.ndslice.iteration: transposed; -// import std.complex: Complex; // mixin RegisterConfig!(PC, PA, PB, T); // if (sl.stride!0 == 1) // { diff --git a/source/mir/glas/internal/gemm.d b/source/mir/glas/internal/gemm.d index ada6ad0a..9cd8f894 100644 --- a/source/mir/glas/internal/gemm.d +++ b/source/mir/glas/internal/gemm.d @@ -1,7 +1,6 @@ module mir.glas.internal.gemm; import std.traits; -import std.complex; import std.meta; public import mir.glas.common; @@ -18,10 +17,9 @@ pragma(inline, false) //nothrow @nogc void gemm_impl(A, B, C) ( - GlasContext* ctx, C alpha, - Slice!(2, A*) asl, - Slice!(2, B*) bsl, + Slice!(2, const(A)*) asl, + Slice!(2, const(B)*) bsl, C beta, Slice!(2, C*) csl, Conjugated conja, @@ -60,7 +58,7 @@ void gemm_impl(A, B, C) } else { - ctx.gemm!(B, A, C)(alpha, bsl.transposed, asl.transposed, beta, csl.transposed, conjb, conja); + gemm_impl!(B, A, C)(alpha, bsl.transposed, asl.transposed, beta, csl.transposed, conjb, conja); return; } } @@ -110,7 +108,7 @@ void gemm_impl(A, B, C) beta_kernels[nri] = &gemv_reg!(BetaType.beta, PA, PB, PC, nr, T); kernels = beta_kernels.ptr; } - auto bl = blocking!(PA, PB, PC, T)(ctx, asl.length!0, bsl.length!1, asl.length!1); + auto bl = blocking!(PA, PB, PC, T)(asl.length!0, bsl.length!1, asl.length!1); with(bl) { sizediff_t incb; @@ -426,9 +424,8 @@ void scale_nano(size_t M, size_t P, size_t N, V, F)(ref const F[P] alpha, ref V[ pragma(inline, true) ref auto castByRef(C)(return ref C val) { - import std.complex: Complex; - static if (is(C : Complex!T, T)) - alias R = T[2]; + static if (isComplex!C) + alias R = typeof(val.re)[2]; else alias R = C[1]; return *cast(R*) &val; diff --git a/source/mir/glas/internal/symm.d b/source/mir/glas/internal/symm.d index 7dd574c4..2a15cb10 100644 --- a/source/mir/glas/internal/symm.d +++ b/source/mir/glas/internal/symm.d @@ -1,7 +1,6 @@ module mir.glas.internal.symm; import std.traits; -import std.complex; import std.meta; public import mir.glas.common; @@ -17,10 +16,9 @@ pragma(inline, false) //nothrow @nogc void symm_impl(A, B, C) ( - GlasContext* ctx, C alpha, - Slice!(2, A*) asl, - Slice!(2, B*) bsl, + Slice!(2, const(A)*) asl, + Slice!(2, const(B)*) bsl, C beta, Slice!(2, C*) csl, Conjugated conja, @@ -114,7 +112,7 @@ void symm_impl(A, B, C) } } - auto bl = blocking!(PA, PB, PC, T)(ctx, asl.length!0, bsl.length!1, asl.length!0); + auto bl = blocking!(PA, PB, PC, T)(asl.length!0, bsl.length!1, asl.length!0); size_t j; sizediff_t incb; if (bl.mc < asl.length!0) @@ -195,7 +193,7 @@ void symm_impl(A, B, C) bsl = bsl.transposed; csl = csl.transposed; assert(csl.stride!0 == 1); - auto bl = blocking!(PB, PA, PC, T)(ctx, bsl.length!0, asl.length!0, bsl.length!1); + auto bl = blocking!(PB, PA, PC, T)(bsl.length!0, asl.length!0, bsl.length!1); sizediff_t incb; if (bl.mc < bsl.length!0) incb = bl.kc; diff --git a/source/mir/glas/internal/test.d b/source/mir/glas/internal/test.d index 407a7c10..999048b3 100644 --- a/source/mir/glas/internal/test.d +++ b/source/mir/glas/internal/test.d @@ -2,7 +2,6 @@ module mir.glas.internal.test; import std.traits; import std.meta; -import std.complex; import std.algorithm: min, max; import mir.ndslice; import mir.glas; diff --git a/source/mir/glas/internal/trmm.d b/source/mir/glas/internal/trmm.d index 858ea1b8..88e8c136 100644 --- a/source/mir/glas/internal/trmm.d +++ b/source/mir/glas/internal/trmm.d @@ -1,7 +1,6 @@ module mir.glas.trmm; import std.traits; -import std.complex; import std.meta; version(none): diff --git a/source/mir/glas/internal/trsm.d b/source/mir/glas/internal/trsm.d index c9f017f3..04ec4061 100644 --- a/source/mir/glas/internal/trsm.d +++ b/source/mir/glas/internal/trsm.d @@ -1,7 +1,6 @@ module mir.glas.trsm; import std.traits; -import std.complex; import std.meta; version(none): diff --git a/source/mir/glas/l1.d b/source/mir/glas/l1.d index 0a81723d..e9644b30 100644 --- a/source/mir/glas/l1.d +++ b/source/mir/glas/l1.d @@ -80,7 +80,6 @@ unittest import std.traits; import std.meta; -import std.complex : Complex, conj; import std.typecons: Flag, Yes, No; import mir.internal.math; @@ -100,11 +99,15 @@ template _rot(alias c, alias s) auto y = yr; auto t1 = c * x + s * y; static if (isComplex!(typeof(c))) - auto t2 = conj(c) * y; + { + auto t2 = (c.re - c.im * 1fi) * y; + } else auto t2 = c * y; static if (isComplex!(typeof(s))) - t2 -= conj(s) * x; + { + t2 -= (s.re - s.im * 1fi) * x; + } else t2 -= s * x; xr = t1; @@ -130,7 +133,7 @@ A _fmuladdc(A, B, C)(A a, in B b, in C c) { static if (isComplex!B) { - return a + conj(b) * c; + return a + (b.re - b.im * 1fi) * c; } else return a + b * c; @@ -263,16 +266,14 @@ unittest unittest { import mir.ndslice.slice: slice; - import std.complex; - alias cd = Complex!double; - - auto a = cd(3, 4); - auto x = slice!cd(2); - auto y = slice!cd(2); - x[] = [cd(0, 1), cd(2, 3)]; - y[] = [cd(4, 5), cd(6, 7)]; + + auto a = 3 + 4i; + auto x = slice!cdouble(2); + auto y = slice!cdouble(2); + x[] = [0 + 1i, 2 + 3i]; + y[] = [4 + 5i, 6 + 7i]; axpy(a, x, y); - assert(y == [a * cd(0, 1) + cd(4, 5), a * cd(2, 3) + cd(6, 7)]); + assert(y == [a * (0 + 1i) + (4 + 5i), a * (2 + 3i) + (6 + 7i)]); } /++ @@ -295,7 +296,7 @@ F dot(F, size_t N, R1, R2)(Slice!(N, R1) x, Slice!(N, R2) y) { assert(x.shape == y.shape, "constraints: x and y must have equal shapes"); pragma(inline, false); - return ndReduce!(_fmuladd, Yes.vectorized)(F(0), x, y); + return ndReduce!(_fmuladd, Yes.vectorized)(cast(F)(0), x, y); } } @@ -342,14 +343,12 @@ unittest unittest { import mir.ndslice.slice: slice; - import std.complex; - alias cd = Complex!double; - auto x = slice!cd(2); - auto y = slice!cd(2); - x[] = [cd(0, 1), cd(2, 3)]; - y[] = [cd(4, 5), cd(6, 7)]; - assert(dot(x, y) == cd(0, 1) * cd(4, 5) + cd(2, 3) * cd(6, 7)); + auto x = slice!cdouble(2); + auto y = slice!cdouble(2); + x[] = [0 + 1i, 2 + 3i]; + y[] = [4 + 5i, 6 + 7i]; + assert(dot(x, y) == (0 + 1i) * (4 + 5i) + (2 + 3i) * (6 + 7i)); } /++ @@ -373,7 +372,7 @@ F dotc(F, size_t N, R1, R2)(Slice!(N, R1) x, Slice!(N, R2) y) { assert(x.shape == y.shape, "constraints: x and y must have equal shapes"); pragma(inline, false); - return ndReduce!(_fmuladdc, Yes.vectorized)(F(0), x, y); + return ndReduce!(_fmuladdc, Yes.vectorized)(cast(F)(0), x, y); } } @@ -387,14 +386,12 @@ auto dotc(size_t N, R1, R2)(Slice!(N, R1) x, Slice!(N, R2) y) unittest { import mir.ndslice.slice: slice; - import std.complex; - alias cd = Complex!double; - auto x = slice!cd(2); - auto y = slice!cd(2); - x[] = [cd(0, 1), cd(2, 3)]; - y[] = [cd(4, 5), cd(6, 7)]; - assert(dotc(x, y) == cd(0, -1) * cd(4, 5) + cd(2, -3) * cd(6, 7)); + auto x = slice!cdouble(2); + auto y = slice!cdouble(2); + x[] = [0 + 1i, 2 + 3i]; + y[] = [4 + 5i, 6 + 7i]; + assert(dotc(x, y) == (0 + -1i) * (4 + 5i) + (2 + -3i) * (6 + 7i)); } /++ @@ -435,11 +432,9 @@ unittest { import mir.ndslice.slice: slice; import std.math: sqrt, approxEqual; - import std.complex; - alias cd = Complex!double; - auto x = slice!cd(2); - x[] = [cd(0, 1), cd(2, 3)]; + auto x = slice!cdouble(2); + x[] = [0 + 1i, 2 + 3i]; assert(nrm2(x).approxEqual(sqrt(1.0 + 4 + 9))); } @@ -484,11 +479,9 @@ unittest unittest { import mir.ndslice.slice: slice; - import std.complex; - alias cd = Complex!double; - auto x = slice!cd(2); - x[] = [cd(0, 1), cd(2, 3)]; + auto x = slice!cdouble(2); + x[] = [0 + 1i, 2 + 3i]; assert(sqnrm2(x) == 1.0 + 4 + 9); } @@ -535,11 +528,9 @@ unittest unittest { import mir.ndslice.slice: slice; - import std.complex; - alias cd = Complex!double; - auto x = slice!cd(2); - x[] = [cd(0, -1), cd(-2, 3)]; + auto x = slice!cdouble(2); + x[] = [0 - 1i, -2 + 3i]; assert(asum(x) == 1 + 2 + 3); } @@ -619,12 +610,10 @@ unittest unittest { import mir.ndslice.slice: slice; - import std.complex; - alias cd = Complex!double; - auto x = slice!cd(4); + auto x = slice!cdouble(4); // 0 1 2 3 - x[] = [cd(0, -1), cd(-2, 3), cd(2, 3), cd(2, 2)]; + x[] = [0 + -1i, -2 + 3i, 2 + 3i, 2 + 2i]; assert(iamax(x) == 1); // -1 for empty vectors @@ -670,11 +659,9 @@ unittest unittest { import mir.ndslice.slice: slice; - import std.complex; - alias cd = Complex!double; - auto x = slice!cd(4); - x[] = [cd(0, -1), cd(-7, 3), cd(2, 3), cd(2, 2)]; + auto x = slice!cdouble(4); + x[] = [0 + -1i, -7 + 3i, 2 + 3i, 2 + 2i]; assert(amax(x) == 10); // 0 for empty vectors diff --git a/source/mir/glas/l2.d b/source/mir/glas/l2.d index fc8d3bb5..0d5efb95 100644 --- a/source/mir/glas/l2.d +++ b/source/mir/glas/l2.d @@ -30,7 +30,6 @@ public import mir.glas.common; import std.traits; import std.meta; -import std.complex : Complex, conj; import std.typecons: Flag, Yes, No; import mir.internal.math; @@ -50,7 +49,6 @@ Performs general matrix-vector multiplication. Pseudo_code: `y := alpha A × x + beta y`. Params: - ctx = GLAS context. Should not be accessed by other threads. alpha = scalar asl = `m ⨉ n` matrix xsl = `n ⨉ 1` vector @@ -67,13 +65,12 @@ BLAS: SGEMV, DGEMV, (CGEMV, ZGEMV are not implemented for now) See_also: $(SUBREF common, Conjugated). +/ -nothrow @nogc +nothrow @nogc @system void gemv(A, B, C) ( - GlasContext* ctx, C alpha, - Slice!(2, A*) asl, - Slice!(1, B*) xsl, + Slice!(2, const(A)*) asl, + Slice!(1, const(B)*) xsl, C beta, Slice!(1, C*) ysl, Conjugated conja = Conjugated.no, @@ -141,8 +138,7 @@ unittest auto c = slice!double(3); - auto glas = new GlasContext; - glas.gemv!(double, double, double)(1.0, a, b, 0.0, c); + gemv!(double, double, double)(1.0, a, b, 0.0, c); assert(c == [-42.0, diff --git a/source/mir/glas/l3.d b/source/mir/glas/l3.d index 8a731e50..9165186b 100644 --- a/source/mir/glas/l3.d +++ b/source/mir/glas/l3.d @@ -32,7 +32,6 @@ public import mir.glas.common; import std.traits; import std.meta; -import std.complex: Complex; import mir.ndslice.slice; import mir.internal.utility; @@ -45,7 +44,6 @@ Performs general matrix-matrix multiplication. Pseudo_code: `C := alpha A × B + beta C`. Params: - ctx = GLAS context. Should not be accessed by other threads. alpha = scalar asl = `m ⨉ k` matrix bsl = `k ⨉ n` matrix @@ -67,12 +65,11 @@ pragma(inline, true) nothrow @nogc void gemm(A, B, C) ( - GlasContext* ctx, C alpha, - Slice!(2, A*) asl, - Slice!(2, B*) bsl, + Slice!(2, const(A)*) asl, + Slice!(2, const(B)*) bsl, C beta, - Slice!(2, C*) csl, + Slice!(2, C*) csl, Conjugated conja = Conjugated.no, Conjugated conjb = Conjugated.no, ) @@ -91,7 +88,6 @@ body static assert(is(Unqual!C == C), msgWrongType); import mir.glas.internal.gemm: gemm_impl; gemm_impl( - ctx, alpha, cast(Slice!(2, Unqual!A*)) asl, cast(Slice!(2, Unqual!B*)) bsl, @@ -123,8 +119,7 @@ unittest auto c = slice!double(3, 4); - auto glas = new GlasContext; - glas.gemm(1.0, a, b, 0.0, c); + gemm(1.0, a, b, 0.0, c); assert(c == [[-42.0, 35, -7, 77], @@ -138,8 +133,7 @@ unittest auto b = slice!double(0, 4); auto c = slice!double(3, 4); - auto glas = new GlasContext; - glas.gemm(1.0, a, b, 0.0, c); + gemm(1.0, a, b, 0.0, c); assert(c == [[0.0, 0, 0, 0], @@ -147,7 +141,7 @@ unittest [0.0, 0, 0, 0]]); c[] = 2; - glas.gemm(1.0, a, b, 2, c); + gemm(1.0, a, b, 2, c); assert(c == [[4.0, 4, 4, 4], @@ -163,7 +157,6 @@ Pseudo_code: `C := alpha A × B + beta C` or `C := alpha B × A + beta C`, `C` are `m × n` matrices. Params: - ctx = GLAS context. Should not be accessed by other threads. side = specifies whether the symmetric matrix A appears on the left or right in the operation. uplo = specifies whether the upper or lower triangular @@ -198,14 +191,13 @@ pragma(inline, true) nothrow @nogc void symm(A, B, C) ( - GlasContext* ctx, Side side, Uplo uplo, C alpha, - Slice!(2, A*) asl, - Slice!(2, B*) bsl, + Slice!(2, const(A)*) asl, + Slice!(2, const(B)*) bsl, C beta, - Slice!(2, C*) csl, + Slice!(2, C*) csl, Conjugated conja = Conjugated.no, Conjugated conjb = Conjugated.no, ) @@ -238,7 +230,6 @@ body import mir.glas.internal.symm: symm_impl; symm_impl( - ctx, alpha, cast(Slice!(2, Unqual!A*)) asl, cast(Slice!(2, Unqual!B*)) bsl, @@ -268,8 +259,7 @@ unittest auto c = slice!double(3, 4); - auto glas = new GlasContext; - glas.symm(Side.left, Uplo.lower, 1.0, a, b, 0.0, c); + symm(Side.left, Uplo.lower, 1.0, a, b, 0.0, c); assert(c == [[ 38, 23, 20, 2], @@ -281,30 +271,26 @@ unittest unittest { import mir.ndslice; - import std.complex; - alias cd = Complex!double; - auto a = slice!cd(3, 3); + auto a = slice!cdouble(3, 3); a[] = - [[cd(-2, 0), cd.init, cd.init], - [cd(+3, 2), cd(-5, 0), cd.init], - [cd(-4, 7), cd(-2, 3), cd(-3, 0)]]; + [[-2 + 0i, cdouble.init, cdouble.init], + [+3 + 2i, -5 + 0i, cdouble.init], + [-4 + 7i, -2 + 3i, -3 + 0i]]; - auto b = slice!cd(3, 4); + auto b = slice!cdouble(3, 4); b[] = - [[cd(-5, 3), cd(-3, 9), cd( 3, 2), cd(1, 2)], - [cd( 4, 5), cd( 3, 4), cd( 6, 5), cd(4, 9)], - [cd(-4, 2), cd(-2, 2), cd(-2, 7), cd(2, 6)]]; + [[-5 + 3i, -3 + 9i, 3 + 2i, 1 + 2i], + [ 4 + 5i, 3 + 4i, 6 + 5i, 4 + 9i], + [-4 + 2i, -2 + 2i, -2 + 7i, 2 + 6i]]; - auto c = slice!cd(3, 4); - auto d = slice!cd(3, 4); + auto c = slice!cdouble(3, 4); + auto d = slice!cdouble(3, 4); - auto glas = new GlasContext; + symm(Side.left, Uplo.lower, 1 + 0i, a, b, 0 + 0i, c, Conjugated.yes); - glas.symm(Side.left, Uplo.lower, cd(1.0), a, b, cd(0.0), c, Conjugated.yes); - - ndEach!((ref a, ref b){a = conj(b);}, Select.triangular)(a, a.transposed); - glas.gemm(cd(1.0), a, b, cd(0.0), d); + ndEach!((ref a, ref b){a = (b.re - b.im * 1fi);}, Select.triangular)(a, a.transposed); + gemm(1 + 0i, a, b, 0 + 0i, d); assert(c == d); } @@ -315,8 +301,7 @@ unittest auto b = slice!double(3, 4); auto c = slice!double(3, 4); - auto glas = new GlasContext; - glas.symm(Side.left, Uplo.lower, 0.0, a, b, 0.0, c); + symm(Side.left, Uplo.lower, 0.0, a, b, 0.0, c); assert(c == [[0.0, 0, 0, 0], @@ -324,7 +309,7 @@ unittest [0.0, 0, 0, 0]]); c[] = 2; - glas.symm(Side.left, Uplo.upper, 0.0, a, b, 2, c); + symm(Side.left, Uplo.upper, 0.0, a, b, 2, c); assert(c == [[4.0, 4, 4, 4], diff --git a/source/mir/internal/utility.d b/source/mir/internal/utility.d index 0c0d89a4..4ca49f47 100644 --- a/source/mir/internal/utility.d +++ b/source/mir/internal/utility.d @@ -22,17 +22,18 @@ enum isSimpleSlice(S) = is(S : Slice!(N1, T1[]), size_t N1,T1) || is(S : Slice!( template realType(C) { - import std.complex : Complex; - static if (is(C : Complex!F, F)) - alias realType = Unqual!F; + static if (isComplex!C) + alias realType = typeof(Unqual!C.init.re); else alias realType = Unqual!C; } template isComplex(C) { - import std.complex : Complex; - enum bool isComplex = is(C : Complex!F, F); + enum bool isComplex + = is(Unqual!C == creal) + || is(Unqual!C == cdouble) + || is(Unqual!C == cfloat); } //enum isSIMDVector(V) = is(V : __vector(F[N]), F, size_t N); diff --git a/source/mir/sum.d b/source/mir/sum.d index 88558a92..7bbc3eb7 100644 --- a/source/mir/sum.d +++ b/source/mir/sum.d @@ -7,6 +7,8 @@ Authors: Ilya Yaroshenko */ module mir.sum; +import ldc.attributes: fastmath; + /// unittest { @@ -90,12 +92,11 @@ All summation algorithms available for complex numbers. +/ unittest { - import std.complex; - Complex!double[] ar = [complex(1.0, 2), complex(2, 3), complex(3, 4), complex(4, 5)]; - Complex!double r = complex(10, 14); + cdouble[] ar = [1.0 + 2i, 2 + 3i, 3 + 4i, 4 + 5i]; + cdouble r = 10 + 14i; assert(r == ar.sum!(Summation.fast)); assert(r == ar.sum!(Summation.naive)); - //assert(r == ar.sum!(Summation.pairwise)); + assert(r == ar.sum!(Summation.pairwise)); assert(r == ar.sum!(Summation.kahan)); assert(r == ar.sum!(Summation.kbn)); assert(r == ar.sum!(Summation.kb2)); @@ -246,6 +247,7 @@ import std.traits; import std.typecons; import std.range.primitives; import std.math: isInfinity, isFinite, isNaN, signbit; +import mir.internal.utility: isComplex; /++ @@ -357,7 +359,7 @@ enum Summation naive, /++ - SIMD optimized summation algorithm. It is identical to `naive` for now. + SIMD optimized summation algorithm. +/ fast, } @@ -369,6 +371,17 @@ struct Summator(T, Summation summation) if (isMutable!T) { import std.math: fabs; + //import mir.internal.math: fabs; + + static if (summation == Summation.fast) + alias attr = fastmath; + else + { + import std.meta: AliasSeq; + alias attr = AliasSeq!(); + } + + @attr: static if (summation != Summation.pairwise) @disable this(); @@ -560,20 +573,34 @@ public: static if (summation == Summation.kb2) { s = n; - cs = 0.0; - ccs = 0.0; + static if (isComplex!T) + { + cs = 0 + 0fi; + ccs = 0 + 0fi; + } + else + { + cs = 0.0; + ccs = 0.0; + } } else static if (summation == Summation.kbn) { s = n; - c = 0.0; + static if (isComplex!T) + c = 0 + 0fi; + else + c = 0.0; } else static if (summation == Summation.kahan) { s = n; - c = 0.0; + static if (isComplex!T) + c = 0 + 0fi; + else + c = 0.0; } else static if (summation == Summation.pairwise) @@ -719,29 +746,33 @@ public: F t = s + x; if (fabs(s.re) < fabs(x.re)) { - auto t_re = s.re; - s.re = x.re; - x.re = t_re; + auto s_re = s.re; + auto x_re = x.re; + s = x_re + s.im * 1fi; + x = s_re + x.im * 1fi; } if (fabs(s.im) < fabs(x.im)) { - auto t_im = s.im; - s.im = x.im; - x.im = t_im; + auto s_im = s.im; + auto x_im = x.im; + s = s.re + x_im * 1fi; + x = x.re + s_im * 1fi; } F c = (s-t)+x; s = t; if (fabs(cs.re) < fabs(c.re)) { - auto t_re = cs.re; - cs.re = c.re; - c.re = t_re; + auto c_re = c.re; + auto cs_re = cs.re; + c = cs_re + c.im * 1fi; + cs = c_re + cs.im * 1fi; } if (fabs(cs.im) < fabs(c.im)) { - auto t_im = cs.im; - cs.im = c.im; - c.im = t_im; + auto c_im = c.im; + auto cs_im = cs.im; + c = c.re + cs_im * 1fi; + cs = cs.re + c_im * 1fi; } F d = cs - t; d += c; @@ -774,15 +805,17 @@ public: F t = s + x; if (fabs(s.re) < fabs(x.re)) { - auto t_re = s.re; - s.re = x.re; - x.re = t_re; + auto s_re = s.re; + auto x_re = x.re; + s = x_re + s.im * 1fi; + x = s_re + x.im * 1fi; } if (fabs(s.im) < fabs(x.im)) { - auto t_im = s.im; - s.im = x.im; - x.im = t_im; + auto s_im = s.im; + auto x_im = x.im; + s = s.re + x_im * 1fi; + x = x.re + s_im * 1fi; } F d = s - t; d += x; @@ -1462,20 +1495,34 @@ public: static if (summation == Summation.kb2) { s = rhs; - cs = 0.0; - ccs = 0.0; + static if (isComplex!T) + { + cs = 0 + 0fi; + ccs = 0 + 0fi; + } + else + { + cs = 0.0; + ccs = 0.0; + } } else static if (summation == Summation.kbn) { s = rhs; - c = 0.0; + static if (isComplex!T) + c = 0 + 0fi; + else + c = 0.0; } else static if (summation == Summation.kahan) { s = rhs; - c = 0.0; + static if (isComplex!T) + c = 0 + 0fi; + else + c = 0.0; } else static if (summation == Summation.pairwise) @@ -1816,33 +1863,10 @@ unittest foreach (t; test[0]) summator.put(t); auto r = test[1]; auto s = summator.sum; - import core.exception: AssertError; - try - { - version(X86) - { - import std.math: nextDown, nextUp; - assert(summator.isNaN() == r.isNaN()); - assert(summator.isFinite() == r.isFinite() || r == -double.max - && s == -double.infinity || r == double.max && s == double.infinity); - assert(summator.isInfinity() == r.isInfinity() || r == -double.max - && s == -double.infinity || r == double.max && s == double.infinity); - assert(nextDown(s) <= r && r <= nextUp(s) || s.isNaN && r.isNaN); - } - else - { - assert(summator.isNaN() == r.isNaN()); - assert(summator.isFinite() == r.isFinite()); - assert(summator.isInfinity() == r.isInfinity()); - assert(s == r || s.isNaN && r.isNaN); - } - } - catch(AssertError e) - { - import std.stdio; - stderr.writefln("i = %s, result = %s (%A), target = %s (%A)", i, s, s, r, r); - throw e; - } + assert(summator.isNaN() == r.isNaN()); + assert(summator.isFinite() == r.isFinite()); + assert(summator.isInfinity() == r.isInfinity()); + assert(s == r || s.isNaN && r.isNaN); } } @@ -2030,7 +2054,7 @@ unittest /++ Precise summation. +/ -private F sumPrecise(Range, F)(Range r, F seed = 0.0) +private F sumPrecise(Range, F)(Range r, F seed = summationInitValue!F) if (isFloatingPoint!F || isComplex!F) { static if (isFloatingPoint!F) @@ -2053,7 +2077,7 @@ private F sumPrecise(Range, F)(Range r, F seed = 0.0) sumRe.put(e.re); sumIm.put(e.im); } - return F(sumRe.sum, sumIm.sum); + return sumRe.sum + sumIm.sum * 1fi; } } @@ -2104,10 +2128,15 @@ private T summationInitValue(T)() return a; } else + static if (__traits(compiles, {T a = 0 + 0fi;})) + { + T a = 0 + 0fi; + return a; + } + else { return T.init; } - } private template sumType(Range) @@ -2147,12 +2176,6 @@ unittest static assert(isSummable!(typeof(iota(ulong.init, ulong.init, ulong.init)), double)); } -template isComplex(C) -{ - import std.complex : Complex; - enum bool isComplex = is(C : Complex!F, F); -} - private enum bool isCompesatorAlgorithm(Summation summation) = summation == Summation.precise || summation == Summation.kb2