diff --git a/Project.toml b/Project.toml index 21fd26c3..2d028c05 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LoopVectorization" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" authors = ["Chris Elrod "] -version = "0.12.168" +version = "0.12.169" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" diff --git a/src/codegen/lower_threads.jl b/src/codegen/lower_threads.jl index 6c268a30..40f8b541 100644 --- a/src/codegen/lower_threads.jl +++ b/src/codegen/lower_threads.jl @@ -495,7 +495,7 @@ function thread_one_loops_expr( $AM, $LPSYM, Val(typeof(var"#avx#call#args#")), - flatten_to_tuple(var"#avx#call#args#")... + var"#avx#call#args#"... )) update_return_values = if length(ls.outer_reductions) > 0 retv = loopset_return_value(ls, Val(false)) @@ -555,7 +555,7 @@ function thread_one_loops_expr( $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), - flatten_to_tuple(var"##lbvargs#to_launch##"), + var"##lbvargs#to_launch##", var"#thread#id#" ) @@ -744,7 +744,7 @@ function thread_two_loops_expr( $AM, $LPSYM, Val(typeof(var"#avx#call#args#")), - flatten_to_tuple(var"#avx#call#args#")... + var"#avx#call#args#"... )) update_return_values = if length(ls.outer_reductions) > 0 retv = loopset_return_value(ls, Val(false)) @@ -867,7 +867,7 @@ function thread_two_loops_expr( $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), - flatten_to_tuple(var"##lbvargs#to_launch##"), + var"##lbvargs#to_launch##", var"#thread#id#" ) var"#thread#mask#" >>>= var"#trailzing#zeros#" diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index fd0411e8..69c65933 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -4,71 +4,6 @@ Base.:|(u::Unsigned, it::IndexType) = u | UInt8(it) Base.:(==)(u::Unsigned, it::IndexType) = (u % UInt8) == UInt8(it) -function _append_fields!(t::Expr, body::Expr, sym::Symbol, ::Type{T}) where {T} - for f ∈ 1:fieldcount(T) - TF = fieldtype(T, f) - Base.issingletontype(TF) && continue - gfcall = Expr(:call, getfield, sym, f) - if fieldcount(TF) ≡ 0 - push!(t.args, gfcall) - elseif TF <: DataType - push!(t.args, Expr(:call, Expr(:curly, lv(:StaticType), gfcall))) - else - newsym = gensym(sym) - push!(body.args, Expr(:(=), newsym, gfcall)) - _append_fields!(t, body, newsym, TF) - end - end - return nothing -end -@generated function flatten_to_tuple(r::T) where {T} - body = Expr(:block, Expr(:meta, :inline)) - t = Expr(:tuple) - if Base.issingletontype(T) - nothing - elseif fieldcount(T) ≡ 0 - push!(t.args, :r) - elseif T <: DataType - push!(t.args, Expr(:call, Expr(:curly, lv(:StaticType), :r))) - else - _append_fields!(t, body, :r, T) - end - push!(body.args, t) - body -end -function rebuild_fields(offset::Int, ::Type{T}) where {T} - call = (T <: Tuple) ? Expr(:tuple) : Expr(:new, T) - for f ∈ 1:fieldcount(T) - TF = fieldtype(T, f) - if Base.issingletontype(TF) - push!(call.args, TF.instance) - elseif fieldcount(TF) ≡ 0 - push!(call.args, Expr(:call, getfield, :t, (offset += 1))) - elseif TF <: DataType - push!( - call.args, - Expr(:call, lv(:gettype), Expr(:call, getfield, :t, (offset += 1))) - ) - else - arg, offset = rebuild_fields(offset, TF) - push!(call.args, arg) - end - end - return call, offset -end -@generated function reassemble_tuple(::Type{T}, t::Tuple) where {T} - if Base.issingletontype(T) - return T.instance - elseif fieldcount(T) ≡ 0 - call = Expr(:call, getfield, :t, 1) - elseif T <: DataType - call = Expr(:call, lv(:gettype), Expr(:call, getfield, :t, 1)) - else - call, _ = rebuild_fields(0, T) - end - Expr(:block, Expr(:meta, :inline), call) -end - """ ArrayRefStruct @@ -893,9 +828,9 @@ function generate_call_types( ) ) if manyarg - push!(q.args, Expr(:call, lv(:flatten_to_tuple), vargsym)) + push!(q.args, vargsym) else - push!(q.args, Expr(:(...), Expr(:call, lv(:flatten_to_tuple), vargsym))) + push!(q.args, Expr(:(...), vargsym)) end Expr(:block, Expr(:(=), vargsym, Expr(:tuple, lbarg, extra_args))) end diff --git a/src/reconstruct_loopset.jl b/src/reconstruct_loopset.jl index d044fbeb..51c93ffb 100644 --- a/src/reconstruct_loopset.jl +++ b/src/reconstruct_loopset.jl @@ -1111,7 +1111,7 @@ Execute an `@turbo` block. The block's code is represented via the arguments: ::Val{var"#AM#"}, ::Val{var"#LPSYM#"}, ::Val{Tuple{var"#LB#",var"#V#"}}, - var"#flattened#var#arguments#"::Vararg{Any,var"#num#vargs#"} + var"#lv#tuple#args#"::Vararg{Any,var"#num#vargs#"} ) where { var"#UNROLL#", var"#OPS#", @@ -1132,15 +1132,6 @@ Execute an `@turbo` block. The block's code is represented via the arguments: var"#V#".parameters, var"#UNROLL#" ) - pushfirst!( - ls.preamble.args, - :( - var"#lv#tuple#args#" = reassemble_tuple( - Tuple{var"#LB#",var"#V#"}, - var"#flattened#var#arguments#" - ) - ) - ) post = hoist_constant_memory_accesses!(ls) # q = @show(avx_body(ls, var"#UNROLL#")); post === ls.preamble ? q : Expr(:block, q, post) q = if (var"#UNROLL#"[10] > 1) && length(var"#LPSYM#") == length(ls.loops) @@ -1171,7 +1162,7 @@ end ::Val{var"#AM#"}, ::Val{var"#LPSYM#"}, ::Val{Tuple{var"#LB#",var"#V#"}}, - var"#flattened#var#arguments#"::Tuple{Vararg{Any,var"#num#vargs#"}} + var"#lv#tuple#args#"::Tuple{Vararg{Any,var"#num#vargs#"}} ) where { var"#UNROLL#", var"#OPS#", @@ -1192,15 +1183,6 @@ end var"#V#".parameters, var"#UNROLL#" ) - pushfirst!( - ls.preamble.args, - :( - var"#lv#tuple#args#" = reassemble_tuple( - Tuple{var"#LB#",var"#V#"}, - var"#flattened#var#arguments#" - ) - ) - ) post = hoist_constant_memory_accesses!(ls) # q = @show(avx_body(ls, var"#UNROLL#")); post === ls.preamble ? q : Expr(:block, q, post) q = if (var"#UNROLL#"[10] > 1) && length(var"#LPSYM#") == length(ls.loops)