From b5d2e651402ea7c1a6ffd6601d8a909f91723486 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Wed, 21 Aug 2024 16:02:02 -0400
Subject: [PATCH] Added compose for scalar TPS functions

---
 docs/src/devel.md |  8 +++++
 src/methods.jl    | 88 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 93 insertions(+), 3 deletions(-)
diff --git a/docs/src/devel.md b/docs/src/devel.md
index 6519e55..8a00c47 100644
--- a/docs/src/devel.md
+++ b/docs/src/devel.md
@@ -85,6 +85,14 @@ The `@FastGTPSA`/`@FastGTPSA!` macros work by changes all arithmetic operators i
 
 All arithmetic operators are changed to `GTPSA.:<special symbols>`, e.g. `+` → `GTPSA.:±`. All non-arithmetic operators that are supported by GTPSA are then changed to `GTPSA.__t_<operator>`, e.g. `sin` → `GTPSA.__t_sin`, where the prefix `__t_` is also chosen somewhat arbitrarily. These operators are all defined in `fastgtpsa/operators.jl`, and when they encounter a TPS type, they use the temporaries, and when other number types are detected, they fallback to the regular, non-`__t_` operator. This approach works extraordinarily well, and introduces no problems externally because none of these functions/symbols are exported.
 
+## Calling the C-library with pointer-to-pointers
+
+All of the GTPSA map functions required a vector of `TPS` as input, in C `**tpsa`. In Julia, this works automatically for `Vector{<:Union{TPS64,ComplexTPS64}}` by specifying the C argument type in the C call as `::Ptr{TPS64}` or `::Ptr{ComplexTPS64}`. However, in some cases, one might have only a single `TPS64` and would like the call the corresponding map function without having to allocate an array. After some experimenting, I've found the following solution to have zero allocations, using `compose!` as an example:
+
+```julia
+mad_compose!(na, ma::TPS64, nb, mb::AbstractVector{TPS64}, mc::TPS64) = GC.@preserve ma mc @ccall MAD_TPSA.mad_tpsa_compose(Cint(na)::Cint, Ref(pointer_from_objref(ma))::Ptr{Cvoid}, Cint(nb)::Cint, mb::Ptr{TPS64}, Ref(pointer_from_objref(mc))::Ptr{Cvoid})::Cvoid
+```
+
 ## Low-Level
 
 Below is documentation for every single 1-to-1 C function in the GTPSA library. If there is any function missing, please submit an issue to `GTPSA.jl`.
diff --git a/src/methods.jl b/src/methods.jl
index cac1836..a0e322d 100644
--- a/src/methods.jl
+++ b/src/methods.jl
@@ -328,9 +328,12 @@ scalar(t::Number) = t[1]
 mad_compose!(na, ma::AbstractVector{TPS{Float64}},    nb, mb::AbstractVector{TPS{Float64}},    mc::AbstractVector{TPS{Float64}}) = mad_tpsa_compose!(Cint(na), ma, Cint(nb), mb, mc)
 mad_compose!(na, ma::AbstractVector{TPS{ComplexF64}}, nb, mb::AbstractVector{TPS{ComplexF64}}, mc::AbstractVector{TPS{ComplexF64}}) = mad_ctpsa_compose!(Cint(na), ma, Cint(nb), mb, mc)
 
+# This is for composing single TPSs with ZERO allocations:
+mad_compose!(na, ma::TPS64,        nb, mb::AbstractVector{TPS64},        mc::TPS64)        = GC.@preserve ma mc @ccall MAD_TPSA.mad_tpsa_compose(Cint(na)::Cint, Ref(pointer_from_objref(ma))::Ptr{Cvoid}, Cint(nb)::Cint, mb::Ptr{TPS64}, Ref(pointer_from_objref(mc))::Ptr{Cvoid})::Cvoid
+mad_compose!(na, ma::ComplexTPS64, nb, mb::AbstractVector{ComplexTPS64}, mc::ComplexTPS64) = GC.@preserve ma mc @ccall MAD_TPSA.mad_tpsa_compose(Cint(na)::Cint, Ref(pointer_from_objref(ma))::Ptr{Cvoid}, Cint(nb)::Cint, mb::Ptr{ComplexTPS64}, Ref(pointer_from_objref(mc))::Ptr{Cvoid})::Cvoid
 
 """
-    compose!(m, m2, m1; work::Union{Nothing,Vector{TPS{ComplexF64}}}=nothing)compose!(m::Vector{TPS{<:Union{Float64,ComplexF64}}}, m2::Vector{TPS{<:Union{Float64,ComplexF64}}}, m1::Vector{TPS{<:Union{Float64,ComplexF64}}}; work::Union{Nothing,Vector{TPS{ComplexF64}}}=nothing)
+    compose!(m::AbstractVector{<:Union{TPS64,ComplexTPS64}}, m2::AbstractVector{<:Union{TPS64,ComplexTPS64}}, m1::AbstractVector{<:Union{TPS64,ComplexTPS64}}; work::Union{Nothing,AbstractVector{ComplexTPS64}}=nothing)
 
 Composes the vector functions `m2 ∘ m1` and stores the result in-place in `m`. 
 Promotion is allowed, provided the output vector function `m` has the correct type. 
@@ -347,7 +350,8 @@ else if `eltype(m.x) != eltype(m2.x)` (then `m2` must be promoted):
 
 The `ComplexTPS64`s in `work` must be defined and have the same `Descriptor`.
 """
-function compose!(m, m2, m1; work::Union{Nothing,AbstractVector{TPS{ComplexF64}}}=nothing)
+function compose!(m::AbstractVector{<:Union{TPS64,ComplexTPS64}}, m2::AbstractVector{<:Union{TPS64,ComplexTPS64}}, m1::AbstractVector{<:Union{TPS64,ComplexTPS64}}; work::Union{Nothing,AbstractVector{ComplexTPS64}}=nothing)
+  Base.require_one_based_indexing(m, m2, m1)
   n = length(m)
   n2 = length(m2)
   n1 = length(m1)
@@ -400,6 +404,70 @@ function compose!(m, m2, m1; work::Union{Nothing,AbstractVector{TPS{ComplexF64}}
   end
 end
 
+"""
+    compose!(m::TPS, m2::TPS, m1::AbstractVector{<:Union{TPS64,ComplexTPS64}}; work::Union{Nothing,ComplexTPS64,AbstractVector{ComplexTPS64}}=nothing)
+
+Composes the scalar TPS function `m2` with vector function `m1`, and stores the result in-place in `m`. 
+Promotion is allowed, provided the output function `m` has the correct type. 
+
+If promotion is occuring, then the inputs must be promoted to `ComplexTPS64`. If `m1` is 
+to be promoted, a vector of pre-allocated `ComplexTPS64`s can optionally provided 
+in `work`, and has the requirement:
+
+If `eltype(m.x) != eltype(m1.x)` (then `m1` must be promoted):
+`work = m1_prom  # Length >= length(m1), Vector{ComplexTPS64}`
+
+Else if `m2` is to be promoted (`typeof(m.x) != typeof(m2.x)`), a single `ComplexTPS64` 
+can be provided in `work`:  
+
+`work = m2_prom  # ComplexTPS64`
+
+The `ComplexTPS64`(s) in `work` must be defined and have the same `Descriptor`.
+"""
+function compose!(m::TPS, m2::TPS, m1::AbstractVector{<:Union{TPS64,ComplexTPS64}}; work::Union{Nothing,ComplexTPS64,AbstractVector{ComplexTPS64}}=nothing)
+  n1 = length(m1)
+
+  # Checks:
+  numnn(m2) == n1 || error("Not enough input arguments")
+  typeof(m) == promote_type(typeof(m2),eltype(m1)) || error("Cannot compose: output type $(typeof(m)) must be $(promote_type(typeof(m2),eltype(m1)))")
+
+  # Check if promoting
+  if typeof(m) != eltype(m1)  # Promoting m1
+    if isnothing(work)
+      m1_prom = Vector{TPS{ComplexF64}}(undef, n1)
+      for i=1:n1  # Allocate
+        @inbounds m1_prom[i] = TPS{ComplexF64}(use=first(m))
+      end
+    else
+      Base.require_one_based_indexing(work)
+      m1_prom = work
+      @assert (work isa AbstractVector{ComplexTPS64}) "Incorrect type for work = m1_prom: `work isa AbstractVector{ComplexTPS64}` must be `true`"
+      @assert length(work) >= n1 "Incorrect length for work = m1_prom: Received $(length(work)), should be >=$n1"
+    end
+
+    for i=1:n1
+      @inbounds copy!(m1_prom[i], m1[i])
+    end
+
+    mad_compose!(-1, m2, n1, m1_prom, m)
+
+  elseif eltype(m) != eltype(m2) # Promoting m2
+    if isnothing(work)
+      m2_prom = TPS{ComplexF64}(use=first(m))
+    else
+      m2_prom = work
+      @assert (work isa ComplexTPS64) "Incorrect type for work = m2_prom: `work isa ComplexTPS64` must be `true`"
+    end
+    
+    copy!(m2_prom, m2)
+    mad_compose!(-1, m2_prom, n1, m1, m)
+    
+  else  # No promotion, just do it
+    mad_compose!(-1, m2, n1, m1, m)
+  end
+end
+
+
 """
     compose(m2::AbstractVector{<:TPS{<:Union{Float64,ComplexF64}}}, m1::AbstractVector{<:TPS{<:Union{Float64,ComplexF64}}})
 
@@ -418,7 +486,21 @@ function compose(m2::AbstractVector{<:TPS{<:Union{Float64,ComplexF64}}}, m1::Abs
   return m
 end
 
-∘(m2::Vector{<:TPS{<:Union{Float64,ComplexF64}}}, m1::Vector{<:TPS{<:Union{Float64,ComplexF64}}}) = compose(m2, m1)
+"""
+    compose(m2::TPS, m1::AbstractVector{<:TPS{<:Union{Float64,ComplexF64}}})
+
+Composes the scalar TPS functions `m2` with vector function `m1`
+"""
+function compose(m2::TPS, m1::AbstractVector{<:TPS{<:Union{Float64,ComplexF64}}})
+  Base.require_one_based_indexing(m1)
+  desc = getdesc(first(m2))
+  outT = promote_type(typeof(m2),eltype(m1))
+  m = outT(use=desc)
+  compose!(m, m2, m1)
+  return m
+end
+
+∘(m2::Union{TPS,Vector{<:TPS{<:Union{Float64,ComplexF64}}}}, m1::Vector{<:TPS{<:Union{Float64,ComplexF64}}}) = compose(m2, m1)
 
 # --- translate ---
 mad_translate!(na, ma::Vector{TPS{Float64}},    nb, tb, mc::Vector{TPS{Float64}})    = mad_tpsa_translate!(Cint(na), ma, Cint(nb), convert(Vector{Float64}, tb), mc)