Merge pull request #123 from bmad-sim/devnewstructs

New TPS struct
bmad-sim · Jul 25, 2024 · ab9a555 · ab9a555
2 parents 15df0ac + 876573b
commit ab9a555
Show file tree

Hide file tree

Showing 45 changed files with 4,891 additions and 6,386 deletions.
diff --git a/Project.toml b/Project.toml
@@ -11,7 +11,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 
 [compat]
-GTPSA_jll = "1.3.4"
+GTPSA_jll = "1.4"
 PrettyTables = "2"
 SpecialFunctions = "2.3.1"
 julia = "1.9"

diff --git a/README.md b/README.md
@@ -3,17 +3,21 @@
 [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://bmad-sim.github.io/GTPSA.jl/dev/)
 [![Build Status](https://github.com/bmad-sim/GTPSA.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/bmad-sim/GTPSA.jl/actions/workflows/CI.yml?query=branch%3Amain)
 
-## Overview
-This package provides a full-featured Julia interface to the [Generalised Truncated Power Series Algebra (GTPSA) library](https://mad.web.cern.ch/mad/releases/madng/html/mad_mod_diffalg.html), which computes Taylor expansions, or Truncated Power Series (TPSs) of real and complex multivariable functions to arbitrary orders. 
+# GTPSA.jl
+
+*A full-featured Julia interface to the Generalised Truncated Power Series Algebra library.*
 
-Truncated Power Series Algebra (TPSA) performs forward-mode automatic differentation (AD) similar to the dual-number implementation of `ForwardDiff.jl`. However, instead of nesting derivatives for higher orders, TPSA naturally extends to arbitary orders by directly using the power series expansions. This, paired with a highly optimized monomial indexing function/storage for propagating the partial derivatives, makes `GTPSA.jl` significantly faster for 2nd-order calculations and above (for 1st-order calculations the performance is similar to `ForwardDiff.jl`).See the [`benchmark/track.jl`](https://github.com/bmad-sim/GTPSA.jl/blob/main/benchmark/track.jl) example for a speed comparison of `GTPSA.jl` with `ForwardDiff.jl` in calculating the partial derivatives for a system with 58 inputs and 6 outputs. **In this example, GTPSA was x3.3 faster than ForwardDiff to 2nd order, and x18.5 faster to 3rd order.**
+`GTPSA.jl` is a full-featured Julia interface to the [Generalised Truncated Power Series Algebra (GTPSA) library](https://github.com/MethodicalAcceleratorDesign/MAD-NG), which computes Taylor expansions, or Truncated Power Series (TPSs), of real and complex multivariable functions to high orders.
+
+Truncated Power Series Algebra (TPSA) performs forward-mode automatic differentation (AD) similar to the typical dual-number implementation, as in `ForwardDiff.jl`. However, instead of nesting derivatives for higher orders, TPSA naturally extends to arbitary orders by directly using the power series expansions. Furthermore, because TPSA is designed for high order AD, extra focus is given to the data structure storing all partial derivatives, as well as indexing/propagating each of the nonzero partial derivatives in an efficient way. With these features, `GTPSA.jl` is significantly faster than `ForwardDiff.jl` for 2nd-order calculations and above, and has similar performance to 1st-order. See the [`benchmark/track.jl`](https://github.com/bmad-sim/GTPSA.jl/blob/main/benchmark/track.jl) example for a speed comparison in calculating the partial derivatives for a system with 58 inputs and 6 outputs. **In this example, GTPSA was x3.5 faster than ForwardDiff to 2nd order, and x19.8 faster to 3rd order.**
 
 GTPSA provides several advantages over current Julia AD packages:
 
 1. **Speed**: `GTPSA.jl` is significantly faster than `ForwardDiff.jl` for 2nd-order calculations and above, and has very similar performance at 1st-order
-2. **Custom Orders in Individual Variables**: Other packages use a single maximum order for all variables. With GTPSA, the maximum order can be set differently for individual variables, as well as for a separate part of the monomial. For example, computing the Taylor expansion of $f(x_1,x_2)$ to 2nd order in $x_1$ and 6th order in $x_2$ is possible
-3. **Complex Numbers**: GTPSA natively supports complex numbers and allows for mixing of complex and real truncated power series
-4. **Distinction Between State Variables and Parameters**: Distinguishing between dependent variables and parameters in the solution of a differential equation expressed as a power series in the dependent variables/parameters can be advantageous in analysis
+2. **Easy Monomial Indexing**: Beyond 2nd order, accessing/manipulating the partial derivatives in an organized way can be a significant challenge when using other AD packages. In GTPSA, three simple indexing schemes for getting/setting monomial coefficients in a truncated power series is provided, as well as a `cycle!` function for cycling through all nonzero monomials
+3. **Custom Orders in Individual Variables**: Other packages use a single maximum order for all variables. With GTPSA, the maximum order can be set differently for individual variables, as well as for a separate part of the monomial. For example, computing the Taylor expansion of $f(x_1,x_2)$ to 2nd order in $x_1$ and 6th order in $x_2$ is possible
+4. **Complex Numbers**: GTPSA natively supports complex numbers and allows for mixing of complex and real truncated power series
+5. **Distinction Between State Variables and Parameters**: Distinguishing between dependent variables and parameters in the solution of a differential equation expressed as a power series in the dependent variables/parameters can be advantageous in analysis
 
 ## Setup
 To use `GTPSA.jl`, in the Julia REPL run
@@ -23,7 +27,7 @@ import Pkg; Pkg.add("GTPSA")
 ```
 
 ## Basic Usage
-First, a `Descriptor` must be created specifying the number of variables, number of parameters, and truncation order for each variable/parameter in the TPSA. A `TPS` or `ComplexTPS` can then be created based on the `Descriptor`. TPSs can be manipulated using all of the arithmetic operators (`+`,`-`,`*`,`/`,`^`) and math functions (e.g. `abs`, `sqrt`, `sin`, `exp`, `log`, `tanh`, etc.).
+First, a `Descriptor` must be created specifying the number of variables, number of parameters, and truncation order(s) for the variables/parameters in the TPSA. A `TPS` can then be created based on the `Descriptor`. TPSs can be manipulated using all of the arithmetic operators (`+`,`-`,`*`,`/`,`^`) and math functions (e.g. `abs`, `sqrt`, `sin`, `exp`, `log`, `tanh`, etc.).
 
 TPSs can be viewed as structures containing the coefficients for all of the monomials of a multivariable Taylor expansion up to the orders specified in the `Descriptor`. As an example, to compute the truncated power series of a function $f(x_1, x_2) = \cos{(x_1)}+i\sin{(x_2)}$ to 6th order in $x_1$ and $x_2$:
 ```julia
@@ -38,13 +42,13 @@ x = vars()
 
 # Manipulate the TPSs as you would any other mathematical variable in Julia
 f = cos(x[1]) + im*sin(x[2])
-# f is a new ComplexTPS 
+# f is a new ComplexTPS64
 ```
 
 Note that scalars do not need to be defined as TPSs when writing expressions. Running `print(f)` gives the output
 
 ```
-ComplexTPS:
+ComplexTPS64:
  Real Imag Order Exponent
  1.0000000000000000e+00 0.0000000000000000e+00 0 0 0
  0.0000000000000000e+00 1.0000000000000000e+00 1 0 1
@@ -55,10 +59,11 @@ ComplexTPS:
  -1.3888888888888887e-03 0.0000000000000000e+00 6 6 0
 ```
 
+The GTPSA library currently only supports truncated power series representing `Float64` and `ComplexF64` number types.
+
 For more details, including using TPSs with differing truncation orders for each variable, see the [GTPSA documentation](https://bmad-sim.github.io/GTPSA.jl/).
 
 ## Acknowledgements
 Much thanks must be given to Laurent Deniau, the creator of the C GTPSA library, for his time and great patience in explaining his code. 
 
-Advanced users are referred to [this paper](https://inspirehep.net/files/286f2ab60e1e7c372cec485337ab5eb6) discussing
-the inner workings of the C GTPSA library.
+Advanced users are referred to [this paper](https://inspirehep.net/files/286f2ab60e1e7c372cec485337ab5eb6) discussing the inner workings of the C GTPSA library.
diff --git a/benchmark/track.jl b/benchmark/track.jl
@@ -2,102 +2,111 @@ using GTPSA
 using ForwardDiff
 using BenchmarkTools: @btime, @benchmark
 
-# As of 05/03/2024, Julia v1.10.2 on Mac M2 Ultra: Comparison with GTPSA for 58 inputs and 6 outputs
+# As of 07/25/2024, Julia v1.10.3 on Mac M2 Ultra: Comparison with GTPSA for 58 inputs and 6 outputs
 # Numbers calculated using BenchmarkTools.@btime
 #
 # 3rd Order ---------------------------------------------------------
 # Using the @FastGTPSA macro:
-# GTPSA: 227.111 ms
-# ForwardDiff: 4.212 s
+# GTPSA: 199.609 ms (2929 allocations: 360.36 MiB)
+# ForwardDiff: 3.427 s (85142 allocations: 3.93 GiB)
 #
 # Without the @FastGTPSA macro (including ForwardDiff as control):
-# GTPSA: 371.465 ms
-# ForwardDiff: 4.096 s 
+# GTPSA: 202.778 ms (19129 allocations: 2.52 GiB)
+# ForwardDiff: 3.944 s  (85142 allocations: 3.93 GiB)
 #
 # 2nd Order ---------------------------------------------------------
 # Using the @FastGTPSA macro:
-# GTPSA: 7.024 ms 
-# ForwardDiff: 23.417 ms
+# GTPSA: 6.465 ms (2927 allocations: 17.81 MiB)
+# ForwardDiff: 25.577 ms (9166 allocations: 63.47 MiB)
 #
 # Without the @FastGTPSA macro (including ForwardDiff as control):
-# GTPSA: 15.594 ms
-# ForwardDiff: 23.318 ms
+# GTPSA: 13.131 ms (19129 allocations: 127.70 MiB)
+# ForwardDiff: 22.839 ms (9166 allocations: 63.47 MiB)
 #
 # 1st Order ---------------------------------------------------------
 # Using the @FastGTPSA macro:
-# GTPSA: 280.542 μs 
-# ForwardDiff: 188.792 μs
+# GTPSA: 282.375 μs (2927 allocations: 715.48 KiB)
+# ForwardDiff: 223.375 μs (1520 allocations: 1.10 MiB)
 #
 # Without the @FastGTPSA macro (including ForwardDiff as control):
-# GTPSA: 697.625 μs
-# ForwardDiff: 161.125 μs
+# GTPSA: 473.917 μs (19129 allocations: 4.84 MiB)
+# ForwardDiff: 158.208 μs (1520 allocations: 1.10 MiB)
 #
-# Note that @FastGTPSA is transparent to all types except TPS/ComplexTPS, so it can be
-# inserted into functions while still maintaining generic code, as shown here
- 
+# Note that @FastGTPSA is transparent to all types except TPS types, so it can be
+# inserted into functions while still maintaining generic code, as shown here.
+
 function track_qf(z0, k1, hkick)
  z = Vector{promote_type(eltype(z0),typeof(k1),typeof(hkick))}(undef, length(z0))
  lbend=0.1
+
+ @FastGTPSA begin
+ L = 0.5/(1.0+z0[6])
+ h = -L*(z0[2]^2+k1*z0[1]^2+ z0[4]^2-k1*z0[3]^2)/(1.0+z0[6])/2.0
+ z[1] = cos(sqrt(k1)*L)*z0[1]+1/sqrt(k1)*sin(sqrt(k1)*L)*z0[2]
+ z[2] = -sqrt(k1)*sin(sqrt(k1)*L)*z0[1]+cos(sqrt(k1)*L)*z0[2]+hkick+lbend*z0[6]
+ z[3] = cosh(sqrt(k1)*L)*z0[3]+1/sqrt(k1)*sinh(sqrt(k1)*L)*z0[4]
+ z[4] = sqrt(k1)*sinh(sqrt(k1)*L)*z0[3]+cosh(sqrt(k1)*L)*z0[4]
+ z[5] = z0[5]+h-lbend*z[1]
+ z[6] = z0[6]
+ end
 
- L = @FastGTPSA 0.5/(1.0+z0[6])
- h = @FastGTPSA -L*(z0[2]^2+k1*z0[1]^2+ z0[4]^2-k1*z0[3]^2)/(1.0+z0[6])/2.0
- z[1] = @FastGTPSA cos(sqrt(k1)*L)*z0[1]+1/sqrt(k1)*sin(sqrt(k1)*L)*z0[2]
- z[2] = @FastGTPSA -sqrt(k1)*sin(sqrt(k1)*L)*z0[1]+cos(sqrt(k1)*L)*z0[2]+hkick+lbend*z0[6]
- z[3] = @FastGTPSA cosh(sqrt(k1)*L)*z0[3]+1/sqrt(k1)*sinh(sqrt(k1)*L)*z0[4]
- z[4] = @FastGTPSA sqrt(k1)*sinh(sqrt(k1)*L)*z0[3]+cosh(sqrt(k1)*L)*z0[4]
- z[5] = @FastGTPSA z0[5]+h-lbend*z[1]
- z[6] = +z0[6]
  return z
 end
 
 function track_qd(z0, k1, vkick)
  z = Vector{promote_type(eltype(z0),typeof(k1),typeof(vkick))}(undef, length(z0))
 
- L = @FastGTPSA 0.5/(1.0+z0[6])
- h = @FastGTPSA -L*(z0[2]^2-k1*z0[1]^2+z0[4]^2+k1*z0[3]^2)/(1.0+z0[6])/2.0
- z[1] = @FastGTPSA cosh(sqrt(k1)*L)*z0[1]+1/sqrt(k1)*sinh(sqrt(k1)*L)*z0[2]
- z[2] = @FastGTPSA sqrt(k1)*sinh(sqrt(k1)*L)*z0[1]+cosh(sqrt(k1)*L)*z0[2]
- z[3] = @FastGTPSA cos(sqrt(k1)*L)*z0[3]+1/sqrt(k1)*sin(sqrt(k1)*L)*z0[4]
- z[4] = @FastGTPSA -sqrt(k1)*sin(sqrt(k1)*L)*z0[3]+cos(sqrt(k1)*L)*z0[4]+vkick 
- z[5] = @FastGTPSA z0[5]+h
- z[6] = +z0[6]
+ @FastGTPSA begin
+ L = 0.5/(1.0+z0[6])
+ h = -L*(z0[2]^2-k1*z0[1]^2+z0[4]^2+k1*z0[3]^2)/(1.0+z0[6])/2.0
+ z[1] = cosh(sqrt(k1)*L)*z0[1]+1/sqrt(k1)*sinh(sqrt(k1)*L)*z0[2]
+ z[2] = sqrt(k1)*sinh(sqrt(k1)*L)*z0[1]+cosh(sqrt(k1)*L)*z0[2]
+ z[3] = cos(sqrt(k1)*L)*z0[3]+1/sqrt(k1)*sin(sqrt(k1)*L)*z0[4]
+ z[4] = -sqrt(k1)*sin(sqrt(k1)*L)*z0[3]+cos(sqrt(k1)*L)*z0[4]+vkick 
+ z[5] = z0[5]+h
+ z[6] = z0[6]
+ end
+
  return z
 end
 
 function track_drift(z0)
  z = Vector{eltype(z0)}(undef, length(z0))
 
  L = 0.75
- z[1] = @FastGTPSA z0[1]+z0[2]*L/(1.0+z0[6])
- z[2] = +z0[2]
- z[3] = @FastGTPSA z0[3]+z0[4]*L/(1.0+z0[6])
- z[4] = +z0[4]
- z[5] = @FastGTPSA z0[5]-L*((z0[2]^2)+(z0[4]^2))/(1.0+z0[6])^2/2.0
- z[6] = +z0[6] 
+ @FastGTPSA begin
+ z[1] = z0[1]+z0[2]*L/(1.0+z0[6])
+ z[2] = z0[2]
+ z[3] = z0[3]+z0[4]*L/(1.0+z0[6])
+ z[4] = z0[4]
+ z[5] = z0[5]-L*((z0[2]^2)+(z0[4]^2))/(1.0+z0[6])^2/2.0
+ z[6] = z0[6] 
+ end
  return z
 end
 
 function track_cav(z0)
  z = Vector{eltype(z0)}(undef, length(z0))
-
- z[1] = +z0[1]
- z[2] = +z0[2]
- z[3] = +z0[3]
- z[4] = +z0[4]
- z[5] = +z0[5]
- z[6] = @FastGTPSA z0[6]+0.0001*z0[5]
+ z[1] = z0[1]
+ z[2] = z0[2]
+ z[3] = z0[3]
+ z[4] = z0[4]
+ z[5] = z0[5]
+ z[6] = z0[6]+0.0001*z0[5]
  return z
 end
 
 function track_sextupole(z0, k2l)
  z = Vector{promote_type(eltype(z0),typeof(k2l))}(undef, length(z0))
-
- z[1] = +z0[1]
- z[2] = @FastGTPSA z0[2]-k2l*(z0[1]^2-z0[3]^2)
- z[3] = +z0[3]
- z[4] = @FastGTPSA z0[4]+k2l*2.0*z0[1]*z0[3]
- z[5] = +z0[5]
- z[6] = +z0[6]
+
+ @FastGTPSA begin
+ z[1] = z0[1]
+ z[2] = z0[2]-k2l*(z0[1]^2-z0[3]^2)
+ z[3] = z0[3]
+ z[4] = z0[4]+k2l*2.0*z0[1]*z0[3]
+ z[5] = z0[5]
+ z[6] = z0[6]
+ end
  return z
 end
 
@@ -119,21 +128,60 @@ function track_ring(z0, k1=0.36, k2l=1.2, kick=zeros(50))
  return z0
 end
 
-function benchmark_GTPSA()
+function benchmark_GTPSA1()
+ d = Descriptor(6,1,52,1)
+ z = vars()
+ k = params()
+ map = track_ring(z, 0.36+k[1], 1.2+k[2], k[3:end])
+ return map
+end
+
+function benchmark_GTPSA2()
  d = Descriptor(6,2,52,2)
  z = vars()
  k = params()
  map = track_ring(z, 0.36+k[1], 1.2+k[2], k[3:end])
  return map
 end
 
-function benchmark_ForwardDiff()
+function benchmark_GTPSA3()
+ d = Descriptor(6,3,52,3)
+ z = vars()
+ k = params()
+ map = track_ring(z, 0.36+k[1], 1.2+k[2], k[3:end])
+ return map
+end
+
+function benchmark_ForwardDiff1()
+ m(z) = track_ring([z[1], z[2], z[3], z[4], z[5], z[6]], 0.36+z[7], 1.2+z[8], z[9:end])
+ j = Array{Float64}(undef,6,58)
+
+
+ ForwardDiff.jacobian!(j, m, zeros(58))
+
+
+ return j
+end
+
+function benchmark_ForwardDiff2()
  m(z) = track_ring([z[1], z[2], z[3], z[4], z[5], z[6]], 0.36+z[7], 1.2+z[8], z[9:end])
  j = Array{Float64}(undef,6,58)
  h = Array{Float64}(undef,348,58)
- #c = Array{Float64}(undef,20184,58)
+
+ ForwardDiff.jacobian!(j, m, zeros(58))
+ ForwardDiff.jacobian!(h, z->ForwardDiff.jacobian(z->m(z), z), zeros(58))
+
+ return j, h
+end
+
+function benchmark_ForwardDiff3()
+ m(z) = track_ring([z[1], z[2], z[3], z[4], z[5], z[6]], 0.36+z[7], 1.2+z[8], z[9:end])
+ j = Array{Float64}(undef,6,58)
+ h = Array{Float64}(undef,348,58)
+ c = Array{Float64}(undef,20184,58)
  ForwardDiff.jacobian!(j, m, zeros(58))
  ForwardDiff.jacobian!(h, z->ForwardDiff.jacobian(z->m(z), z), zeros(58))
- #ForwardDiff.jacobian!(c, z->ForwardDiff.jacobian(z->ForwardDiff.jacobian(z->m(z), z), z), zeros(58))
- return j, h #, c
+ ForwardDiff.jacobian!(c, z->ForwardDiff.jacobian(z->ForwardDiff.jacobian(z->m(z), z), z), zeros(58))
+ return j, h, c
 end
+
diff --git a/docs/make.jl b/docs/make.jl
@@ -13,17 +13,16 @@ makedocs(
  "man/b_definitions.md", 
  "man/c_descriptor.md", 
  "man/d_tps.md",
- "man/e_complextps.md",
- "man/f_varsparams.md",
- "man/g_monoindex.md",
- "man/h_mono.md",
- "man/i_gjh.md",
- "man/j_slice.md",
- "man/k_methods.md",
- "man/l_fastgtpsa.md",
- "man/m_io.md",
- "man/n_global.md",
- "man/o_all.md"],
+ "man/e_varsparams.md",
+ "man/f_monoindex.md",
+ "man/g_mono.md",
+ "man/h_gjh.md",
+ "man/i_slice.md",
+ "man/j_methods.md",
+ "man/k_fastgtpsa.md",
+ "man/l_io.md",
+ "man/m_global.md",
+ "man/n_all.md"],
  "For Developers" => "devel.md"
  ]
 )