From 8e3e36f65d45fe24aeef2feabb9b091d4e746fdc Mon Sep 17 00:00:00 2001
From: Ed J <mohawk2@users.noreply.github.com>
Date: Tue, 6 Feb 2024 02:25:29 +0000
Subject: [PATCH] doc simq especially its problem with broadcasting

---
 Basic/MatrixOps/matrixops.pd | 131 ++++++++++++++++++++---------------
 1 file changed, 74 insertions(+), 57 deletions(-)

diff --git a/Basic/MatrixOps/matrixops.pd b/Basic/MatrixOps/matrixops.pd
index b0d378fe5..f2ce1c448 100644
--- a/Basic/MatrixOps/matrixops.pd
+++ b/Basic/MatrixOps/matrixops.pd
@@ -176,7 +176,7 @@ sub stretcher {
   my $in = shift;
   my $out = zeroes($in->dim(0),$in->dims);
   my $tmp;  # work around for perl -d "feature"
-  ($tmp = $out->diagonal(0,1)) += $in;	
+  ($tmp = $out->diagonal(0,1)) += $in;
   $out;
 }
 
@@ -217,7 +217,7 @@ OPTIONS:
 =item * s
 
 Boolean value indicating whether to complain if the matrix is singular.  If
-this is false, singular matrices cause inverse to barf.  If it is true, then 
+this is false, singular matrices cause inverse to barf.  If it is true, then
 singular matrices cause inverse to return undef.
 
 =item * lu (I/O)
@@ -244,7 +244,7 @@ sub inv {
   my $opt = shift;
   $opt = {} unless defined($opt);
 
-  barf "inverse needs a square PDL as a matrix\n" 
+  barf "inverse needs a square PDL as a matrix\n"
     unless(UNIVERSAL::isa($x,'PDL') &&
 	   $x->dims >= 2 &&
 	   $x->dim(0) == $x->dim(1)
@@ -266,7 +266,7 @@ sub inv {
     if exists($opt->{det});
 
   unless($det->nelem > 1 || $det) {
-    return undef 
+    return undef
       if $opt->{s};
     barf("PDL::inv: got a singular matrix or LU decomposition\n");
   }
@@ -315,7 +315,7 @@ OPTIONS:
 
 =item * lu (I/O)
 
-Provides a cache for the LU decomposition of the matrix.  If you 
+Provides a cache for the LU decomposition of the matrix.  If you
 provide the key but leave the value undefined, then the LU decomposition
 goes in here; if you put an LU decomposition here, it will be used and
 the matrix will not be decomposed again.
@@ -401,13 +401,13 @@ sub determinant {
     my $y7 = $y->index(7);
     my $y8 = $y->index(8);
 
-    return ( 
+    return (
 	 $y->index(0) * ( $y4 * $y8 - $y5 * $y7 )
       +  $y->index(1) * ( $y5 * $y6 - $y3 * $y8 )
       +  $y->index(2) * ( $y3 * $y7 - $y4 * $y6 )
 	     );
   }
-  
+
   my($i);
   my($sum) = zeroes($x->slice('(0),(0)'));
 
@@ -415,7 +415,7 @@ sub determinant {
   for $i(1..$n-2) {
     my $el = $x->slice("($i),(0)");
     next if( ($el==0)->all );  # Optimize away unnecessary recursion
-    $sum += $el * (1-2*($i%2)) * 
+    $sum += $el * (1-2*($i%2)) *
       determinant($x->slice("0:".($i-1).",1:-1")->
 		  append($x->slice(($i+1).":-1,1:-1")));
   }
@@ -423,7 +423,7 @@ sub determinant {
   # Do beginning and end submatrices
   $sum += $x->slice("(0),(0)")  * determinant($x->slice('1:-1,1:-1'));
   $sum -= $x->slice("(-1),(0)") * determinant($x->slice('0:-2,1:-1')) * (1 - 2*($n % 2));
-  
+
   return $sum;
 }
 
@@ -450,7 +450,7 @@ pp_def("eigens_sym",
    sub PDL::eigens_sym {
       my ($x) = @_;
       my (@d) = $x->dims;
-      barf "Need real square matrix for eigens_sym" 
+      barf "Need real square matrix for eigens_sym"
             if $#d < 1 or $d[0] != $d[1];
       my ($sym) = 0.5*($x + $x->transpose);
       my ($err) = PDL::max(abs($sym));
@@ -478,7 +478,7 @@ It\'s broadcastable, so if C<$a> is 3x3x100, it\'s treated as 100 separate 3x3
 matrices, and both C<$ev> and C<$e> get extra dimensions accordingly.
 
 If called in scalar context it hands back only the eigenvalues.  Ultimately,
-it should switch to a faster algorithm in this case (as discarding the 
+it should switch to a faster algorithm in this case (as discarding the
 eigenvectors is wasteful).
 
 The algorithm used is due to J. vonNeumann, which was a rediscovery of
@@ -498,7 +498,7 @@ runs across their components.
     ($ev, $e) = eigens_sym($x); # e-vects & e-values
     $e = eigens_sym($x);        # just eigenvalues
 
-=cut 
+=cut
 
 ',);
 
@@ -531,7 +531,7 @@ pp_def("eigens",
 		j++;
 	    }
 
-	    Eigen( sn, 0, (double **) foo, 20*sn, 1e-13, 0, 
+	    Eigen( sn, 0, (double **) foo, 20*sn, 1e-13, 0,
 		   (complex double *)( $P(e) ),
                    (complex double **) bar
 		 );
@@ -539,7 +539,7 @@ pp_def("eigens",
             Safefree(bar);
 
             /* Check for invalid values: convenience block */
-            { 
+            {
                 int k;
                 char ok, flag;
                 PDL_Double eps = 0;
@@ -549,19 +549,19 @@ pp_def("eigens",
                     PDL_Double z = fabs (  ($P(e))[i*2]  );
                     if( z > eps )
                         eps = z;
-                }    
+                }
                 eps *= 1e-10;
 
 		/* Next: scan for non-real terms and parallel vectors */
                 for( i=0;  i < sn;  i++ ) {
                    ok = ( fabs( ($P(e))[i*2+1] ) < eps );
-                   for(  j=0; ok && j< sn;  j++) 
+                   for(  j=0; ok && j< sn;  j++)
                       ok &= fabs( ($P(ev))[ 2* (i*sn + j) + 1] ) < eps;
                    for(  k=0; ok && k < i; k++ ) {
-                     if( isfinite( ($P(ev))[ 2 * (k*sn) ] ) ) { 
-                       for( flag=1, j=0; ok && flag && j< sn; j++) 
+                     if( isfinite( ($P(ev))[ 2 * (k*sn) ] ) ) {
+                       for( flag=1, j=0; ok && flag && j< sn; j++)
                          flag &= ( fabs( ($P(ev))[ 2 * (i*sn + j) ] -
-                                         ($P(ev))[ 2 * (k*sn + j) ] 
+                                         ($P(ev))[ 2 * (k*sn + j) ]
                                        )
                                    < 1e-10 * (
                                       fabs(($P(ev))[ 2 * (k*sn + j) ]) +
@@ -583,7 +583,7 @@ pp_def("eigens",
                    /* Set column to NaN if necessary */
                    if( !ok  ) {
                      /* TODO: We should set the badval bit if in use. */
-		     for(j=0; j< sn; j++) 
+		     for(j=0; j< sn; j++)
 		       ($P(ev))[2 * (i*sn+j)] = NAN;
 		     ($P(e))[i*2] = NAN;
                    }
@@ -594,7 +594,7 @@ pp_def("eigens",
    sub PDL::eigens {
       my ($x) = @_;
       my (@d) = $x->dims;
-      barf "Need real square matrix for eigens" 
+      barf "Need real square matrix for eigens"
             if $#d < 1 or $d[0] != $d[1];
       my $deviation = PDL::max(abs($x - $x->transpose))/PDL::max(abs($x));
       if ( $deviation <= 1e-5 ) {
@@ -626,7 +626,7 @@ pp_def("eigens",
       , Doc => '
 =for ref
 
-Real eigenvalues and -vectors of a real square matrix.  
+Real eigenvalues and -vectors of a real square matrix.
 
 (See also L<"eigens_sym"|/eigens_sym>, for eigenvalues and -vectors
 of a real, symmetric, square matrix).
@@ -669,7 +669,7 @@ eigenvectors and the 1st dim runs across their components.
 	$vector = $ev->slice($n);   # Select nth eigenvector as a column-vector
 	$vector = $ev->slice("($n)"); # Select nth eigenvector as a row-vector
 
-DEVEL NOTES: 
+DEVEL NOTES:
 
 For now, there is no distinction between a complex eigenvalue and an
 invalid eigenvalue, although the underlying code generates complex
@@ -846,8 +846,8 @@ sub lu_decomp {
    my $TINY = 1e-30;
 
    barf("lu_decomp requires a square (2D) PDL\n")
-   if(!UNIVERSAL::isa($in,'PDL') || 
-      $in->ndims < 2 || 
+   if(!UNIVERSAL::isa($in,'PDL') ||
+      $in->ndims < 2 ||
       $in->dim(0) != $in->dim(1));
 
    my($n) = $in->dim(0);
@@ -858,8 +858,8 @@ sub lu_decomp {
 
    if(defined $permute) {
       barf('lu_decomp: permutation vector must match the matrix')
-      if(!UNIVERSAL::isa($permute,'PDL') || 
-         $permute->ndims != 1 || 
+      if(!UNIVERSAL::isa($permute,'PDL') ||
+         $permute->ndims != 1 ||
          $permute->dim(0) != $out->dim(0));
       $permute .= PDL->xvals($in->dim(0));
    } else {
@@ -867,7 +867,7 @@ sub lu_decomp {
    }
 
    if(defined $parity) {
-      barf('lu_decomp: parity must be a scalar PDL') 
+      barf('lu_decomp: parity must be a scalar PDL')
       if(!UNIVERSAL::isa($parity,'PDL') ||
          $parity->dim(0) != 1);
       $parity .= 1.0;
@@ -887,8 +887,8 @@ sub lu_decomp {
    my($tmpval) = $tmprow->slice('(0)')->sever;
 
    my($col,$row);
-   for $col(0..$n1) {       
-      for $row(1..$n1) {   
+   for $col(0..$n1) {
+      for $row(1..$n1) {
          my($klim) = $row<$col ? $row : $col;
          if($klim > 0) {
             $klim--;
@@ -957,10 +957,10 @@ LU decompose a matrix, with no row permutation
 =for usage
 
   ($lu, $perm, $parity) = lu_decomp2($x);
-  
+
   $lu = lu_decomp2($x,$perm,$parity);   # or
   $lu = lu_decomp2($x);                 # $perm and $parity are optional
-  
+
   lu_decomp($x->inplace,$perm,$parity); # or
   lu_decomp($x->inplace);               # $perm and $parity are optional
 
@@ -973,7 +973,7 @@ for them -- but they are always trivial.
 
 Because C<lu_decomp2> does not pivot, it is numerically B<unstable> --
 that means it is less precise than L</lu_decomp>, particularly for
-large or near-singular matrices.  There are also specific types of 
+large or near-singular matrices.  There are also specific types of
 non-singular matrices that confuse it (e.g. ([0,-1,0],[1,0,0],[0,0,1]),
 which is a 90 degree rotation matrix but which confuses C<lu_decomp2>).
 
@@ -984,7 +984,7 @@ decomposition for some of the input matrices.
 
 The output is a single matrix that contains the LU decomposition of C<$a>;
 you can even do it in-place, thereby destroying C<$a>, if you want.  See
-L</lu_decomp> for more information about LU decomposition. 
+L</lu_decomp> for more information about LU decomposition.
 
 C<lu_decomp2> is ported from I<Numerical Recipes> into PDL.
 
@@ -1000,12 +1000,12 @@ sub lu_decomp2 {
   my($sing_ok) = shift;
 
   my $TINY = 1e-30;
-  
+
   barf("lu_decomp2 requires a square (2D) PDL\n")
-    if(!UNIVERSAL::isa($in,'PDL') || 
-       $in->ndims < 2 || 
+    if(!UNIVERSAL::isa($in,'PDL') ||
+       $in->ndims < 2 ||
        $in->dim(0) != $in->dim(1));
-  
+
   my($n) = $in->dim(0);
   my($n1) = $n; $n1--;
 
@@ -1015,8 +1015,8 @@ sub lu_decomp2 {
 
   if(defined $perm) {
     barf('lu_decomp2: permutation vector must match the matrix')
-      if(!UNIVERSAL::isa($perm,'PDL') || 
-	 $perm->ndims != 1 || 
+      if(!UNIVERSAL::isa($perm,'PDL') ||
+	 $perm->ndims != 1 ||
 	 $perm->dim(0) != $out->dim(0));
     $perm .= PDL->xvals($in->dim(0));
   } else {
@@ -1024,7 +1024,7 @@ sub lu_decomp2 {
   }
 
   if(defined $par) {
-    barf('lu_decomp: parity must be a scalar PDL') 
+    barf('lu_decomp: parity must be a scalar PDL')
       if(!UNIVERSAL::isa($par,'PDL') ||
 	 $par->nelem != 1);
     $par .= 1.0;
@@ -1035,8 +1035,8 @@ sub lu_decomp2 {
   my $diagonal = $out->diagonal(0,1);
 
   my($col,$row);
-  for $col(0..$n1) {       
-    for $row(1..$n1) {   
+  for $col(0..$n1) {
+    for $row(1..$n1) {
       my($klim) = $row<$col ? $row : $col;
       if($klim > 0) {
 	$klim--;
@@ -1047,10 +1047,10 @@ sub lu_decomp2 {
       }
 
     }
-    
+
     # Figure a_ij, with no pivoting
     if($col < $n1) {
-      # Divide the rest of the column by the diagonal element 
+      # Divide the rest of the column by the diagonal element
       my $tmp; # work around for perl -d "feature"
       ($tmp = $out->slice("($col),".($col+1).":$n1")) /= $diagonal->index($col)->dummy(0,$n1-$col);
     }
@@ -1100,6 +1100,15 @@ Solve A x = B for matrix A, by back substitution into A's LU decomposition.
   ($lu,$perm,$par) = lu_decomp($A);
   $x = lu_backsub($lu,$perm,$par, $B->transpose)->transpose;
 
+  # using simq
+  # remove all active dims
+  @A_dims = $A->dims; @B_dims = $B->transpose->dims;
+  splice @A_dims, 0, 2; splice @B_dims, 0, 1;
+  @broadcast = PDL::Core::dims_filled(\@A_dims, \@B_dims);
+  # simq modifies A, so need 1 copy per broadcast else non-first run has wrong A
+  ($x) = simq($A->dupN(1,1,map +($A_dims[$_]//1)==1?$broadcast[$_]:1, 0..$#broadcast)->copy, $B->transpose, 0);
+  $x = $x->inplace->transpose;
+
   # or with Slatec LINPACK
   use PDL::Slatec;
   gefa($lu=$A->copy, $ipiv=null, $info=null);
@@ -1165,12 +1174,12 @@ sub lu_backsub {
       ($lu, $perm, $y) = @_;
    } elsif(@_==4) {
       ($lu, $perm, $par, $y) = @_;
-   } 
+   }
 
    barf("lu_backsub: LU decomposition is undef -- probably from a singular matrix.\n")
    unless defined($lu);
 
-   barf("Usage: \$x = lu_backsub(\$lu,\$perm,\$y); all must be PDLs\n") 
+   barf("Usage: \$x = lu_backsub(\$lu,\$perm,\$y); all must be PDLs\n")
    unless(UNIVERSAL::isa($lu,'PDL') &&
       UNIVERSAL::isa($perm,'PDL') &&
       UNIVERSAL::isa($y,'PDL'));
@@ -1280,15 +1289,8 @@ BROADCAST_OK:
    }
    $out;
 }
-
-
 EOD
 
-
-######################################################################
-### simq
-###
-
 # XXX Destroys a!!!
 # To use the new a again, must store both a and ips.
 pp_def("simq",
@@ -1305,9 +1307,20 @@ pp_def("simq",
 =for ref
 
 Solution of simultaneous linear equations, C<a x = b>.
+B<NB does not broadcast well>.
+
+=for usage
+
+  # remove all active dims
+  @A_dims = $A->dims; @B_dims = $B->transpose->dims;
+  splice @A_dims, 0, 2; splice @B_dims, 0, 1;
+  @broadcast = PDL::Core::dims_filled(\@A_dims, \@B_dims);
+  # simq modifies A, so need 1 copy per broadcast else non-first run has wrong A
+  ($x) = simq($A->dupN(1,1,map +($A_dims[$_]//1)==1?$broadcast[$_]:1, 0..$#broadcast)->copy, $B->transpose, 0);
+  $x = $x->inplace->transpose;
 
 C<$a> is an C<n x n> matrix (i.e., a vector of length C<n*n>), stored row-wise:
-that is, C<a(i,j) = a[ij]>, where C<ij = i*n + j>.  
+that is, C<a(i,j) = a[ij]>, where C<ij = i*n + j>.
 
 While this is the transpose of the normal column-wise storage, this
 corresponds to normal PDL usage.  The contents of matrix a may be
@@ -1315,12 +1328,16 @@ altered (but may be required for subsequent calls with flag = -1).
 
 C<$y>, C<$x>, C<$ips> are vectors of length C<n>.
 
-Set C<flag=0> to solve.  
+Set C<flag=0> to solve.
 Set C<flag=-1> to do a new back substitution for
 different C<$y> vector using the same a matrix previously reduced when
 C<flag=0> (the C<$ips> vector generated in the previous solution is also
 required).
 
+For this function to work well with broadcasting, it will need the LU
+decomposition part split out, so that for solving C<A x = B> only C<x>
+would be written to.
+
 See also L</lu_backsub>, which does the same thing with a slightly
 less opaque interface.
 
@@ -1334,7 +1351,7 @@ less opaque interface.
 ###
 # this doesn't need to be changed to support bad values
 # I could put 'HandleBad => 1', but it would just cause an
-# unnecessary increase (admittedly small) in the amount of 
+# unnecessary increase (admittedly small) in the amount of
 # code
 #
 pp_def("squaretotri",