From 26f7d3a7169af732bde67488226fff4a6fba5996 Mon Sep 17 00:00:00 2001
From: PabloIbannez
Date: Mon, 19 Feb 2024 16:31:21 +0100
Subject: [PATCH 01/15] Updated quaternion and tensor types. Added qualifier
---
src/utils/quaternion.cuh | 196 ++++++++++++++++++++-------------------
src/utils/tensor.cuh | 26 +++++-
2 files changed, 124 insertions(+), 98 deletions(-)
diff --git a/src/utils/quaternion.cuh b/src/utils/quaternion.cuh
index 2c384549..bdaec2a4 100644
--- a/src/utils/quaternion.cuh
+++ b/src/utils/quaternion.cuh
@@ -1,7 +1,7 @@
/* P. Palacios Alonso 2021
Some quaternion algebra and useful functions
Notation:
- n(real) - Scalar part
+ n(real) - Scalar part
v(real3) - vectorial part
q(real4) - quaternion
*/
@@ -13,146 +13,150 @@ Notation:
namespace uammd{
class Quat{
- public:
-
- real n;
- real3 v;
-
- QUATTR Quat();
- QUATTR Quat(real4 q);
- QUATTR Quat(real n, real3 v);
- QUATTR Quat(real n, real vx, real vy, real vz);
-
- QUATTR Quat operator+(Quat q1);
- QUATTR void operator+=(Quat q);
- QUATTR Quat operator-(Quat q);
- QUATTR void operator-=(Quat q);
- QUATTR Quat operator*(Quat q);
- QUATTR void operator*=(Quat q);
- QUATTR Quat operator*(real scalar);
- QUATTR void operator*=(real scalar);
- QUATTR Quat operator/(real scalar);
- QUATTR void operator/=(real scalar);
- QUATTR void operator=(real4 q);
-
- QUATTR real3 getVx(); //Returns the first vector of the reference system encoded by the quaternion
- QUATTR real3 getVy(); //Returns the second vector of the reference system encoded by the quaternion
- QUATTR real3 getVz(); //Returns the third vector of the reference system encoded by the quaternion
- QUATTR real4 to_real4();
-
- QUATTR Quat getConjugate();
+ public:
+
+ real n;
+ real3 v;
+
+ QUATTR Quat();
+ QUATTR Quat(const real4& q);
+ QUATTR Quat(real n, const real3& v);
+ QUATTR Quat(real n, real vx, real vy, real vz);
+
+ QUATTR Quat operator+ (const Quat& q) const;
+ QUATTR void operator+= (const Quat& q);
+ QUATTR Quat operator- (const Quat& q) const;
+ QUATTR void operator-= (const Quat& q);
+ QUATTR Quat operator* (const Quat& q) const;
+ QUATTR void operator*= (const Quat& q);
+ QUATTR Quat operator* (real scalar) const;
+ QUATTR void operator*= (real scalar);
+ QUATTR Quat operator/ (real scalar) const;
+ QUATTR void operator/= (real scalar);
+ QUATTR void operator= (const real4& q);
+
+ QUATTR real3 getVx() const;
+ QUATTR real3 getVy() const;
+ QUATTR real3 getVz() const;
+ QUATTR real4 to_real4() const;
+
+ QUATTR Quat getConjugate() const;
};
-
+
QUATTR Quat::Quat(){
n = real(1.0);
v = real3();
}
-
- QUATTR Quat::Quat(real4 q){
+
+ QUATTR Quat::Quat(const real4& q){
n = q.x;
v.x = q.y;
v.y = q.z;
v.z = q.w;
}
-
- QUATTR Quat::Quat(real n, real3 v){
+
+ QUATTR Quat::Quat(real n, const real3& v){
this->n = n;
this->v = v;
}
-
- QUATTR Quat::Quat(real n, real vx, real vy, real vz){
+
+ QUATTR Quat::Quat(real n,
+ real vx,real vy,real vz){
this -> n = n;
v.x = vx;
v.y = vy;
- v.z = vz;
+ v.z = vz;
}
-
- QUATTR Quat Quat::operator+(Quat q){
- return Quat(n+q.n,v+q.v);
+
+ QUATTR Quat Quat::operator+(const Quat& q) const {
+ return Quat(n+q.n,v+q.v);
}
- QUATTR void Quat::operator+=(Quat q){
+ QUATTR void Quat::operator+=(const Quat& q){
n+=q.n;
v+=q.v;
}
-
- QUATTR Quat Quat::operator-(Quat q){
- return Quat(n-q.n,v-q.v);
- }
-
- QUATTR void Quat::operator-=(Quat q){
+
+ QUATTR Quat Quat::operator-(const Quat& q) const {
+ return Quat(n-q.n,v-q.v);
+ }
+
+ QUATTR void Quat::operator-=(const Quat& q){
n-=q.n;
v-=q.v;
}
-
- QUATTR Quat Quat::operator*(Quat q){
+
+ QUATTR Quat Quat::operator*(const Quat& q) const {
/*
- Product of two quaternions:
- q3 = q1*q2 = (n1*n2 - v1*v2, n1*v2 + n2*v1 + v1 x v2)
- */
- return Quat(n*q.n-dot(v,q.v),n*q.v+v*q.n+cross(v,q.v));
+ Product of two quaternions:
+ q3 = q1*q2 = (n1*n2 - v1*v2, n1*v2 + n2*v1 + v1 x v2)
+ */
+ return Quat(n*q.n-dot(v,q.v),n*q.v+v*q.n+cross(v,q.v));
}
-
- QUATTR void Quat::operator*=(Quat q){
- n=n*q.n-dot(v,q.v);
- v=n*q.v+v*q.n+cross(q.v,v);
+
+ QUATTR void Quat::operator*=(const Quat& q){
+ real n_new = n*q.n-dot(v,q.v);
+ real3 v_new = n*q.v+v*q.n+cross(v,q.v);
+
+ n = n_new;
+ v = v_new;
}
-
- QUATTR Quat Quat::operator*(real scalar){
- return Quat(scalar*n,scalar*v);
+
+ QUATTR Quat Quat::operator*(real scalar) const {
+ return Quat(scalar*n,scalar*v);
}
-
+
QUATTR void Quat::operator*=(real scalar){
- n*=scalar;
- v*=scalar;
+ n*=scalar;
+ v*=scalar;
}
-
- QUATTR Quat operator*(real scalar, Quat q){
- return Quat(scalar*q.n,scalar*q.v);
+
+ QUATTR Quat operator*(real scalar, const Quat& q){
+ return Quat(scalar*q.n,scalar*q.v);
}
-
- QUATTR Quat Quat::operator/(real scalar){
- return Quat(n/scalar,v/scalar);
+
+ QUATTR Quat Quat::operator/(real scalar) const {
+ return Quat(n/scalar,v/scalar);
}
-
+
QUATTR void Quat::operator/=(real scalar){
n/=scalar;
v/=scalar;
}
-
- QUATTR void Quat::operator=(real4 q){
+
+ QUATTR void Quat::operator=(const real4& q){
n = q.x;
v.x = q.y;
v.y = q.z;
v.z = q.w;
}
-
- QUATTR real3 Quat::getVx(){
+
+ QUATTR real3 Quat::getVx() const {
real a = n;
real b = v.x;
real c = v.y;
real d = v.z;
- real3 vx = make_real3(a*a+b*b-c*c-d*d,2*(b*c+a*d),2*(b*d-a*c));
+ real3 vx = make_real3(a*a+b*b-c*c-d*d,real(2.0)*(b*c+a*d),real(2.0)*(b*d-a*c));
return vx;
}
-
- QUATTR real3 Quat::getVy(){
+
+ QUATTR real3 Quat::getVy() const {
real a = n;
real b = v.x;
real c = v.y;
real d = v.z;
- return make_real3(2*(b*c-a*d),a*a-b*b+c*c-d*d,2*(c*d+a*b));
+ return make_real3(real(2.0)*(b*c-a*d),a*a-b*b+c*c-d*d,real(2.0)*(c*d+a*b));
}
-
- QUATTR real3 Quat::getVz(){
+
+ QUATTR real3 Quat::getVz() const {
real a = n;
real b = v.x;
real c = v.y;
real d = v.z;
- return make_real3(2*(b*d+a*c),2*(c*d-a*b),a*a-b*b-c*c+d*d);
+ return make_real3(real(2.0)*(b*d+a*c),real(2.0)*(c*d-a*b),a*a-b*b-c*c+d*d);
}
-
- QUATTR real4 Quat::to_real4(){
+
+ QUATTR real4 Quat::to_real4() const {
real4 r4;
r4.x = n;
r4.y = v.x;
@@ -160,32 +164,32 @@ namespace uammd{
r4.w = v.z;
return r4;
}
-
+
QUATTR real4 make_real4(Quat q){
return q.to_real4();
}
- QUATTR Quat Quat::getConjugate(){
+ QUATTR Quat Quat::getConjugate() const {
return Quat(n,-v);
}
- /*
- Returns the quaternion that encondes a rotation of ang radians
- around the axis vrot
+ /*
+ Returns the quaternion that encondes a rotation of ang radians
+ around the axis vrot
q = (cos(phi/2),vrot·sin(phi/2))
- */
+ */
QUATTR Quat rotVec2Quaternion(real3 vrot, real phi){
real norm = (dot(vrot,vrot));
- if (norm==0) return Quat(1.0,0.,0.,0.);
+ if (norm==real(0.0)) return Quat(1.0, 0., 0., 0.);
vrot*=rsqrt(norm); // The rotation axis must be a unitary vector
real cphi2, sphi2;
real* cphi2_ptr = &cphi2;
real* sphi2_ptr = &sphi2;
- sincos(phi*0.5,sphi2_ptr,cphi2_ptr);
+ sincos(phi*real(0.5),sphi2_ptr,cphi2_ptr);
Quat q = Quat(cphi2,sphi2*vrot);
return q;
}
-
+
QUATTR Quat rotVec2Quaternion(real3 vrot){
// If no angle is given the rotation angle is the modulus of vrot
real phi = sqrt(dot(vrot,vrot));
@@ -197,10 +201,10 @@ namespace uammd{
To speed up the computation, we write the rotation in the next form:
v' = v + 2*q_n*(q_v x v) + 2*q_v * (q_v x v).
See https://fgiesen.wordpress.com/2019/02/09/rotating-a-single-vector-using-a-quaternion/
- */
- QUATTR real3 rotateVector(Quat q, real3 v){
- real3 aux = 2*cross(q.v,v);
- return v+q.n*aux+cross(q.v,aux);
+ */
+ QUATTR real3 rotateVector(const Quat& q, const real3& v){
+ real3 aux = real(2.0)*cross(q.v,v);
+ return v+q.n*aux+cross(q.v,aux);
}
}
diff --git a/src/utils/tensor.cuh b/src/utils/tensor.cuh
index 0acba92c..e67c729b 100644
--- a/src/utils/tensor.cuh
+++ b/src/utils/tensor.cuh
@@ -36,9 +36,15 @@ namespace uammd{
this->zz = t.zz;
}
- VECATTR real3 diag(){return {xx,yy,zz};}
+ VECATTR real3 diag() const {return {xx,yy,zz};}
- VECATTR real trace(){return xx+yy+zz;}
+ VECATTR real trace() const {return xx+yy+zz;}
+
+ VECATTR tensor3 transpose() const {
+ return tensor3(xx,yx,zx,
+ xy,yy,zy,
+ xz,yz,zz);
+ }
};
@@ -152,6 +158,22 @@ namespace uammd{
return sub;
}
+ //Define the unary minus operator
+
+ VECATTR tensor3 operator -(const tensor3 &a){
+ tensor3 sub;
+ sub.xx = -a.xx;
+ sub.yx = -a.yx;
+ sub.zx = -a.zx;
+ sub.xy = -a.xy;
+ sub.yy = -a.yy;
+ sub.zy = -a.zy;
+ sub.xz = -a.xz;
+ sub.yz = -a.yz;
+ sub.zz = -a.zz;
+ return sub;
+ }
+
VECATTR void operator -=(tensor3 &a, const real &b){
a.xx -= b;
a.yx -= b;
From b0b86e4781c617000206b084b7939e181520a1a1 Mon Sep 17 00:00:00 2001
From: PabloPalaciosAlonso
Date: Tue, 20 Feb 2024 14:37:06 +0100
Subject: [PATCH 02/15] Extend BVP modules to work either with real or complex
numbers
---
.../BDHI/DoublyPeriodic/DPStokesSlab.cuh | 2 +-
.../DoublyPeriodic/StokesSlab/Correction.cuh | 2 +-
.../StokesSlab/initialization.cu | 2 +-
.../DoublyPeriodic/PoissonSlab/BVPPoisson.cuh | 4 +-
.../BVPSchurComplementMatrices.cuh | 25 +-
src/misc/BoundaryValueProblem/BVPSolver.cuh | 111 ++++--
src/misc/BoundaryValueProblem/KBPENTA.cuh | 15 +-
src/misc/BoundaryValueProblem/MatrixUtils.h | 183 ++++++----
src/utils/atomics.cuh | 28 +-
src/utils/vector.cuh | 335 ++++++++++++++++++
test/misc/bvp/bvp.cu | 2 +-
11 files changed, 565 insertions(+), 144 deletions(-)
diff --git a/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh b/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh
index a02a22b4..aad075a4 100644
--- a/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh
+++ b/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh
@@ -169,7 +169,7 @@ else throw std::runtime_error("[DPStokesSlab] Can only average in direction X (0
real gw;
real tolerance;
WallMode mode;
- shared_ptr bvpSolver;
+ shared_ptr> bvpSolver;
};
diff --git a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/Correction.cuh b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/Correction.cuh
index f6e5f6cf..c7735e3c 100644
--- a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/Correction.cuh
+++ b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/Correction.cuh
@@ -506,7 +506,7 @@ namespace uammd{
}
auto computeZeroModeBoundaryConditions(int nz, real H, WallMode mode){
- BVP::SchurBoundaryCondition bcs(nz, H);
+ BVP::SchurBoundaryCondition bcs(nz, H);
if(mode == WallMode::bottom){
correction_ns::TopBoundaryConditions top(0, H);
correction_ns::BottomBoundaryConditions bot(0, H);
diff --git a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/initialization.cu b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/initialization.cu
index d89ed298..d16b5bf3 100644
--- a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/initialization.cu
+++ b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/initialization.cu
@@ -144,7 +144,7 @@ namespace uammd{
auto botBC = thrust::make_transform_iterator(thrust::make_counting_iterator(0), botdispatch);
int numberSystems = (nk.x/2+1)*nk.y;
int nz = grid.cellDim.z;
- this->bvpSolver = std::make_shared(klist, topBC, botBC, numberSystems, halfH, nz);
+ this->bvpSolver = std::make_shared>(klist, topBC, botBC, numberSystems, halfH, nz);
CudaCheckError();
}
diff --git a/src/Interactor/DoublyPeriodic/PoissonSlab/BVPPoisson.cuh b/src/Interactor/DoublyPeriodic/PoissonSlab/BVPPoisson.cuh
index 161ff7ab..b1aea296 100644
--- a/src/Interactor/DoublyPeriodic/PoissonSlab/BVPPoisson.cuh
+++ b/src/Interactor/DoublyPeriodic/PoissonSlab/BVPPoisson.cuh
@@ -90,7 +90,7 @@ namespace uammd{
}
class BVPPoissonSlab{
- std::shared_ptr bvpSolver;
+ std::shared_ptr> bvpSolver;
real2 Lxy;
real H;
int3 cellDim;
@@ -150,7 +150,7 @@ namespace uammd{
BoundaryConditionsDispatch(klist, H));
int numberSystems = (nk.x/2+1)*nk.y;
- this->bvpSolver = std::make_shared(klist, topBC, bottomBC, numberSystems, H, cellDim.z);
+ this->bvpSolver = std::make_shared>(klist, topBC, bottomBC, numberSystems, H, cellDim.z);
CudaCheckError();
}
};
diff --git a/src/misc/BoundaryValueProblem/BVPSchurComplementMatrices.cuh b/src/misc/BoundaryValueProblem/BVPSchurComplementMatrices.cuh
index 4611db81..02ddc15c 100644
--- a/src/misc/BoundaryValueProblem/BVPSchurComplementMatrices.cuh
+++ b/src/misc/BoundaryValueProblem/BVPSchurComplementMatrices.cuh
@@ -117,6 +117,7 @@ namespace uammd{
}
};
+ template
class SchurBoundaryCondition{
int nz;
real H;
@@ -125,8 +126,8 @@ namespace uammd{
SchurBoundaryCondition(int nz, real H):nz(nz), H(H), bcs(nz){}
template
- std::vector computeBoundaryConditionMatrix(const TopBC &top, const BottomBC &bottom){
- std::vector CandD(2*nz+4, 0);
+ std::vector computeBoundaryConditionMatrix(const TopBC &top, const BottomBC &bottom){
+ std::vector CandD(2*nz+4, 0);
auto topRow = computeTopRow(top, bottom);
auto bottomRow = computeBottomRow(top, bottom);
std::copy(topRow.begin(), topRow.end()-2, CandD.begin());
@@ -141,8 +142,8 @@ namespace uammd{
private:
template
- std::vector computeTopRow(const TopBC &top, const BottomBC &bottom){
- std::vector topRow(nz+2, 0);
+ std::vector computeTopRow(const TopBC &top, const BottomBC &bottom){
+ std::vector topRow(nz+2, 0);
auto tfi = bcs.topFirstIntegral();
auto tsi = bcs.topSecondIntegral();
auto tfiFactor = top.getFirstIntegralFactor();
@@ -156,8 +157,8 @@ namespace uammd{
}
template
- std::vector computeBottomRow(const TopBC &top, const BottomBC &bottom){
- std::vector bottomRow(nz+2, 0);
+ std::vector computeBottomRow(const TopBC &top, const BottomBC &bottom){
+ std::vector bottomRow(nz+2, 0);
auto bfi = bcs.bottomFirstIntegral();
auto bsi = bcs.bottomSecondIntegral();
auto bfiFactor = bottom.getFirstIntegralFactor();
@@ -172,10 +173,11 @@ namespace uammd{
};
- std::vector computeSecondIntegralMatrix(real k, real H, int nz){
- std::vector A(nz*nz, 0);
+ template
+ std::vector computeSecondIntegralMatrix(T k, real H, int nz){
+ std::vector A(nz*nz, 0);
SecondIntegralMatrix sim(nz);
- real kH2 = k*k*H*H;
+ T kH2 = k*k*H*H;
fori(0, nz){
forj(0,nz){
A[i+nz*j] = (i==j) - kH2*sim.getElement(i, j);
@@ -184,9 +186,10 @@ namespace uammd{
return std::move(A);
}
- std::vector computeInverseSecondIntegralMatrix(real k, real H, int nz){
+ template
+ std::vector computeInverseSecondIntegralMatrix(T k, real H, int nz){
if(k==0){
- std::vector invA(nz*nz, 0);
+ std::vector invA(nz*nz, 0);
fori(0, nz){
invA[i+nz*i] = 1;
}
diff --git a/src/misc/BoundaryValueProblem/BVPSolver.cuh b/src/misc/BoundaryValueProblem/BVPSolver.cuh
index 4e54b094..7959fd9b 100644
--- a/src/misc/BoundaryValueProblem/BVPSolver.cuh
+++ b/src/misc/BoundaryValueProblem/BVPSolver.cuh
@@ -42,39 +42,40 @@ namespace uammd{
namespace BVP{
namespace detail{
+ template
class SubsystemSolver{
int nz;
real H;
- StorageHandle CinvA_storage;
- StorageHandle CinvABmD_storage;
+ StorageHandle CinvA_storage;
+ StorageHandle CinvABmD_storage;
public:
SubsystemSolver(int nz, real H):nz(nz), H(H){}
void registerRequiredStorage(StorageRegistration &memoryManager){
- CinvA_storage = memoryManager.registerStorageRequirement(2*nz+2);
- CinvABmD_storage = memoryManager.registerStorageRequirement(1);
+ CinvA_storage = memoryManager.registerStorageRequirement(2*nz+2);
+ CinvABmD_storage = memoryManager.registerStorageRequirement(4);
}
template
- void precompute(real k, const TopBC &top, const BottomBC &bottom, StorageRetriever &memoryManager){
+ void precompute(U k, const TopBC &top, const BottomBC &bottom, StorageRetriever &memoryManager){
auto CinvA_it = memoryManager.retrieveStorage(CinvA_storage);
auto CinvABmD_it = memoryManager.retrieveStorage(CinvABmD_storage);
auto invA = computeInverseSecondIntegralMatrix(k, H, nz);
- SchurBoundaryCondition bcs(nz, H);
+ SchurBoundaryCondition bcs(nz, H);
auto CandD = bcs.computeBoundaryConditionMatrix(top, bottom);
- real4 D = make_real4(CandD[2*nz], CandD[2*nz+1], CandD[2*nz+2], CandD[2*nz+3]);
+ U D[4] = {CandD[2*nz], CandD[2*nz+1], CandD[2*nz+2], CandD[2*nz+3]};
auto CinvA = matmul(CandD, nz, 2, invA, nz, nz);
std::copy(CinvA.begin(), CinvA.end(), CinvA_it);
- real4 CinvAB;
- real B00 = -k*k*H*H;
- real B11 = -k*k*H*H;
- CinvAB.x = CinvA[0]*B00;
- CinvAB.y = CinvA[1]*B11;
- CinvAB.z = CinvA[0+nz]*B00;
- CinvAB.w = CinvA[1+nz]*B11;
- CinvABmD_it[0] = CinvAB - D;
+ U CinvAB[4];
+ U B00 = -k*k*H*H;
+ U B11 = -k*k*H*H;
+ CinvAB[0] = CinvA[0]*B00;
+ CinvAB[1] = CinvA[1]*B11;
+ CinvAB[2] = CinvA[0+nz]*B00;
+ CinvAB[3] = CinvA[1+nz]*B11;
+ fori(0, 4) CinvABmD_it[i] = CinvAB[i] - D[i];
}
template
@@ -83,7 +84,10 @@ namespace uammd{
StorageRetriever &memoryManager){
const auto CinvA = memoryManager.retrieveStorage(CinvA_storage);
const auto CinvAfmab = computeRightHandSide(alpha, beta, fn, CinvA);
- const real4 CinvABmD = *(memoryManager.retrieveStorage(CinvABmD_storage));
+ const auto CinvABmD_it = memoryManager.retrieveStorage(CinvABmD_storage);
+ U CinvABmD[4] = {CinvABmD_it[0], CinvABmD_it[1],
+ CinvABmD_it[2], CinvABmD_it[3]};
+
const auto c0d0 = solveSubsystem(CinvABmD, CinvAfmab);
return c0d0;
}
@@ -91,8 +95,8 @@ namespace uammd{
private:
template
- __device__ thrust::pair solveSubsystem(real4 CinvABmD, thrust::pair CinvAfmab) const{
- auto c0d0 = solve2x2System(CinvABmD, CinvAfmab);
+ __device__ thrust::pair solveSubsystem(U CinvABmD[4], thrust::pair CinvAfmab) const{
+ auto c0d0 = solve2x2System(CinvABmD, CinvAfmab);
return c0d0;
}
@@ -113,25 +117,33 @@ namespace uammd{
};
+ template
class PentadiagonalSystemSolver{
int nz;
real H;
- KBPENTA_mod pentasolve;
+ KBPENTA_mod pentasolve;
public:
+ static_assert(
+ std::is_same::value || std::is_same>::value ||
+ std::is_same::value || std::is_same>::value,
+ "PentadiagonalSystemSolver is expected to work only with real numbers or thrust::complex<> numbers"
+ );
+
PentadiagonalSystemSolver(int nz, real H):
nz(nz), H(H), pentasolve(nz){}
-
+
+
void registerRequiredStorage(StorageRegistration &memoryManager){
pentasolve.registerRequiredStorage(memoryManager);
}
- void precompute(real k, StorageRetriever &memoryManager){
- real diagonal[nz];
- real diagonal_p2[nz]; diagonal_p2[nz-2] = diagonal_p2[nz-1] = 0;
- real diagonal_m2[nz]; diagonal_m2[0] = diagonal_m2[1] = 0;
+ void precompute(U k, StorageRetriever &memoryManager){
+ U diagonal[nz];
+ U diagonal_p2[nz]; diagonal_p2[nz-2] = diagonal_p2[nz-1] = 0;
+ U diagonal_m2[nz]; diagonal_m2[0] = diagonal_m2[1] = 0;
SecondIntegralMatrix sim(nz);
- const real kH2 = k*k*H*H;
+ const U kH2 = k*k*H*H;
for(int i = 0; i
class BoundaryValueProblemSolver{
- detail::PentadiagonalSystemSolver pent;
- detail::SubsystemSolver sub;
- StorageHandle waveVector;
+ detail::PentadiagonalSystemSolver pent;
+ detail::SubsystemSolver sub;
+ StorageHandle waveVector;
int nz;
real H;
public:
+
+ static_assert(
+ std::is_same::value || std::is_same>::value ||
+ std::is_same::value || std::is_same>::value,
+ "BoundaryValueProblemSolver is expected to work only with real numbers or thrust::complex<> numbers"
+ );
+
BoundaryValueProblemSolver(int nz, real H): nz(nz), H(H), sub(nz, H), pent(nz, H){}
void registerRequiredStorage(StorageRegistration &mem){
- waveVector = mem.registerStorageRequirement(1);
+ waveVector = mem.registerStorageRequirement(1);
sub.registerRequiredStorage(mem);
pent.registerRequiredStorage(mem);
}
template
- void precompute(StorageRetriever &mem, real k, const TopBC &top, const BottomBC &bot){
+ void precompute(StorageRetriever &mem, U k, const TopBC &top, const BottomBC &bot){
auto k_access = mem.retrieveStorage(waveVector);
k_access[0] = k;
pent.precompute(k, mem);
@@ -177,10 +197,10 @@ namespace uammd{
AnIterator& an,
CnIterator& cn,
StorageRetriever &mem){
- const real k = *(mem.retrieveStorage(waveVector));
+ const auto k = *(mem.retrieveStorage(waveVector));
T c0, d0;
thrust::tie(c0, d0) = sub.solve(fn, alpha, beta, mem);
- const real kH2 = k*k*H*H;
+ const auto kH2 = k*k*H*H;
fn[0] += kH2*c0;
fn[1] += kH2*d0;
pent.solve(fn, an, mem);
@@ -201,17 +221,25 @@ namespace uammd{
};
+ template
class BatchedBVPHandler;
+ template
struct BatchedBVPGPUSolver{
private:
int numberSystems;
- BoundaryValueProblemSolver bvpSolver;
+ BoundaryValueProblemSolver bvpSolver;
char* gpuMemory;
- friend class BatchedBVPHandler;
- BatchedBVPGPUSolver(int numberSystems, BoundaryValueProblemSolver bvpSolver, char *raw):
+ friend class BatchedBVPHandler;
+ BatchedBVPGPUSolver(int numberSystems, BoundaryValueProblemSolver bvpSolver, char *raw):
numberSystems(numberSystems), bvpSolver(bvpSolver), gpuMemory(raw){}
public:
+
+ static_assert(
+ std::is_same::value || std::is_same>::value ||
+ std::is_same::value || std::is_same>::value,
+ "BatchedBVPGPUHandler is expected to work only with real numbers or thrust::complex<> numbers"
+ );
template
__device__ void solve(int instance,
@@ -225,12 +253,19 @@ namespace uammd{
};
+ template
class BatchedBVPHandler{
int numberSystems;
- BoundaryValueProblemSolver bvp;
+ BoundaryValueProblemSolver bvp;
thrust::device_vector gpuMemory;
public:
+ static_assert(
+ std::is_same::value || std::is_same>::value ||
+ std::is_same::value || std::is_same>::value,
+ "BatchedBVPHandler is expected to work only with real numbers or thrust::complex<> numbers"
+ );
+
template
BatchedBVPHandler(const WaveVectorIterator &klist,
BatchedTopBC top, BatchedBottomBC bot,
@@ -240,9 +275,9 @@ namespace uammd{
precompute(klist, top, bot);
}
- BatchedBVPGPUSolver getGPUSolver(){
+ BatchedBVPGPUSolver getGPUSolver(){
auto raw = thrust::raw_pointer_cast(gpuMemory.data());
- BatchedBVPGPUSolver d_solver(numberSystems, bvp, raw);
+ BatchedBVPGPUSolver d_solver(numberSystems, bvp, raw);
return d_solver;
}
diff --git a/src/misc/BoundaryValueProblem/KBPENTA.cuh b/src/misc/BoundaryValueProblem/KBPENTA.cuh
index 636b217b..0bd69733 100644
--- a/src/misc/BoundaryValueProblem/KBPENTA.cuh
+++ b/src/misc/BoundaryValueProblem/KBPENTA.cuh
@@ -6,20 +6,27 @@ namespace uammd{
namespace BVP{
//Algorithm adapted from http://dx.doi.org/10.1080/00207160802326507 for a special case of only three diagonals being non zero
+ template
class KBPENTA_mod{
- StorageHandle storageHandle;
+ StorageHandle storageHandle;
int nz;
public:
+ static_assert(
+ std::is_same::value || std::is_same>::value ||
+ std::is_same::value || std::is_same>::value,
+ "KBPENTA_mode is expected to work only with real numbers or thrust::complex<> numbers"
+ );
+
KBPENTA_mod(int nz): nz(nz){}
void registerRequiredStorage(StorageRegistration &memoryManager){
- storageHandle = memoryManager.registerStorageRequirement(3*nz+2);
+ storageHandle = memoryManager.registerStorageRequirement(3*nz+2);
}
- void store(real *diagonal, real *diagonal_p2, real *diagonal_m2, StorageRetriever &memoryManager){
+ void store(U *diagonal, U *diagonal_p2, U *diagonal_m2, StorageRetriever &memoryManager){
auto storage = memoryManager.retrieveStorage(storageHandle);
- std::vector beta(nz+1, 0);
+ std::vector beta(nz+1, 0);
beta[0] = 0;
beta[1] = diagonal[nz-nz];
beta[2] = diagonal[nz-(nz-1)];
diff --git a/src/misc/BoundaryValueProblem/MatrixUtils.h b/src/misc/BoundaryValueProblem/MatrixUtils.h
index 7a1689af..25903c71 100644
--- a/src/misc/BoundaryValueProblem/MatrixUtils.h
+++ b/src/misc/BoundaryValueProblem/MatrixUtils.h
@@ -1,5 +1,5 @@
/*Raul P. Pelaez 2019-2021. Boundary Value Problem Matrix algebra utilities
- */
+*/
#ifndef BVP_MATRIX_UTILS_H
#define BVP_MATRIX_UTILS_H
#include "utils/cufftPrecisionAgnostic.h"
@@ -12,95 +12,129 @@
#else
#include
#endif
-
+#include
namespace uammd{
namespace BVP{
-
+ using complex = thrust::complex;
template struct LapackeUAMMD;
template<>
- struct LapackeUAMMD{
- template static int getrf(T... args){return LAPACKE_sgetrf(args...);}
- template static int getri(T... args){return LAPACKE_sgetri(args...);}
- };
+ struct LapackeUAMMD{
+ template static int getrf(T... args){return LAPACKE_sgetrf(args...);}
+ template static int getri(T... args){return LAPACKE_sgetri(args...);}
+ };
template<>
- struct LapackeUAMMD{
- template static int getrf(T... args){return LAPACKE_dgetrf(args...);}
- template static int getri(T... args){return LAPACKE_dgetri(args...);}
- };
+ struct LapackeUAMMD{
+ template static int getrf(T... args){return LAPACKE_dgetrf(args...);}
+ template static int getri(T... args){return LAPACKE_dgetri(args...);}
+ };
template<>
- struct LapackeUAMMD{
- template static int getrf(T... args){return LAPACKE_cgetrf(args...);}
- template static int getri(T... args){return LAPACKE_cgetri(args...);}
- };
+ struct LapackeUAMMD{
+ template static int getrf(T... args){return LAPACKE_cgetrf(args...);}
+ template static int getri(T... args){return LAPACKE_cgetri(args...);}
+ };
template<>
- struct LapackeUAMMD{
- template static int getrf(T... args){return LAPACKE_zgetrf(args...);}
- template static int getri(T... args){return LAPACKE_zgetri(args...);}
- };
+ struct LapackeUAMMD{
+ template static int getrf(T... args){return LAPACKE_zgetrf(args...);}
+ template static int getri(T... args){return LAPACKE_zgetri(args...);}
+ };
template<>
- struct LapackeUAMMD>{
- static int getrf(int matrix_layout, lapack_int n, lapack_int m,
- cufftComplex_t* a, lapack_int lda,
- lapack_int* ipiv){
- return LAPACKE_cgetrf(matrix_layout, n, m, (lapack_complex_float*)(a), lda, ipiv);
- }
- static int getri(int matrix_layout, lapack_int n,
- cufftComplex_t* a, lapack_int lda,
- lapack_int* ipiv){
- return LAPACKE_cgetri(matrix_layout, n, (lapack_complex_float*)(a), lda, ipiv);
- }
- };
+ struct LapackeUAMMD>{
+ static int getrf(int matrix_layout, lapack_int n, lapack_int m,
+ cufftComplex_t* a, lapack_int lda,
+ lapack_int* ipiv){
+ return LAPACKE_cgetrf(matrix_layout, n, m, (lapack_complex_float*)(a), lda, ipiv);
+ }
+ static int getri(int matrix_layout, lapack_int n,
+ cufftComplex_t* a, lapack_int lda,
+ lapack_int* ipiv){
+ return LAPACKE_cgetri(matrix_layout, n, (lapack_complex_float*)(a), lda, ipiv);
+ }
+ };
template<>
- struct LapackeUAMMD>{
- static int getrf(int matrix_layout, lapack_int n, lapack_int m,
- cufftComplex_t* a, lapack_int lda,
- lapack_int* ipiv){
- return LAPACKE_zgetrf(matrix_layout, n, m, (lapack_complex_double*)(a), lda, ipiv);
- }
- static int getri(int matrix_layout, lapack_int n,
- cufftComplex_t* a, lapack_int lda,
- lapack_int* ipiv){
- return LAPACKE_zgetri(matrix_layout, n, (lapack_complex_double*)(a), lda, ipiv);
- }
- };
+ struct LapackeUAMMD>{
+ static int getrf(int matrix_layout, lapack_int n, lapack_int m,
+ cufftComplex_t* a, lapack_int lda,
+ lapack_int* ipiv){
+ return LAPACKE_zgetrf(matrix_layout, n, m, (lapack_complex_double*)(a), lda, ipiv);
+ }
+ static int getri(int matrix_layout, lapack_int n,
+ cufftComplex_t* a, lapack_int lda,
+ lapack_int* ipiv){
+ return LAPACKE_zgetri(matrix_layout, n, (lapack_complex_double*)(a), lda, ipiv);
+ }
+ };
+ template<>
+ struct LapackeUAMMD>{
+ static int getrf(int matrix_layout, lapack_int n, lapack_int m,
+ thrust::complex* a, lapack_int lda,
+ lapack_int* ipiv){
+ return LAPACKE_cgetrf(matrix_layout, n, m, (lapack_complex_float*)(a), lda, ipiv);
+ }
+ static int getri(int matrix_layout, lapack_int n,
+ thrust::complex* a, lapack_int lda,
+ lapack_int* ipiv){
+ return LAPACKE_cgetri(matrix_layout, n, (lapack_complex_float*)(a), lda, ipiv);
+ }
+ };
+ template<>
+ struct LapackeUAMMD>{
+ static int getrf(int matrix_layout, lapack_int n, lapack_int m,
+ thrust::complex* a, lapack_int lda,
+ lapack_int* ipiv){
+ return LAPACKE_zgetrf(matrix_layout, n, m, (lapack_complex_double*)(a), lda, ipiv);
+ }
+ static int getri(int matrix_layout, lapack_int n,
+ thrust::complex* a, lapack_int lda,
+ lapack_int* ipiv){
+ return LAPACKE_zgetri(matrix_layout, n, (lapack_complex_double*)(a), lda, ipiv);
+ }
+ };
template
- std::vector invertSquareMatrix(const std::vector &A, lapack_int N){
- lapack_int pivotArray[N];
- int errorHandler;
- auto invA = A;
- errorHandler = LapackeUAMMD::getrf(LAPACK_ROW_MAJOR, N, N, invA.data(), N, pivotArray);
- if(errorHandler){
- throw std::runtime_error("Lapacke getrf failed with error code: " + std::to_string(errorHandler));
+ std::vector invertSquareMatrix(const std::vector &A, lapack_int N){
+ lapack_int pivotArray[N];
+ int errorHandler;
+ auto invA = A;
+ errorHandler = LapackeUAMMD::getrf(LAPACK_ROW_MAJOR, N, N, invA.data(), N, pivotArray);
+ if(errorHandler){
+ throw std::runtime_error("Lapacke getrf failed with error code: " + std::to_string(errorHandler));
+ }
+ errorHandler = LapackeUAMMD::getri(LAPACK_ROW_MAJOR, N, invA.data(), N, pivotArray);
+ if(errorHandler){
+ throw std::runtime_error("Lapacke getri failed with error code: " + std::to_string(errorHandler));
+ }
+ return invA;
}
- errorHandler = LapackeUAMMD::getri(LAPACK_ROW_MAJOR, N, invA.data(), N, pivotArray);
- if(errorHandler){
- throw std::runtime_error("Lapacke getri failed with error code: " + std::to_string(errorHandler));
- }
- return invA;
- }
template
- std::vector matmul(const T &A, int ncol_a, int nrow_a,
- const T2 &B, int ncol_b, int nrow_b){
- std::vector C;
- C.resize(ncol_b*nrow_a);
- for(int i = 0; i C;
+ C.resize(ncol_b*nrow_a);
+ for(int i = 0; i
+ __device__ thrust::pair solve2x2System(thrust::complex A[4], thrust::pair b){
+ const auto det = A[0]*A[3] - A[1]*A[2];
+ const T c0 = (b.first*A[3] - A[1]*b.second)/det;
+ const T d0 = (b.second*A[0] - b.first*A[2])/det;
+ return thrust::make_pair(c0, d0);
+ }
+
template
__device__ thrust::pair solve2x2System(real4 A, thrust::pair b){
const real det = A.x*A.w - A.y*A.z;
@@ -109,6 +143,11 @@ namespace uammd{
return thrust::make_pair(c0, d0);
}
+ template
+ __device__ thrust::pair solve2x2System(real A[4], thrust::pair b){
+ real4 A_r4 = {A[0], A[1], A[2], A[3]};
+ return solve2x2System(A_r4, b);
+ }
}
}
#endif
diff --git a/src/utils/atomics.cuh b/src/utils/atomics.cuh
index fc592524..6797debd 100644
--- a/src/utils/atomics.cuh
+++ b/src/utils/atomics.cuh
@@ -7,8 +7,6 @@ namespace uammd{
template
inline __device__ T atomicAdd(T* address, T val){ return ::atomicAdd(address, val);}
-
-#ifndef SINGLE_PRECISION
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 600
inline __device__ double atomicAdd(double* address, double val){
unsigned long long int* address_as_ull =
@@ -23,7 +21,20 @@ namespace uammd{
return __longlong_as_double(old);
}
#endif
-#endif
+
+ inline __device__ float2 atomicAdd(float2* address, float2 val){
+ float2 newval;
+ if(val.x) newval.x = atomicAdd(&(*address).x, val.x);
+ if(val.y) newval.y = atomicAdd(&(*address).y, val.y);
+ return newval;
+ }
+
+ inline __device__ double2 atomicAdd(double2* address, double2 val){
+ double2 newval;
+ if(val.x) newval.x = atomicAdd(&(*address).x, val.x);
+ if(val.y) newval.y = atomicAdd(&(*address).y, val.y);
+ return newval;
+ }
inline __device__ real4 atomicAdd(real4* address, real4 val){
real4 newval;
@@ -41,21 +52,12 @@ namespace uammd{
if(val.z) newval.z = atomicAdd(&(*address).z, val.z);
return newval;
}
-
- inline __device__ real2 atomicAdd(real2* address, real2 val){
- real2 newval;
- if(val.x) newval.x = atomicAdd(&(*address).x, val.x);
- if(val.y) newval.y = atomicAdd(&(*address).y, val.y);
- return newval;
- }
-
-
+
template
inline __device__ T2 atomicAdd(T &ref, T2 val){
return atomicAdd(&ref, val);
}
-
template
inline __device__ T2 atomicAdd(thrust::tuple &&refs, T2 val){
T2 newval;
diff --git a/src/utils/vector.cuh b/src/utils/vector.cuh
index df4ed98c..43a4330f 100644
--- a/src/utils/vector.cuh
+++ b/src/utils/vector.cuh
@@ -976,4 +976,339 @@ namespace uammd{
}
}
+namespace uammd{
+////////////////////////////////////VEC2///////////////////////////////////////
+ template
+ struct vec2{
+ T x,y;
+ };
+
+ template
+ vec2 make_vec2(T x, T y){ return {x, y};}
+
+ template
+ VECATTR vec2 operator +(const vec2 &a, const vec2 &b){
+ return {a.x + b.x, a.y + b.y};
+ }
+
+ template
+ VECATTR vec2 operator +(const vec2 &a, const T &b){
+ return {a.x + b, a.y + b};
+ }
+
+ template
+ VECATTR vec2 operator -(const vec2 &a, const vec2 &b){
+ return {a.x - b.x, a.y - b.y};
+ }
+
+
+ template
+ VECATTR vec2 operator -(const vec2 &a, const T &b){
+ return {a.x - b, a.y - b};
+ }
+
+ template
+ VECATTR vec2 operator -(const T &b, const vec2 &a){
+ return {b-a.x, b-a.y};
+ }
+
+
+ template
+ VECATTR vec2 operator *(const vec2 &a, const vec2 &b){
+ return {a.x * b.x, a.y * b.y};
+ }
+
+
+ template
+ VECATTR vec2 operator *(const vec2 &a, const T &b){
+ return {a.x * b, a.y * b};
+ }
+
+ template
+ VECATTR vec2 operator /(const vec2 &a, const vec2 &b){
+ return {a.x / b.x, a.y / b.y};
+ }
+
+ template
+ VECATTR vec2 operator /(const vec2 &a, const T &b){
+ return {a.x/b, a.y/b};
+ }
+
+
+ template
+ VECATTR vec2 operator /(const T &b, const vec2 &a){
+ return {b / a.x, b / a.y};
+ }
+
+ template
+ VECATTR void operator +=(vec2 &a, const vec2 &b){
+ a = a + b;
+ }
+
+ template
+ VECATTR vec2 operator +(const T &b, const vec2 &a){return a+b;}
+
+ template
+ VECATTR void operator +=(vec2 &a, const T &b){
+ a = a+b;
+ }
+
+ template
+ VECATTR void operator -=(vec2 &a, const vec2 &b){
+ a = a-b;
+ }
+ template
+ VECATTR void operator -=(vec2 &a, const T &b){
+ a=a-b;
+ }
+ template
+ VECATTR void operator *=(vec2 &a, const vec2 &b){
+ a = a*b;
+ }
+
+ template
+ VECATTR vec2 operator *(const T &b, const vec2 &a){
+ return a*b;
+ }
+
+ template
+ VECATTR void operator *=(vec2 &a, const T &b){
+ a=a*b;
+ }
+
+ template
+ VECATTR void operator /=(vec2 &a, const vec2 &b){
+ a = a/b;
+ }
+
+ template
+ VECATTR void operator /=(vec2 &a, const T &b){
+ a = a/b;
+ }
+
+////////////////////////////////////VEC3///////////////////////////////////////
+ template
+ struct vec3{
+ T x,y,z;
+ };
+
+ template
+ vec3 make_vec3(T x, T y, T z){ return {x, y, z};}
+
+ template
+ VECATTR vec3 operator +(const vec3 &a, const vec3 &b){
+ return {a.x + b.x, a.y + b.y, a.z + b.z};
+ }
+
+ template
+ VECATTR vec3 operator +(const vec3 &a, const T &b){
+ return {a.x + b, a.y + b, a.z + b};
+ }
+
+ template
+ VECATTR vec3