diff --git a/docs/BoundaryValueProblemSolver.rst b/docs/BoundaryValueProblemSolver.rst index 04867405..d2f95bf4 100644 --- a/docs/BoundaryValueProblemSolver.rst +++ b/docs/BoundaryValueProblemSolver.rst @@ -19,8 +19,7 @@ With the boundary conditions: Being :math:`\alpha_n,\beta_n` some arbitrary parameters. These parameters can take any value (including zero) as long as the resulting BVP remains well defined. For instance, making :math:`\alpha_1,\beta_1` equal to zero at the same time results in a system with no unique solution. -Both :math:`y(z)` and :math:`f(z)` can be complex-valued. In the main branches of the UAMMD repository only :math:`\alpha_2,\beta_2` can be complex numbers (the rest of the parameters, including :math:`k`, must be real valued). The branch complex_bvp contains an adaptation of the solver that allows every parameter to be complex. As a side note, should the need arise in the future, it would be fairly easy to modify the solver as to make the type of the parameters a template argument. - +Both :math:`y(z)` and :math:`f(z)` can be real or complex-valued as well as :math:`\alpha_2,\beta_2,k` and any other parameter. .. hint:: Initialization of the batched BVP solver requires inverting a dense matrix for each value of :math:`k`, which can become quite expensive. The solver tries to mitigate this cost by inverting these matrices in parallel, but experience suggests that letting it use more than 4 cores is counter-productive. @@ -37,10 +36,9 @@ Usage ------ Every function and class in the BVP solver source code lies under the :cpp:`uammd::BVP` namespace. -The BVP solver library exposes two main classes: - +The BVP solver library exposes two main template classes , designed to manage both real and complex-valued numbers. To ensure consistent behavior and avoid unexpected results, it is recommended to use the complex number types defined in the Thrust library, specifically thrust::complex, when working with complex numbers. -.. cpp:class:: BatchedBVPHandler +.. cpp:class:: template BatchedBVPHandler Used to initialize and hold the information for a group of boundary value problems. Each subproblem can have different parameters (mainly :math:`\alpha_n,\beta_n,k` in the above equations. This class cannot be used in device code, its only used for initialization. See :cpp:any:`BatchedBVPGPUSolver`. @@ -56,14 +54,14 @@ The BVP solver library exposes two main classes: :param H: The location of the boundary conditions (goes from -H to H). :param nz: Number of elements of a subproblem (all subproblems must have the same size). - .. cpp:function:: BatchedBVPGPUSolver getGPUSolver(); + .. cpp:function:: template BatchedBVPGPUSolver_impl getGPUSolver(); Provides an instance of the solver to be used in the GPU. -.. cpp:class:: BatchedBVPGPUSolver +.. cpp:class:: template BatchedBVPGPUSolver - While :cpp:any:`BatchedBVPHandler` is used to initialize and store the different subproblems, this class is used to actually solve the subproblems in a CUDA kernel. - This class has no public constructors, the only way to get an instance to it is via :cpp:any:`BatchedBVPHandler::getGPUSolver`. + While :cpp:any:`BatchedBVPHandler` is used to initialize and store the different subproblems, this class is used to actually solve the subproblems in a CUDA kernel. + This class has no public constructors, the only way to get an instance to it is via :cpp:any:`BatchedBVPHandler::getGPUSolver`. .. cpp:function:: template __device__ void solve(int instance, const FnIterator& fn, T alpha_2, T beta_2, AnIterator& an, CnIterator& cn); @@ -93,6 +91,20 @@ Initialization requires an iterator to a special type of functor that provides t Returns :math:`\alpha_1` or :math:`\beta_1`, depending on which BC this class represents. +Aliases for Real and Complex Types +---------------------------------- + +To facilitate the use of the BVP solver with real and complex numbers, the following aliases are defined: + +.. code-block:: cpp + + using BatchedBVPHandlerReal = BatchedBVPHandler_impl; + using BatchedBVPHandlerrComplex = BatchedBVPHandler_impl>; + + using BatchedBVPGPUSolverReal = BatchedBVPGPUSolver_impl; + using BatchedBVPGPUSolverComplex = BatchedBVPGPUSolver_impl>; + +These aliases allow for a more intuitive and type-safe way to work with the BVP solver for different numerical types. Example ++++++++ diff --git a/src/Integrator/BDHI/BDHI_FCM.cu b/src/Integrator/BDHI/BDHI_FCM.cu index 61287b01..4d2e6a14 100644 --- a/src/Integrator/BDHI/BDHI_FCM.cu +++ b/src/Integrator/BDHI/BDHI_FCM.cu @@ -4,6 +4,7 @@ #include"BDHI_FCM.cuh" namespace uammd{ namespace BDHI{ + inline auto FCMIntegrator::computeHydrodynamicDisplacements(){ auto pos = pd->getPos(access::location::gpu, access::mode::read); auto force = pd->getForce(access::location::gpu, access::mode::read); @@ -15,6 +16,7 @@ namespace uammd{ temperature, 1.0/sqrt(dt), st); } + inline void FCMIntegrator::updateInteractors(){ for(auto forceComp: interactors) forceComp->updateSimulationTime(steps*dt); if(steps==1){ @@ -26,6 +28,7 @@ namespace uammd{ } } + inline void FCMIntegrator::resetForces(){ int numberParticles = pg->getNumberParticles(); auto force = pd->getForce(access::location::gpu, access::mode::write); @@ -34,6 +37,7 @@ namespace uammd{ CudaCheckError(); } + inline void FCMIntegrator::resetTorques(){ int numberParticles = pg->getNumberParticles(); auto torque = pd->getTorque(access::location::gpu, access::mode::write); @@ -42,6 +46,7 @@ namespace uammd{ CudaCheckError(); } + inline void FCMIntegrator::computeCurrentForces(){ resetForces(); if (pd->isDirAllocated()) resetTorques(); @@ -56,6 +61,7 @@ namespace uammd{ /*With all the terms computed, update the positions*/ /*T=0 case is templated*/ template + inline __global__ void integrateEulerMaruyamaD(real4* pos, real4* dir, IndexIterator indexIterator, @@ -75,7 +81,7 @@ namespace uammd{ /*Write to global memory*/ pos[i] = make_real4(p,c); /*Update the orientation*/ - if(dir){ + if(dir){ Quat dirc = dir[i]; //printf("W %f %f %f\n", angularV[id].x, angularV[id].y, angularV[id].z); //printf("V %f %f %f\n", linearV[id].x, linearV[id].y, linearV[id].z); @@ -86,6 +92,7 @@ namespace uammd{ } } + inline void FCMIntegrator::forwardTime(){ steps++; sys->log("[BDHI::FCM] Performing integration step %d", steps); @@ -97,12 +104,12 @@ namespace uammd{ auto angularVelocities = disp.second; auto indexIter = pg->getIndexIterator(access::location::gpu); auto pos = pd->getPos(access::location::gpu, access::mode::readwrite); - auto dir = pd->getDirIfAllocated(access::location::gpu, access::mode::readwrite); + auto dir = pd->getDirIfAllocated(access::location::gpu, access::mode::readwrite); real3* d_linearV = thrust::raw_pointer_cast(linearVelocities.data()); real3* d_angularV = dir.raw()?thrust::raw_pointer_cast(angularVelocities.data()):nullptr; int BLOCKSIZE = 128; /*threads per block*/ int nthreads = BLOCKSIZE>>(pos.raw(), dir.raw(), indexIter, diff --git a/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh b/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh index a02a22b4..58aa5dd7 100644 --- a/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh +++ b/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh @@ -169,7 +169,7 @@ else throw std::runtime_error("[DPStokesSlab] Can only average in direction X (0 real gw; real tolerance; WallMode mode; - shared_ptr bvpSolver; + shared_ptr bvpSolver; }; diff --git a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/Correction.cuh b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/Correction.cuh index f6e5f6cf..a0a2a07d 100644 --- a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/Correction.cuh +++ b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/Correction.cuh @@ -506,7 +506,7 @@ namespace uammd{ } auto computeZeroModeBoundaryConditions(int nz, real H, WallMode mode){ - BVP::SchurBoundaryCondition bcs(nz, H); + BVP::SchurBoundaryConditionReal bcs(nz, H); if(mode == WallMode::bottom){ correction_ns::TopBoundaryConditions top(0, H); correction_ns::BottomBoundaryConditions bot(0, H); diff --git a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/initialization.cu b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/initialization.cu index d89ed298..253a9ba0 100644 --- a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/initialization.cu +++ b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/initialization.cu @@ -144,7 +144,7 @@ namespace uammd{ auto botBC = thrust::make_transform_iterator(thrust::make_counting_iterator(0), botdispatch); int numberSystems = (nk.x/2+1)*nk.y; int nz = grid.cellDim.z; - this->bvpSolver = std::make_shared(klist, topBC, botBC, numberSystems, halfH, nz); + this->bvpSolver = std::make_shared(klist, topBC, botBC, numberSystems, halfH, nz); CudaCheckError(); } diff --git a/src/Integrator/BDHI/FCM/FCM_impl.cuh b/src/Integrator/BDHI/FCM/FCM_impl.cuh index 989b0ba3..36fd9890 100644 --- a/src/Integrator/BDHI/FCM/FCM_impl.cuh +++ b/src/Integrator/BDHI/FCM/FCM_impl.cuh @@ -224,6 +224,7 @@ namespace uammd{ }; template + inline cached_vector spreadForces(IterPos& pos, IterForce& force, int numberParticles, std::shared_ptr kernel, @@ -248,6 +249,7 @@ namespace uammd{ }; template + inline auto getCoordinateVector(Container &v, int coord){ cached_vector v_a(v.size()); T* ptr= (T*)thrust::raw_pointer_cast(v.data()); @@ -264,7 +266,9 @@ namespace uammd{ return {thrust::get<0>(a), thrust::get<1>(a),thrust::get<2>(a)}; } }; + template + inline auto interleave(Container &a, Container &b, Container &c){ auto zip = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin(), c.begin())); cached_vector res(a.size()); @@ -272,6 +276,7 @@ namespace uammd{ return res; } + inline cached_vector forwardTransform(cached_vector& gridReal, int3 n, cufftHandle plan, cudaStream_t st){ @@ -286,6 +291,7 @@ namespace uammd{ } + inline __global__ void addTorqueCurl(complex3 *gridTorquesFourier, complex3* gridVelsFourier, Grid grid){ int id = blockDim.x*blockIdx.x + threadIdx.x; const int3 nk = grid.cellDim; @@ -307,6 +313,7 @@ namespace uammd{ template + inline void addSpreadTorquesFourier(IterPos& pos, IterTorque& torque, int numberParticles, Grid grid, std::shared_ptr kernel, @@ -339,6 +346,7 @@ namespace uammd{ Output:gridVels = B·FFTf·S·F -> B \propto (I-k^k/|k|^2) */ /*A thread per fourier node*/ + inline __global__ void forceFourier2Vel(const complex3 * gridForces, /*Input array*/ complex3 * gridVels, /*Output array, can be the same as input*/ real vis, @@ -361,6 +369,7 @@ namespace uammd{ gridVels[id] = projectFourier(k2, dk, factor)*(B/real(ncells.x*ncells.y*ncells.z)); } + inline void convolveFourier(cached_vector& gridVelsFourier, real viscosity, Grid grid, cudaStream_t st){ System::log("[BDHI::FCM] Wave space velocity scaling"); /*Scale the wave space grid forces, transforming in velocities -> B·FFT·S·F*/ @@ -379,6 +388,7 @@ namespace uammd{ This kernel gets v_k = gridVelsFourier = B·FFtt·S·F as input and adds 1/√σ·√B(k)·dWw. Keeping special care that v_k = v*_{N-k}, which implies that dWw_k = dWw*_{N-k} */ + inline __global__ void fourierBrownianNoise(complex3 * gridVelsFourier, Grid grid, real prefactor,/* sqrt(2·T/dt)*/ @@ -451,6 +461,7 @@ namespace uammd{ } } + inline void addBrownianNoise(cached_vector& gridVelsFourier, real temperature, real viscosity, real prefactor, uint seed, @@ -487,6 +498,7 @@ namespace uammd{ } } + inline cached_vector inverseTransform(cached_vector& gridFourier, int3 n, cufftHandle plan, cudaStream_t st){ int nx = 2*(n.x/2+1); @@ -501,6 +513,7 @@ namespace uammd{ } template + inline cached_vector interpolateVelocity(IterPos& pos, cached_vector& gridVels, Grid grid, std::shared_ptr kernel, int numberParticles, cudaStream_t st){ @@ -524,6 +537,7 @@ namespace uammd{ // = 0.5( i*k_y*V_z - i*k_z(V_y), i*k_z(V_x) - i*k_x*V_z, i*k_x*V_y - i*k_y*V_x) //Overwrite the output vector with the angular velocities in Fourier space //The input velocity vector is overwritten + inline __global__ void computeVelocityCurlFourier(const complex3 *gridVelsFourier, complex3* gridAngVelsFourier, Grid grid){ @@ -549,6 +563,7 @@ namespace uammd{ gridAngVelsFourier[id] = gridAng; } + inline cached_vector computeGridAngularVelocityFourier(cached_vector& gridVelsFourier, Grid grid, cudaStream_t st){ const int3 n = grid.cellDim; @@ -565,6 +580,7 @@ namespace uammd{ } template + inline cached_vector interpolateAngularVelocity(IterPos& pos, cached_vector& gridAngVels, Grid grid, @@ -584,6 +600,7 @@ namespace uammd{ } template + inline std::pair, cached_vector> FCM_impl::computeHydrodynamicDisplacements(real4* pos, real4* force, real4* torque, diff --git a/src/Integrator/BDHI/FCM/utils.cuh b/src/Integrator/BDHI/FCM/utils.cuh index c3d628ab..00cddd2d 100644 --- a/src/Integrator/BDHI/FCM/utils.cuh +++ b/src/Integrator/BDHI/FCM/utils.cuh @@ -22,6 +22,7 @@ namespace uammd{ using complex = cufftComplex_t; using complex3 = cufftComplex3_t; + inline __device__ int3 indexToWaveNumber(int i, int3 nk){ int ikx = i%(nk.x/2+1); int iky = (i/(nk.x/2+1))%nk.y; @@ -32,10 +33,12 @@ namespace uammd{ return make_int3(ikx, iky, ikz); } + inline __device__ real3 waveNumberToWaveVector(int3 ik, real3 L){ return (real(2.0)*real(M_PI)/L)*make_real3(ik.x, ik.y, ik.z); } + inline __device__ real3 getGradientFourier(int3 ik, int3 nk, real3 L){ const bool isUnpairedX = ik.x == (nk.x - ik.x); const bool isUnpairedY = ik.y == (nk.y - ik.y); @@ -55,6 +58,7 @@ namespace uammd{ unpaired modes set to zero fr is the factor to project */ + inline __device__ real3 projectFourier(real k2, real3 dk, real3 fr){ const real invk2 = real(1.0)/k2; real3 vr = fr - dk*dot(fr, dk*invk2); @@ -68,6 +72,7 @@ namespace uammd{ unpaired modes set to zero fr is the factor to project */ + inline __device__ complex3 projectFourier(real k2, real3 dk, complex3 factor){ real3 re = projectFourier(k2, dk, make_real3(factor.x.x, factor.y.x, factor.z.x)); real3 imag = projectFourier(k2, dk, make_real3(factor.x.y, factor.y.y, factor.z.y)); @@ -77,6 +82,7 @@ namespace uammd{ /*Compute gaussian complex noise dW, std = prefactor -> ||z||^2 = /sqrt(2)+/sqrt(2) = prefactor*/ /*A complex random number for each direction*/ + inline __device__ complex3 generateNoise(real prefactor, uint id, uint seed1, uint seed2){ //Uncomment to use uniform numbers instead of gaussian Saru saru(id, seed1, seed2); complex3 noise; @@ -91,6 +97,7 @@ namespace uammd{ return noise; } + inline __device__ bool isNyquistWaveNumber(int3 cell, int3 ncells){ /*Beware of nyquist points! They only appear with even cell dimensions There are 8 nyquist points at most (cell=0,0,0 is excluded at the start of the kernel) diff --git a/src/Integrator/BDHI/PSE/initialization.cu b/src/Integrator/BDHI/PSE/initialization.cu index 06f0dd8e..883674a5 100644 --- a/src/Integrator/BDHI/PSE/initialization.cu +++ b/src/Integrator/BDHI/PSE/initialization.cu @@ -6,7 +6,7 @@ Initialization #include"Integrator/BDHI/BDHI_PSE.cuh" namespace uammd{ namespace BDHI{ - namespace pse_ns{ + namespace pse_ns{ void checkInputValidity(BDHI::PSE::Parameters par){ real3 L = par.box.boxSize; if(L.x == real(0.0) && L.y == real(0.0) && L.z == real(0.0)){ @@ -16,10 +16,10 @@ namespace uammd{ if(L.x != L.y || L.y != L.z || L.x != L.z){ System::log("[BDHI::PSE] Non cubic boxes are not really tested!"); } - - + + } - + long double computeSelfMobility(PSE::Parameters par){ //O(a^8) accuracy. See Hashimoto 1959. //With a Gaussian this expression has a minimum deviation from measuraments of 7e-7*rh at L=64*rh. @@ -34,11 +34,12 @@ namespace uammd{ long double a6pref = 16.0l*M_PIl*M_PIl/45.0l + 630.0L*b*b; return 1.0l/(6.0l*M_PIl*par.viscosity*rh)*(1.0l-c*a+(4.0l/3.0l)*M_PIl*a3-a6pref*a3*a3); } - + } - - PSE::PSE(shared_ptr pg, Parameters par): - pg(pg), hydrodynamicRadius(par.hydrodynamicRadius){ + +PSE::PSE(shared_ptr pg, Parameters par): + pg(pg), hydrodynamicRadius(par.hydrodynamicRadius), + temperature(par.temperature), dt(par.dt){ System::log("[BDHI::PSE] Initialized"); this->M0 = pse_ns::computeSelfMobility(par); System::log("[BDHI::PSE] Self mobility: %f", M0); diff --git a/src/Integrator/VerletNVE.cu b/src/Integrator/VerletNVE.cu index 8689e602..698eb7fe 100644 --- a/src/Integrator/VerletNVE.cu +++ b/src/Integrator/VerletNVE.cu @@ -30,6 +30,7 @@ namespace uammd{ + inline VerletNVE::VerletNVE(shared_ptr pg, VerletNVE::Parameters par): Integrator(pg, "VerletNVE"), dt(par.dt), energy(par.energy), is2D(par.is2D), @@ -56,6 +57,7 @@ namespace uammd{ CudaSafeCall(cudaStreamCreate(&stream)); } + inline VerletNVE::~VerletNVE(){ cudaStreamDestroy(stream); } @@ -88,6 +90,7 @@ namespace uammd{ } } + inline void VerletNVE::initializeVelocities(){ //In the first step, compute energy in the system //in order to adapt the initial kinetic energy to match the input total energy @@ -131,6 +134,7 @@ namespace uammd{ } template + inline void VerletNVE::callIntegrate(){ int numberParticles = pg->getNumberParticles(); int Nthreads = 128; @@ -152,6 +156,7 @@ namespace uammd{ numberParticles, dt, is2D); } + inline void VerletNVE::resetForces(){ int numberParticles = pg->getNumberParticles(); auto force = pd->getForce(access::location::gpu, access::mode::write); @@ -159,6 +164,7 @@ namespace uammd{ thrust::fill(thrust::cuda::par.on(stream), force_gr, force_gr + numberParticles, real4()); } + inline void VerletNVE::firstStepPreparation(){ if(initVelocities){ initializeVelocities(); @@ -173,6 +179,7 @@ namespace uammd{ } //Move the particles in my group 1 dt in time. + inline void VerletNVE::forwardTime(){ steps++; sys->log("[VerletNVE] Performing integration step %d", steps); @@ -191,6 +198,7 @@ namespace uammd{ namespace verletnve_ns{ template + inline __global__ void sumKineticEnergy(const VelocityIterator vel, EnergyIterator energy, const MassIterator mass, @@ -203,6 +211,7 @@ namespace uammd{ }; + inline real VerletNVE::sumEnergy(){ int numberParticles = pg->getNumberParticles(); auto vel_gr = pg->getPropertyIterator(pd->getVel(access::location::gpu, access::mode::read)); diff --git a/src/Interactor/DoublyPeriodic/PoissonSlab/BVPPoisson.cuh b/src/Interactor/DoublyPeriodic/PoissonSlab/BVPPoisson.cuh index 161ff7ab..42c3e2d9 100644 --- a/src/Interactor/DoublyPeriodic/PoissonSlab/BVPPoisson.cuh +++ b/src/Interactor/DoublyPeriodic/PoissonSlab/BVPPoisson.cuh @@ -90,7 +90,7 @@ namespace uammd{ } class BVPPoissonSlab{ - std::shared_ptr bvpSolver; + std::shared_ptr bvpSolver; real2 Lxy; real H; int3 cellDim; @@ -150,7 +150,7 @@ namespace uammd{ BoundaryConditionsDispatch(klist, H)); int numberSystems = (nk.x/2+1)*nk.y; - this->bvpSolver = std::make_shared(klist, topBC, bottomBC, numberSystems, H, cellDim.z); + this->bvpSolver = std::make_shared(klist, topBC, bottomBC, numberSystems, H, cellDim.z); CudaCheckError(); } }; diff --git a/src/Interactor/NeighbourList/BasicList/BasicListBase.cuh b/src/Interactor/NeighbourList/BasicList/BasicListBase.cuh index 568213a7..71e6426a 100644 --- a/src/Interactor/NeighbourList/BasicList/BasicListBase.cuh +++ b/src/Interactor/NeighbourList/BasicList/BasicListBase.cuh @@ -34,6 +34,7 @@ This class maintains a CellList and uses it to generate a neighbour list. It sho namespace uammd{ namespace BasicNeighbourList_ns{ + inline __global__ void fillBasicNeighbourList(CellList_ns::NeighbourContainer ni, int *neighbourList, int* numberNeighbours, int maxNeighboursPerParticle, diff --git a/src/Interactor/Potential/DPD.cuh b/src/Interactor/Potential/DPD.cuh index 6a8f544c..7860ec76 100644 --- a/src/Interactor/Potential/DPD.cuh +++ b/src/Interactor/Potential/DPD.cuh @@ -168,11 +168,13 @@ namespace uammd{ }; template<> + inline void DPD_impl::printGamma(){ System::log("[Potential::DPD] gamma: %f", gamma.gamma); } template + inline void DPD_impl::printGamma(){ System::log("[Potential::DPD] Using %s for dissipation", type_name().c_str()); } diff --git a/src/ParticleData/ParticleData.cuh b/src/ParticleData/ParticleData.cuh index 2bbe2ccd..33630fee 100644 --- a/src/ParticleData/ParticleData.cuh +++ b/src/ParticleData/ParticleData.cuh @@ -316,7 +316,7 @@ namespace uammd{ #define INIT_PROPERTIES_T(NAME, name) , name(BOOST_PP_STRINGIZE(NAME), sys) #define INIT_PROPERTIES(r,data, tuple) INIT_PROPERTIES_T(PROPNAME_CAPS(tuple), PROPNAME(tuple)) - ParticleData::ParticleData(int numberParticles, shared_ptr sys): + inline ParticleData::ParticleData(int numberParticles, shared_ptr sys): numberParticles(numberParticles), originalOrderIndexCPUNeedsUpdate(true), sys(sys) @@ -334,7 +334,7 @@ namespace uammd{ } //Sort the particles to improve data locality - void ParticleData::sortParticles(cudaStream_t st=0){ + inline void ParticleData::sortParticles(cudaStream_t st=0){ sys->log("[ParticleData] Sorting particles..."); { @@ -364,7 +364,7 @@ namespace uammd{ - void ParticleData::changeNumParticles(int Nnew){ + inline void ParticleData::changeNumParticles(int Nnew){ sys->log("[ParticleData] CHANGE PARTICLES FUNCTIONALITY NOT IMPLEMENTED YET!!!"); sys->log("[ParticleData] Adding/Removing particles..."); this->numberParticles = Nnew; diff --git a/src/ParticleData/ParticleGroup.cuh b/src/ParticleData/ParticleGroup.cuh index 0318a696..f246e1f6 100644 --- a/src/ParticleData/ParticleGroup.cuh +++ b/src/ParticleData/ParticleGroup.cuh @@ -135,7 +135,7 @@ namespace uammd{ namespace ParticleGroup_ns{ //Updates the indices of the particles in a group using pd->getIdOrderedIndices() - __global__ void updateGroupIndices(//An array that stores the indices of the particles in the group per id. + inline __global__ void updateGroupIndices(//An array that stores the indices of the particles in the group per id. const int * __restrict__ id2index, //Out: the current ParticleData indices of the particles in the group int * __restrict__ particlesIndices, @@ -372,7 +372,7 @@ namespace uammd{ }; template - ParticleGroup::ParticleGroup(ParticleSelector selector, + inline ParticleGroup::ParticleGroup(ParticleSelector selector, std::shared_ptr pd, std::string name): pd(pd), sys(pd->getSystem()), name(name){ sys->log("[ParticleGroup] Group %s created with selector %s", @@ -398,7 +398,7 @@ namespace uammd{ //Specialization of a particle group with an All selector template<> - ParticleGroup::ParticleGroup(particle_selector::All selector, + inline ParticleGroup::ParticleGroup(particle_selector::All selector, std::shared_ptr pd, std::string name): pd(pd), sys(pd->getSystem()), name(name){ @@ -411,13 +411,13 @@ namespace uammd{ name.c_str(), numberParticles); } - ParticleGroup::ParticleGroup(std::shared_ptr pd, + inline ParticleGroup::ParticleGroup(std::shared_ptr pd, std::string name): ParticleGroup(particle_selector::All(), pd, name){} //Specialization of an empty particle group template<> - ParticleGroup::ParticleGroup(particle_selector::None selector, + inline ParticleGroup::ParticleGroup(particle_selector::None selector, std::shared_ptr pd, std::string name): pd(pd), sys(pd->getSystem()), name(name){ @@ -430,7 +430,7 @@ namespace uammd{ //Constructor of ParticleGroup when an ID list is provided template - ParticleGroup::ParticleGroup(InputIterator begin, InputIterator end, + inline ParticleGroup::ParticleGroup(InputIterator begin, InputIterator end, std::shared_ptr pd, std::string name): pd(pd), sys(pd->getSystem()), name(name){ @@ -455,7 +455,7 @@ namespace uammd{ //This is trivial with pd->getIdOrderedIndices()! //Handle a reordering of the particles (which invalids the previous relation between IDs and indices) - void ParticleGroup::computeIndexList(bool forceUpdate){ + inline void ParticleGroup::computeIndexList(bool forceUpdate){ if(numberParticles==0) return; if(this->needsIndexListUpdate || forceUpdate){//Update only if needed sys->log("[ParticleGroup] Updating group %s after last particle sorting", name.c_str()); @@ -474,7 +474,7 @@ namespace uammd{ } } //Add particles to the group via an array with ids - void ParticleGroup::addParticlesById(access::location loc, const int *ids, int N){ + inline void ParticleGroup::addParticlesById(access::location loc, const int *ids, int N){ sys->log("[ParticleGroup] Adding %d particles to group %s via ids!", N, name.c_str()); int numberParticlesPrev = numberParticles; numberParticles += N; @@ -511,7 +511,8 @@ namespace uammd{ } namespace ParticleGroup_ns{ - __global__ void IdsFromIndices(const int *indices, const int *index2Id, int* groupParticleIds, int N){ + + inline __global__ void IdsFromIndices(const int *indices, const int *index2Id, int* groupParticleIds, int N){ int tid = blockIdx.x*blockDim.x + threadIdx.x; if(tid>=N) return; int index = indices[tid]; @@ -521,7 +522,7 @@ namespace uammd{ } //Add particles to the group via an array with the current indices of the particles in pd (faster) - void ParticleGroup::addParticlesByCurrentIndex(access::location loc, const int *indices, int N){ + inline void ParticleGroup::addParticlesByCurrentIndex(access::location loc, const int *indices, int N){ sys->log("[ParticleGroup] Adding %d particles to group %s via indices!", N, name.c_str()); if(N==0) return; int numberParticlesPrev = numberParticles; diff --git a/src/System/Log.h b/src/System/Log.h index e2cd189d..5d9e8f62 100644 --- a/src/System/Log.h +++ b/src/System/Log.h @@ -18,7 +18,7 @@ namespace uammd{ constexpr int maxLogLevel = 6; #endif - ElementType getLogLevelInfo(int level){ + inline ElementType getLogLevelInfo(int level){ static const std::map printMap{ {CRITICAL , std::make_tuple(stderr, "\e[101m[CRITICAL] ", "\e[0m\n")}, {ERROR , std::make_tuple(stderr, "\e[91m[ERROR] \e[0m", "\n")}, diff --git a/src/System/System.h b/src/System/System.h index 4eb7cd83..a10e37c9 100644 --- a/src/System/System.h +++ b/src/System/System.h @@ -68,8 +68,8 @@ namespace uammd{ SystemParameters sysPar; Timer tim; - void printWelcome(); - void printFarewell(); + inline void printWelcome(); + inline void printFarewell(); int m_argc = 0; char ** m_argv = nullptr; @@ -202,7 +202,7 @@ namespace uammd{ }; - void System::printWelcome(){ + inline void System::printWelcome(){ std::string separator; fori(0,60) separator += "━"; separator += "┓"; @@ -223,7 +223,7 @@ namespace uammd{ log("Computation started at %s", std::ctime(&time)); } - void System::printFarewell(){ + inline void System::printFarewell(){ cudaDeviceSynchronize(); std::time_t time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); log("Computation finished at %s",std::ctime(&time)); diff --git a/src/misc/BoundaryValueProblem/BVPMemory.cuh b/src/misc/BoundaryValueProblem/BVPMemory.cuh index 1fafcdc9..d4f49227 100644 --- a/src/misc/BoundaryValueProblem/BVPMemory.cuh +++ b/src/misc/BoundaryValueProblem/BVPMemory.cuh @@ -19,7 +19,7 @@ namespace uammd{ }; namespace detail{ - size_t computeExtraAlignment(size_t addressOffset, size_t alignment){ + inline size_t computeExtraAlignment(size_t addressOffset, size_t alignment){ size_t extraAlignment = 0; bool isMissAligned = addressOffset%alignment != 0; if(isMissAligned) @@ -120,4 +120,4 @@ namespace uammd{ } } -#endif \ No newline at end of file +#endif diff --git a/src/misc/BoundaryValueProblem/BVPSchurComplementMatrices.cuh b/src/misc/BoundaryValueProblem/BVPSchurComplementMatrices.cuh index 4611db81..d405493c 100644 --- a/src/misc/BoundaryValueProblem/BVPSchurComplementMatrices.cuh +++ b/src/misc/BoundaryValueProblem/BVPSchurComplementMatrices.cuh @@ -117,16 +117,17 @@ namespace uammd{ } }; - class SchurBoundaryCondition{ + template + class SchurBoundaryCondition_impl{ int nz; real H; BCRows bcs; public: - SchurBoundaryCondition(int nz, real H):nz(nz), H(H), bcs(nz){} + SchurBoundaryCondition_impl(int nz, real H):nz(nz), H(H), bcs(nz){} template - std::vector computeBoundaryConditionMatrix(const TopBC &top, const BottomBC &bottom){ - std::vector CandD(2*nz+4, 0); + std::vector computeBoundaryConditionMatrix(const TopBC &top, const BottomBC &bottom){ + std::vector CandD(2*nz+4, 0); auto topRow = computeTopRow(top, bottom); auto bottomRow = computeBottomRow(top, bottom); std::copy(topRow.begin(), topRow.end()-2, CandD.begin()); @@ -141,8 +142,8 @@ namespace uammd{ private: template - std::vector computeTopRow(const TopBC &top, const BottomBC &bottom){ - std::vector topRow(nz+2, 0); + std::vector computeTopRow(const TopBC &top, const BottomBC &bottom){ + std::vector topRow(nz+2, 0); auto tfi = bcs.topFirstIntegral(); auto tsi = bcs.topSecondIntegral(); auto tfiFactor = top.getFirstIntegralFactor(); @@ -156,8 +157,8 @@ namespace uammd{ } template - std::vector computeBottomRow(const TopBC &top, const BottomBC &bottom){ - std::vector bottomRow(nz+2, 0); + std::vector computeBottomRow(const TopBC &top, const BottomBC &bottom){ + std::vector bottomRow(nz+2, 0); auto bfi = bcs.bottomFirstIntegral(); auto bsi = bcs.bottomSecondIntegral(); auto bfiFactor = bottom.getFirstIntegralFactor(); @@ -172,10 +173,15 @@ namespace uammd{ }; - std::vector computeSecondIntegralMatrix(real k, real H, int nz){ - std::vector A(nz*nz, 0); + using SchurBoundaryConditionReal = SchurBoundaryCondition_impl; + using SchurBoundaryConditionComplex = SchurBoundaryCondition_impl>; + using SchurBoundaryConditionComplexDouble = SchurBoundaryCondition_impl>; + + template + std::vector computeSecondIntegralMatrix(T k, real H, int nz){ + std::vector A(nz*nz, 0); SecondIntegralMatrix sim(nz); - real kH2 = k*k*H*H; + T kH2 = k*k*H*H; fori(0, nz){ forj(0,nz){ A[i+nz*j] = (i==j) - kH2*sim.getElement(i, j); @@ -184,9 +190,10 @@ namespace uammd{ return std::move(A); } - std::vector computeInverseSecondIntegralMatrix(real k, real H, int nz){ + template + std::vector computeInverseSecondIntegralMatrix(T k, real H, int nz){ if(k==0){ - std::vector invA(nz*nz, 0); + std::vector invA(nz*nz, 0); fori(0, nz){ invA[i+nz*i] = 1; } diff --git a/src/misc/BoundaryValueProblem/BVPSolver.cuh b/src/misc/BoundaryValueProblem/BVPSolver.cuh index 4e54b094..bd23c2b3 100644 --- a/src/misc/BoundaryValueProblem/BVPSolver.cuh +++ b/src/misc/BoundaryValueProblem/BVPSolver.cuh @@ -42,39 +42,40 @@ namespace uammd{ namespace BVP{ namespace detail{ + template class SubsystemSolver{ int nz; real H; - StorageHandle CinvA_storage; - StorageHandle CinvABmD_storage; + StorageHandle CinvA_storage; + StorageHandle CinvABmD_storage; public: SubsystemSolver(int nz, real H):nz(nz), H(H){} void registerRequiredStorage(StorageRegistration &memoryManager){ - CinvA_storage = memoryManager.registerStorageRequirement(2*nz+2); - CinvABmD_storage = memoryManager.registerStorageRequirement(1); + CinvA_storage = memoryManager.registerStorageRequirement(2*nz+2); + CinvABmD_storage = memoryManager.registerStorageRequirement(4); } template - void precompute(real k, const TopBC &top, const BottomBC &bottom, StorageRetriever &memoryManager){ + void precompute(U k, const TopBC &top, const BottomBC &bottom, StorageRetriever &memoryManager){ auto CinvA_it = memoryManager.retrieveStorage(CinvA_storage); auto CinvABmD_it = memoryManager.retrieveStorage(CinvABmD_storage); auto invA = computeInverseSecondIntegralMatrix(k, H, nz); - SchurBoundaryCondition bcs(nz, H); + SchurBoundaryCondition_impl bcs(nz, H); auto CandD = bcs.computeBoundaryConditionMatrix(top, bottom); - real4 D = make_real4(CandD[2*nz], CandD[2*nz+1], CandD[2*nz+2], CandD[2*nz+3]); + U D[4] = {CandD[2*nz], CandD[2*nz+1], CandD[2*nz+2], CandD[2*nz+3]}; auto CinvA = matmul(CandD, nz, 2, invA, nz, nz); std::copy(CinvA.begin(), CinvA.end(), CinvA_it); - real4 CinvAB; - real B00 = -k*k*H*H; - real B11 = -k*k*H*H; - CinvAB.x = CinvA[0]*B00; - CinvAB.y = CinvA[1]*B11; - CinvAB.z = CinvA[0+nz]*B00; - CinvAB.w = CinvA[1+nz]*B11; - CinvABmD_it[0] = CinvAB - D; + U CinvAB[4]; + U B00 = -k*k*H*H; + U B11 = -k*k*H*H; + CinvAB[0] = CinvA[0]*B00; + CinvAB[1] = CinvA[1]*B11; + CinvAB[2] = CinvA[0+nz]*B00; + CinvAB[3] = CinvA[1+nz]*B11; + fori(0, 4) CinvABmD_it[i] = CinvAB[i] - D[i]; } template @@ -83,7 +84,10 @@ namespace uammd{ StorageRetriever &memoryManager){ const auto CinvA = memoryManager.retrieveStorage(CinvA_storage); const auto CinvAfmab = computeRightHandSide(alpha, beta, fn, CinvA); - const real4 CinvABmD = *(memoryManager.retrieveStorage(CinvABmD_storage)); + const auto CinvABmD_it = memoryManager.retrieveStorage(CinvABmD_storage); + U CinvABmD[4] = {CinvABmD_it[0], CinvABmD_it[1], + CinvABmD_it[2], CinvABmD_it[3]}; + const auto c0d0 = solveSubsystem(CinvABmD, CinvAfmab); return c0d0; } @@ -91,8 +95,8 @@ namespace uammd{ private: template - __device__ thrust::pair solveSubsystem(real4 CinvABmD, thrust::pair CinvAfmab) const{ - auto c0d0 = solve2x2System(CinvABmD, CinvAfmab); + __device__ thrust::pair solveSubsystem(U CinvABmD[4], thrust::pair CinvAfmab) const{ + auto c0d0 = solve2x2System(CinvABmD, CinvAfmab); return c0d0; } @@ -113,25 +117,27 @@ namespace uammd{ }; + template class PentadiagonalSystemSolver{ int nz; real H; - KBPENTA_mod pentasolve; + KBPENTA_mod pentasolve; public: PentadiagonalSystemSolver(int nz, real H): nz(nz), H(H), pentasolve(nz){} + void registerRequiredStorage(StorageRegistration &memoryManager){ pentasolve.registerRequiredStorage(memoryManager); } - void precompute(real k, StorageRetriever &memoryManager){ - real diagonal[nz]; - real diagonal_p2[nz]; diagonal_p2[nz-2] = diagonal_p2[nz-1] = 0; - real diagonal_m2[nz]; diagonal_m2[0] = diagonal_m2[1] = 0; + void precompute(U k, StorageRetriever &memoryManager){ + U diagonal[nz]; + U diagonal_p2[nz]; diagonal_p2[nz-2] = diagonal_p2[nz-1] = 0; + U diagonal_m2[nz]; diagonal_m2[0] = diagonal_m2[1] = 0; SecondIntegralMatrix sim(nz); - const real kH2 = k*k*H*H; + const U kH2 = k*k*H*H; for(int i = 0; i waveVector; + template + class BoundaryValueProblemSolver_impl{ + detail::PentadiagonalSystemSolver pent; + detail::SubsystemSolver sub; + StorageHandle waveVector; int nz; real H; public: - BoundaryValueProblemSolver(int nz, real H): nz(nz), H(H), sub(nz, H), pent(nz, H){} + + BoundaryValueProblemSolver_impl(int nz, real H): nz(nz), H(H), sub(nz, H), pent(nz, H){} void registerRequiredStorage(StorageRegistration &mem){ - waveVector = mem.registerStorageRequirement(1); + waveVector = mem.registerStorageRequirement(1); sub.registerRequiredStorage(mem); pent.registerRequiredStorage(mem); } template - void precompute(StorageRetriever &mem, real k, const TopBC &top, const BottomBC &bot){ + void precompute(StorageRetriever &mem, U k, const TopBC &top, const BottomBC &bot){ auto k_access = mem.retrieveStorage(waveVector); k_access[0] = k; pent.precompute(k, mem); @@ -177,10 +185,10 @@ namespace uammd{ AnIterator& an, CnIterator& cn, StorageRetriever &mem){ - const real k = *(mem.retrieveStorage(waveVector)); + const auto k = *(mem.retrieveStorage(waveVector)); T c0, d0; thrust::tie(c0, d0) = sub.solve(fn, alpha, beta, mem); - const real kH2 = k*k*H*H; + const auto kH2 = k*k*H*H; fn[0] += kH2*c0; fn[1] += kH2*d0; pent.solve(fn, an, mem); @@ -201,15 +209,20 @@ namespace uammd{ }; - class BatchedBVPHandler; + using BoundaryValueProblemSolverReal = BoundaryValueProblemSolver_impl; + using BoundaryValueProblemSolverComplex = BoundaryValueProblemSolver_impl>; + + template + class BatchedBVPHandler_impl; - struct BatchedBVPGPUSolver{ + template + struct BatchedBVPGPUSolver_impl{ private: int numberSystems; - BoundaryValueProblemSolver bvpSolver; + BoundaryValueProblemSolver_impl bvpSolver; char* gpuMemory; - friend class BatchedBVPHandler; - BatchedBVPGPUSolver(int numberSystems, BoundaryValueProblemSolver bvpSolver, char *raw): + friend class BatchedBVPHandler_impl; + BatchedBVPGPUSolver_impl(int numberSystems, BoundaryValueProblemSolver_impl bvpSolver, char *raw): numberSystems(numberSystems), bvpSolver(bvpSolver), gpuMemory(raw){} public: @@ -225,24 +238,28 @@ namespace uammd{ }; - class BatchedBVPHandler{ + using BatchedBVPGPUSolverReal = BatchedBVPGPUSolver_impl; + using BatchedBVPGPUSolverComplex = BatchedBVPGPUSolver_impl>; + + template + class BatchedBVPHandler_impl{ int numberSystems; - BoundaryValueProblemSolver bvp; + BoundaryValueProblemSolver_impl bvp; thrust::device_vector gpuMemory; public: template - BatchedBVPHandler(const WaveVectorIterator &klist, - BatchedTopBC top, BatchedBottomBC bot, - int numberSystems, real H, int nz): + BatchedBVPHandler_impl(const WaveVectorIterator &klist, + BatchedTopBC top, BatchedBottomBC bot, + int numberSystems, real H, int nz): numberSystems(numberSystems), bvp(nz, H){ precompute(klist, top, bot); } - BatchedBVPGPUSolver getGPUSolver(){ + BatchedBVPGPUSolver_impl getGPUSolver(){ auto raw = thrust::raw_pointer_cast(gpuMemory.data()); - BatchedBVPGPUSolver d_solver(numberSystems, bvp, raw); + BatchedBVPGPUSolver_impl d_solver(numberSystems, bvp, raw); return d_solver; } @@ -267,6 +284,9 @@ namespace uammd{ }; + using BatchedBVPHandlerReal = BatchedBVPHandler_impl; + using BatchedBVPHandlerComplex = BatchedBVPHandler_impl>; + } } diff --git a/src/misc/BoundaryValueProblem/KBPENTA.cuh b/src/misc/BoundaryValueProblem/KBPENTA.cuh index 636b217b..52f67c0b 100644 --- a/src/misc/BoundaryValueProblem/KBPENTA.cuh +++ b/src/misc/BoundaryValueProblem/KBPENTA.cuh @@ -5,21 +5,22 @@ namespace uammd{ namespace BVP{ -//Algorithm adapted from http://dx.doi.org/10.1080/00207160802326507 for a special case of only three diagonals being non zero + //Algorithm adapted from http://dx.doi.org/10.1080/00207160802326507 for a special case of only three diagonals being non zero + template class KBPENTA_mod{ - StorageHandle storageHandle; + StorageHandle storageHandle; int nz; public: KBPENTA_mod(int nz): nz(nz){} void registerRequiredStorage(StorageRegistration &memoryManager){ - storageHandle = memoryManager.registerStorageRequirement(3*nz+2); + storageHandle = memoryManager.registerStorageRequirement(3*nz+2); } - void store(real *diagonal, real *diagonal_p2, real *diagonal_m2, StorageRetriever &memoryManager){ + void store(U *diagonal, U *diagonal_p2, U *diagonal_m2, StorageRetriever &memoryManager){ auto storage = memoryManager.retrieveStorage(storageHandle); - std::vector beta(nz+1, 0); + std::vector beta(nz+1, 0); beta[0] = 0; beta[1] = diagonal[nz-nz]; beta[2] = diagonal[nz-(nz-1)]; @@ -56,6 +57,9 @@ namespace uammd{ }; + using KBPENTA_mod_real = KBPENTA_mod; + using KBPENTA_mod_complex = KBPENTA_mod>; + } } #endif diff --git a/src/misc/BoundaryValueProblem/MatrixUtils.h b/src/misc/BoundaryValueProblem/MatrixUtils.h index 7a1689af..d9d062e4 100644 --- a/src/misc/BoundaryValueProblem/MatrixUtils.h +++ b/src/misc/BoundaryValueProblem/MatrixUtils.h @@ -1,5 +1,5 @@ /*Raul P. Pelaez 2019-2021. Boundary Value Problem Matrix algebra utilities - */ +*/ #ifndef BVP_MATRIX_UTILS_H #define BVP_MATRIX_UTILS_H #include "utils/cufftPrecisionAgnostic.h" @@ -10,97 +10,131 @@ #ifdef USE_MKL #include #else -#include +#include #endif - +#include namespace uammd{ namespace BVP{ - + using complex = thrust::complex; template struct LapackeUAMMD; template<> - struct LapackeUAMMD{ - template static int getrf(T... args){return LAPACKE_sgetrf(args...);} - template static int getri(T... args){return LAPACKE_sgetri(args...);} - }; + struct LapackeUAMMD{ + template static int getrf(T... args){return LAPACKE_sgetrf(args...);} + template static int getri(T... args){return LAPACKE_sgetri(args...);} + }; template<> - struct LapackeUAMMD{ - template static int getrf(T... args){return LAPACKE_dgetrf(args...);} - template static int getri(T... args){return LAPACKE_dgetri(args...);} - }; + struct LapackeUAMMD{ + template static int getrf(T... args){return LAPACKE_dgetrf(args...);} + template static int getri(T... args){return LAPACKE_dgetri(args...);} + }; template<> - struct LapackeUAMMD{ - template static int getrf(T... args){return LAPACKE_cgetrf(args...);} - template static int getri(T... args){return LAPACKE_cgetri(args...);} - }; + struct LapackeUAMMD{ + template static int getrf(T... args){return LAPACKE_cgetrf(args...);} + template static int getri(T... args){return LAPACKE_cgetri(args...);} + }; template<> - struct LapackeUAMMD{ - template static int getrf(T... args){return LAPACKE_zgetrf(args...);} - template static int getri(T... args){return LAPACKE_zgetri(args...);} - }; + struct LapackeUAMMD{ + template static int getrf(T... args){return LAPACKE_zgetrf(args...);} + template static int getri(T... args){return LAPACKE_zgetri(args...);} + }; template<> - struct LapackeUAMMD>{ - static int getrf(int matrix_layout, lapack_int n, lapack_int m, - cufftComplex_t* a, lapack_int lda, - lapack_int* ipiv){ - return LAPACKE_cgetrf(matrix_layout, n, m, (lapack_complex_float*)(a), lda, ipiv); - } - static int getri(int matrix_layout, lapack_int n, - cufftComplex_t* a, lapack_int lda, - lapack_int* ipiv){ - return LAPACKE_cgetri(matrix_layout, n, (lapack_complex_float*)(a), lda, ipiv); - } - }; + struct LapackeUAMMD>{ + static int getrf(int matrix_layout, lapack_int n, lapack_int m, + cufftComplex_t* a, lapack_int lda, + lapack_int* ipiv){ + return LAPACKE_cgetrf(matrix_layout, n, m, (lapack_complex_float*)(a), lda, ipiv); + } + static int getri(int matrix_layout, lapack_int n, + cufftComplex_t* a, lapack_int lda, + lapack_int* ipiv){ + return LAPACKE_cgetri(matrix_layout, n, (lapack_complex_float*)(a), lda, ipiv); + } + }; template<> - struct LapackeUAMMD>{ - static int getrf(int matrix_layout, lapack_int n, lapack_int m, - cufftComplex_t* a, lapack_int lda, - lapack_int* ipiv){ - return LAPACKE_zgetrf(matrix_layout, n, m, (lapack_complex_double*)(a), lda, ipiv); - } - static int getri(int matrix_layout, lapack_int n, - cufftComplex_t* a, lapack_int lda, - lapack_int* ipiv){ - return LAPACKE_zgetri(matrix_layout, n, (lapack_complex_double*)(a), lda, ipiv); - } - }; + struct LapackeUAMMD>{ + static int getrf(int matrix_layout, lapack_int n, lapack_int m, + cufftComplex_t* a, lapack_int lda, + lapack_int* ipiv){ + return LAPACKE_zgetrf(matrix_layout, n, m, (lapack_complex_double*)(a), lda, ipiv); + } + static int getri(int matrix_layout, lapack_int n, + cufftComplex_t* a, lapack_int lda, + lapack_int* ipiv){ + return LAPACKE_zgetri(matrix_layout, n, (lapack_complex_double*)(a), lda, ipiv); + } + }; + template<> + struct LapackeUAMMD>{ + static int getrf(int matrix_layout, lapack_int n, lapack_int m, + thrust::complex* a, lapack_int lda, + lapack_int* ipiv){ + return LAPACKE_cgetrf(matrix_layout, n, m, (lapack_complex_float*)(a), lda, ipiv); + } + static int getri(int matrix_layout, lapack_int n, + thrust::complex* a, lapack_int lda, + lapack_int* ipiv){ + return LAPACKE_cgetri(matrix_layout, n, (lapack_complex_float*)(a), lda, ipiv); + } + }; + template<> + struct LapackeUAMMD>{ + static int getrf(int matrix_layout, lapack_int n, lapack_int m, + thrust::complex* a, lapack_int lda, + lapack_int* ipiv){ + return LAPACKE_zgetrf(matrix_layout, n, m, (lapack_complex_double*)(a), lda, ipiv); + } + static int getri(int matrix_layout, lapack_int n, + thrust::complex* a, lapack_int lda, + lapack_int* ipiv){ + return LAPACKE_zgetri(matrix_layout, n, (lapack_complex_double*)(a), lda, ipiv); + } + }; template - std::vector invertSquareMatrix(const std::vector &A, lapack_int N){ - lapack_int pivotArray[N]; - int errorHandler; - auto invA = A; - errorHandler = LapackeUAMMD::getrf(LAPACK_ROW_MAJOR, N, N, invA.data(), N, pivotArray); - if(errorHandler){ - throw std::runtime_error("Lapacke getrf failed with error code: " + std::to_string(errorHandler)); + std::vector invertSquareMatrix(const std::vector &A, lapack_int N){ + lapack_int pivotArray[N]; + int errorHandler; + auto invA = A; + errorHandler = LapackeUAMMD::getrf(LAPACK_ROW_MAJOR, N, N, invA.data(), N, pivotArray); + if(errorHandler){ + throw std::runtime_error("Lapacke getrf failed with error code: " + std::to_string(errorHandler)); + } + errorHandler = LapackeUAMMD::getri(LAPACK_ROW_MAJOR, N, invA.data(), N, pivotArray); + if(errorHandler){ + throw std::runtime_error("Lapacke getri failed with error code: " + std::to_string(errorHandler)); + } + return invA; } - errorHandler = LapackeUAMMD::getri(LAPACK_ROW_MAJOR, N, invA.data(), N, pivotArray); - if(errorHandler){ - throw std::runtime_error("Lapacke getri failed with error code: " + std::to_string(errorHandler)); - } - return invA; - } template - std::vector matmul(const T &A, int ncol_a, int nrow_a, - const T2 &B, int ncol_b, int nrow_b){ - std::vector C; - C.resize(ncol_b*nrow_a); - for(int i = 0; i C; + C.resize(ncol_b*nrow_a); + for(int i = 0; i + __device__ thrust::pair solve2x2System(thrust::complex A[4], thrust::pair b){ + const auto det = A[0]*A[3] - A[1]*A[2]; + const T c0 = (b.first*A[3] - A[1]*b.second)/det; + const T d0 = (b.second*A[0] - b.first*A[2])/det; + return thrust::make_pair(c0, d0); + } + template __device__ thrust::pair solve2x2System(real4 A, thrust::pair b){ const real det = A.x*A.w - A.y*A.z; @@ -109,6 +143,11 @@ namespace uammd{ return thrust::make_pair(c0, d0); } + template + __device__ thrust::pair solve2x2System(real A[4], thrust::pair b){ + real4 A_r4 = {A[0], A[1], A[2], A[3]}; + return solve2x2System(A_r4, b); + } } } #endif diff --git a/src/misc/IBM_kernels.cuh b/src/misc/IBM_kernels.cuh index 22ba9214..904ebd23 100644 --- a/src/misc/IBM_kernels.cuh +++ b/src/misc/IBM_kernels.cuh @@ -41,6 +41,7 @@ namespace uammd{ //Sum all values in a container using Kahan Summation, which increases floating point accuracy template + inline auto kahanSum(Container &v){ auto c = v[0]*0; auto sum = c; @@ -55,6 +56,7 @@ namespace uammd{ //Integrate the function foo(x) from x=rmin to x=rmax using the Simpson rule with Nr intervals template + inline auto integrate(Foo foo, real rmin, real rmax, int Nr){ using T = decltype(foo(rmin)); if(Nr%2 == 1) Nr++; //Need an even number of points @@ -74,6 +76,7 @@ namespace uammd{ } //[1] Taken from https://arxiv.org/pdf/1712.04732.pdf + inline __host__ __device__ real BM(real zz, real alpha, real beta){ const real z = zz/alpha; const real z2 = z*z; @@ -114,7 +117,7 @@ namespace uammd{ const real invh; static constexpr int support = 3; threePoint(real h):invh(1.0/h){} - __host__ __device__ real phi(real rr, real3 pos = real3()) const{ + inline __host__ __device__ real phi(real rr, real3 pos = real3()) const{ const real r = fabs(rr)*invh; if(rlastRunRequiredSteps = steps_needed; if(steps_needed-2 > check_convergence_steps){ diff --git a/src/misc/lapack_and_blas_defines.h b/src/misc/lapack_and_blas_defines.h index 6df2ee72..af27d0a1 100644 --- a/src/misc/lapack_and_blas_defines.h +++ b/src/misc/lapack_and_blas_defines.h @@ -4,8 +4,8 @@ #ifdef USE_MKL #include #else -#include -#include +#include +#include #endif #ifdef SINGLE_PRECISION #define LAPACKE_steqr LAPACKE_ssteqr diff --git a/src/uammd.cuh b/src/uammd.cuh index e3fa8639..fc402041 100644 --- a/src/uammd.cuh +++ b/src/uammd.cuh @@ -9,7 +9,11 @@ #include "System/System.h" #ifdef UAMMD_EXTENSIONS -#include "../extensions/preamble.h" + #ifdef UAMMD_EXTENSIONS_PREAMBLE + #include UAMMD_EXTENSIONS_PREAMBLE + #else + #include "../extensions/preamble.h" + #endif #endif #include "ParticleData/ParticleData.cuh" diff --git a/src/utils/Grid.cuh b/src/utils/Grid.cuh index 5405b50e..243d29f1 100644 --- a/src/utils/Grid.cuh +++ b/src/utils/Grid.cuh @@ -123,7 +123,7 @@ namespace uammd{ }; //Looks for the closest (equal or greater) number of nodes of the form 2^a*3^b*5^c*7^d*11^e - int3 nextFFTWiseSize3D(int3 size){ + inline int3 nextFFTWiseSize3D(int3 size){ int* cdim = &size.x; using integral = uint64_t; integral max_dim = std::max({size.x, size.y, size.z}); diff --git a/src/utils/ParticleSorter.cuh b/src/utils/ParticleSorter.cuh index b50a826e..1e968410 100644 --- a/src/utils/ParticleSorter.cuh +++ b/src/utils/ParticleSorter.cuh @@ -79,7 +79,7 @@ namespace uammd{ } }; - int clz(uint n){ + inline int clz(uint n){ n |= (n >> 1); n |= (n >> 2); n |= (n >> 4); diff --git a/src/utils/atomics.cuh b/src/utils/atomics.cuh index fc592524..6797debd 100644 --- a/src/utils/atomics.cuh +++ b/src/utils/atomics.cuh @@ -7,8 +7,6 @@ namespace uammd{ template inline __device__ T atomicAdd(T* address, T val){ return ::atomicAdd(address, val);} - -#ifndef SINGLE_PRECISION #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 600 inline __device__ double atomicAdd(double* address, double val){ unsigned long long int* address_as_ull = @@ -23,7 +21,20 @@ namespace uammd{ return __longlong_as_double(old); } #endif -#endif + + inline __device__ float2 atomicAdd(float2* address, float2 val){ + float2 newval; + if(val.x) newval.x = atomicAdd(&(*address).x, val.x); + if(val.y) newval.y = atomicAdd(&(*address).y, val.y); + return newval; + } + + inline __device__ double2 atomicAdd(double2* address, double2 val){ + double2 newval; + if(val.x) newval.x = atomicAdd(&(*address).x, val.x); + if(val.y) newval.y = atomicAdd(&(*address).y, val.y); + return newval; + } inline __device__ real4 atomicAdd(real4* address, real4 val){ real4 newval; @@ -41,21 +52,12 @@ namespace uammd{ if(val.z) newval.z = atomicAdd(&(*address).z, val.z); return newval; } - - inline __device__ real2 atomicAdd(real2* address, real2 val){ - real2 newval; - if(val.x) newval.x = atomicAdd(&(*address).x, val.x); - if(val.y) newval.y = atomicAdd(&(*address).y, val.y); - return newval; - } - - + template inline __device__ T2 atomicAdd(T &ref, T2 val){ return atomicAdd(&ref, val); } - template inline __device__ T2 atomicAdd(thrust::tuple &&refs, T2 val){ T2 newval; diff --git a/src/utils/cublasDebug.h b/src/utils/cublasDebug.h index 9293087a..a50b14ef 100644 --- a/src/utils/cublasDebug.h +++ b/src/utils/cublasDebug.h @@ -8,7 +8,7 @@ #define CublasSafeCall(err) __cublasSafeCall(err, __FILE__, __LINE__) - +inline const char* cublasGetErrorString(cublasStatus_t status){ switch(status){ case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS"; diff --git a/src/utils/cufftDebug.h b/src/utils/cufftDebug.h index 07e63abc..3056bb62 100644 --- a/src/utils/cufftDebug.h +++ b/src/utils/cufftDebug.h @@ -12,6 +12,7 @@ #define CufftSafeCall(err) __cufftSafeCall(err, __FILE__, __LINE__) namespace uammd{ + inline const char* cufftGetErrorString(cufftResult_t err){ switch (err) { case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN\n"; diff --git a/src/utils/cufftPrecisionAgnostic.h b/src/utils/cufftPrecisionAgnostic.h index 953778b2..e5b6f6b0 100644 --- a/src/utils/cufftPrecisionAgnostic.h +++ b/src/utils/cufftPrecisionAgnostic.h @@ -28,14 +28,17 @@ namespace uammd{ template<> struct CUFFT_Real2Complex{static constexpr cufftType value=CUFFT_R2C;}; template + inline cufftResult cufftExecReal2Complex(cufftHandle &plan, cufftReal_t* d_in, cufftComplex_t *d_out); template<> + inline cufftResult cufftExecReal2Complex(cufftHandle &plan, cufftReal_t* d_in, cufftComplex_t *d_out){ return cufftExecR2C(plan, d_in, d_out); } template<> + inline cufftResult cufftExecReal2Complex(cufftHandle &plan, cufftReal_t* d_in, cufftComplex_t *d_out){ return cufftExecD2Z(plan, d_in, d_out); } @@ -45,14 +48,17 @@ namespace uammd{ template<> struct CUFFT_Complex2Real{static constexpr cufftType value=CUFFT_C2R;}; template + inline cufftResult cufftExecComplex2Real(cufftHandle &plan, cufftComplex_t* d_in, cufftReal_t *d_out); template<> + inline cufftResult cufftExecComplex2Real(cufftHandle &plan, cufftComplex_t *d_in, cufftReal_t *d_out){ return cufftExecC2R(plan, d_in, d_out); } template<> + inline cufftResult cufftExecComplex2Real(cufftHandle &plan, cufftComplex_t *d_in, cufftReal_t *d_out){ return cufftExecZ2D(plan, d_in, d_out); } @@ -62,14 +68,17 @@ namespace uammd{ template<> struct CUFFT_Complex2Complex{static constexpr cufftType value=CUFFT_C2C;}; template + inline cufftResult cufftExecComplex2Complex(cufftHandle &plan, cufftComplex_t* d_in, cufftComplex_t *d_out, int direction); template<> + inline cufftResult cufftExecComplex2Complex(cufftHandle &plan, cufftComplex_t *d_in, cufftComplex_t *d_out, int direction){ return cufftExecC2C(plan, d_in, d_out, direction); } template<> + inline cufftResult cufftExecComplex2Complex(cufftHandle &plan, cufftComplex_t *d_in, cufftComplex_t *d_out, int direction){ return cufftExecZ2Z(plan, d_in, d_out, direction); } diff --git a/src/utils/curandDebug.h b/src/utils/curandDebug.h index 0f3b2b95..567a90c0 100644 --- a/src/utils/curandDebug.h +++ b/src/utils/curandDebug.h @@ -8,6 +8,7 @@ #define CurandSafeCall(err) __curandSafeCall(err, __FILE__, __LINE__) +inline const char* curandGetErrorString(curandStatus_t err){ switch (err) { case CURAND_STATUS_VERSION_MISMATCH: return "CURAND_STATUS_VERSION_MISMATCH"; diff --git a/src/utils/cxx_utils.h b/src/utils/cxx_utils.h index 5dbe9a72..a44d2c3a 100644 --- a/src/utils/cxx_utils.h +++ b/src/utils/cxx_utils.h @@ -27,7 +27,7 @@ namespace uammd{ } namespace printUtils{ //Transform size in bytes to a pretty string in B, KB, MB... - std::string prettySize(size_t size) { + inline std::string prettySize(size_t size) { static const char *SIZES[] = { "B", "KB", "MB", "GB" }; uint div = 0; size_t rem = 0; diff --git a/src/utils/exception.h b/src/utils/exception.h index 5b424531..1c68172f 100644 --- a/src/utils/exception.h +++ b/src/utils/exception.h @@ -10,7 +10,7 @@ namespace uammd{ using exception = std::exception; - void backtrace_nested_exception(const uammd::exception& e, int level = 0){ + inline void backtrace_nested_exception(const uammd::exception& e, int level = 0){ Logging::log(std::string(level,' ') + "level " + std::to_string(level) + " exception: " + e.what()); diff --git a/src/utils/quaternion.cuh b/src/utils/quaternion.cuh index 2c384549..bdaec2a4 100644 --- a/src/utils/quaternion.cuh +++ b/src/utils/quaternion.cuh @@ -1,7 +1,7 @@ /* P. Palacios Alonso 2021 Some quaternion algebra and useful functions Notation: - n(real) - Scalar part + n(real) - Scalar part v(real3) - vectorial part q(real4) - quaternion */ @@ -13,146 +13,150 @@ Notation: namespace uammd{ class Quat{ - public: - - real n; - real3 v; - - QUATTR Quat(); - QUATTR Quat(real4 q); - QUATTR Quat(real n, real3 v); - QUATTR Quat(real n, real vx, real vy, real vz); - - QUATTR Quat operator+(Quat q1); - QUATTR void operator+=(Quat q); - QUATTR Quat operator-(Quat q); - QUATTR void operator-=(Quat q); - QUATTR Quat operator*(Quat q); - QUATTR void operator*=(Quat q); - QUATTR Quat operator*(real scalar); - QUATTR void operator*=(real scalar); - QUATTR Quat operator/(real scalar); - QUATTR void operator/=(real scalar); - QUATTR void operator=(real4 q); - - QUATTR real3 getVx(); //Returns the first vector of the reference system encoded by the quaternion - QUATTR real3 getVy(); //Returns the second vector of the reference system encoded by the quaternion - QUATTR real3 getVz(); //Returns the third vector of the reference system encoded by the quaternion - QUATTR real4 to_real4(); - - QUATTR Quat getConjugate(); + public: + + real n; + real3 v; + + QUATTR Quat(); + QUATTR Quat(const real4& q); + QUATTR Quat(real n, const real3& v); + QUATTR Quat(real n, real vx, real vy, real vz); + + QUATTR Quat operator+ (const Quat& q) const; + QUATTR void operator+= (const Quat& q); + QUATTR Quat operator- (const Quat& q) const; + QUATTR void operator-= (const Quat& q); + QUATTR Quat operator* (const Quat& q) const; + QUATTR void operator*= (const Quat& q); + QUATTR Quat operator* (real scalar) const; + QUATTR void operator*= (real scalar); + QUATTR Quat operator/ (real scalar) const; + QUATTR void operator/= (real scalar); + QUATTR void operator= (const real4& q); + + QUATTR real3 getVx() const; + QUATTR real3 getVy() const; + QUATTR real3 getVz() const; + QUATTR real4 to_real4() const; + + QUATTR Quat getConjugate() const; }; - + QUATTR Quat::Quat(){ n = real(1.0); v = real3(); } - - QUATTR Quat::Quat(real4 q){ + + QUATTR Quat::Quat(const real4& q){ n = q.x; v.x = q.y; v.y = q.z; v.z = q.w; } - - QUATTR Quat::Quat(real n, real3 v){ + + QUATTR Quat::Quat(real n, const real3& v){ this->n = n; this->v = v; } - - QUATTR Quat::Quat(real n, real vx, real vy, real vz){ + + QUATTR Quat::Quat(real n, + real vx,real vy,real vz){ this -> n = n; v.x = vx; v.y = vy; - v.z = vz; + v.z = vz; } - - QUATTR Quat Quat::operator+(Quat q){ - return Quat(n+q.n,v+q.v); + + QUATTR Quat Quat::operator+(const Quat& q) const { + return Quat(n+q.n,v+q.v); } - QUATTR void Quat::operator+=(Quat q){ + QUATTR void Quat::operator+=(const Quat& q){ n+=q.n; v+=q.v; } - - QUATTR Quat Quat::operator-(Quat q){ - return Quat(n-q.n,v-q.v); - } - - QUATTR void Quat::operator-=(Quat q){ + + QUATTR Quat Quat::operator-(const Quat& q) const { + return Quat(n-q.n,v-q.v); + } + + QUATTR void Quat::operator-=(const Quat& q){ n-=q.n; v-=q.v; } - - QUATTR Quat Quat::operator*(Quat q){ + + QUATTR Quat Quat::operator*(const Quat& q) const { /* - Product of two quaternions: - q3 = q1*q2 = (n1*n2 - v1*v2, n1*v2 + n2*v1 + v1 x v2) - */ - return Quat(n*q.n-dot(v,q.v),n*q.v+v*q.n+cross(v,q.v)); + Product of two quaternions: + q3 = q1*q2 = (n1*n2 - v1*v2, n1*v2 + n2*v1 + v1 x v2) + */ + return Quat(n*q.n-dot(v,q.v),n*q.v+v*q.n+cross(v,q.v)); } - - QUATTR void Quat::operator*=(Quat q){ - n=n*q.n-dot(v,q.v); - v=n*q.v+v*q.n+cross(q.v,v); + + QUATTR void Quat::operator*=(const Quat& q){ + real n_new = n*q.n-dot(v,q.v); + real3 v_new = n*q.v+v*q.n+cross(v,q.v); + + n = n_new; + v = v_new; } - - QUATTR Quat Quat::operator*(real scalar){ - return Quat(scalar*n,scalar*v); + + QUATTR Quat Quat::operator*(real scalar) const { + return Quat(scalar*n,scalar*v); } - + QUATTR void Quat::operator*=(real scalar){ - n*=scalar; - v*=scalar; + n*=scalar; + v*=scalar; } - - QUATTR Quat operator*(real scalar, Quat q){ - return Quat(scalar*q.n,scalar*q.v); + + QUATTR Quat operator*(real scalar, const Quat& q){ + return Quat(scalar*q.n,scalar*q.v); } - - QUATTR Quat Quat::operator/(real scalar){ - return Quat(n/scalar,v/scalar); + + QUATTR Quat Quat::operator/(real scalar) const { + return Quat(n/scalar,v/scalar); } - + QUATTR void Quat::operator/=(real scalar){ n/=scalar; v/=scalar; } - - QUATTR void Quat::operator=(real4 q){ + + QUATTR void Quat::operator=(const real4& q){ n = q.x; v.x = q.y; v.y = q.z; v.z = q.w; } - - QUATTR real3 Quat::getVx(){ + + QUATTR real3 Quat::getVx() const { real a = n; real b = v.x; real c = v.y; real d = v.z; - real3 vx = make_real3(a*a+b*b-c*c-d*d,2*(b*c+a*d),2*(b*d-a*c)); + real3 vx = make_real3(a*a+b*b-c*c-d*d,real(2.0)*(b*c+a*d),real(2.0)*(b*d-a*c)); return vx; } - - QUATTR real3 Quat::getVy(){ + + QUATTR real3 Quat::getVy() const { real a = n; real b = v.x; real c = v.y; real d = v.z; - return make_real3(2*(b*c-a*d),a*a-b*b+c*c-d*d,2*(c*d+a*b)); + return make_real3(real(2.0)*(b*c-a*d),a*a-b*b+c*c-d*d,real(2.0)*(c*d+a*b)); } - - QUATTR real3 Quat::getVz(){ + + QUATTR real3 Quat::getVz() const { real a = n; real b = v.x; real c = v.y; real d = v.z; - return make_real3(2*(b*d+a*c),2*(c*d-a*b),a*a-b*b-c*c+d*d); + return make_real3(real(2.0)*(b*d+a*c),real(2.0)*(c*d-a*b),a*a-b*b-c*c+d*d); } - - QUATTR real4 Quat::to_real4(){ + + QUATTR real4 Quat::to_real4() const { real4 r4; r4.x = n; r4.y = v.x; @@ -160,32 +164,32 @@ namespace uammd{ r4.w = v.z; return r4; } - + QUATTR real4 make_real4(Quat q){ return q.to_real4(); } - QUATTR Quat Quat::getConjugate(){ + QUATTR Quat Quat::getConjugate() const { return Quat(n,-v); } - /* - Returns the quaternion that encondes a rotation of ang radians - around the axis vrot + /* + Returns the quaternion that encondes a rotation of ang radians + around the axis vrot q = (cos(phi/2),vrot·sin(phi/2)) - */ + */ QUATTR Quat rotVec2Quaternion(real3 vrot, real phi){ real norm = (dot(vrot,vrot)); - if (norm==0) return Quat(1.0,0.,0.,0.); + if (norm==real(0.0)) return Quat(1.0, 0., 0., 0.); vrot*=rsqrt(norm); // The rotation axis must be a unitary vector real cphi2, sphi2; real* cphi2_ptr = &cphi2; real* sphi2_ptr = &sphi2; - sincos(phi*0.5,sphi2_ptr,cphi2_ptr); + sincos(phi*real(0.5),sphi2_ptr,cphi2_ptr); Quat q = Quat(cphi2,sphi2*vrot); return q; } - + QUATTR Quat rotVec2Quaternion(real3 vrot){ // If no angle is given the rotation angle is the modulus of vrot real phi = sqrt(dot(vrot,vrot)); @@ -197,10 +201,10 @@ namespace uammd{ To speed up the computation, we write the rotation in the next form: v' = v + 2*q_n*(q_v x v) + 2*q_v * (q_v x v). See https://fgiesen.wordpress.com/2019/02/09/rotating-a-single-vector-using-a-quaternion/ - */ - QUATTR real3 rotateVector(Quat q, real3 v){ - real3 aux = 2*cross(q.v,v); - return v+q.n*aux+cross(q.v,aux); + */ + QUATTR real3 rotateVector(const Quat& q, const real3& v){ + real3 aux = real(2.0)*cross(q.v,v); + return v+q.n*aux+cross(q.v,aux); } } diff --git a/src/utils/tensor.cuh b/src/utils/tensor.cuh index 0acba92c..e67c729b 100644 --- a/src/utils/tensor.cuh +++ b/src/utils/tensor.cuh @@ -36,9 +36,15 @@ namespace uammd{ this->zz = t.zz; } - VECATTR real3 diag(){return {xx,yy,zz};} + VECATTR real3 diag() const {return {xx,yy,zz};} - VECATTR real trace(){return xx+yy+zz;} + VECATTR real trace() const {return xx+yy+zz;} + + VECATTR tensor3 transpose() const { + return tensor3(xx,yx,zx, + xy,yy,zy, + xz,yz,zz); + } }; @@ -152,6 +158,22 @@ namespace uammd{ return sub; } + //Define the unary minus operator + + VECATTR tensor3 operator -(const tensor3 &a){ + tensor3 sub; + sub.xx = -a.xx; + sub.yx = -a.yx; + sub.zx = -a.zx; + sub.xy = -a.xy; + sub.yy = -a.yy; + sub.zy = -a.zy; + sub.xz = -a.xz; + sub.yz = -a.yz; + sub.zz = -a.zz; + return sub; + } + VECATTR void operator -=(tensor3 &a, const real &b){ a.xx -= b; a.yx -= b; diff --git a/src/utils/vector.cuh b/src/utils/vector.cuh index df4ed98c..43a4330f 100644 --- a/src/utils/vector.cuh +++ b/src/utils/vector.cuh @@ -976,4 +976,339 @@ namespace uammd{ } } +namespace uammd{ +////////////////////////////////////VEC2/////////////////////////////////////// + template + struct vec2{ + T x,y; + }; + + template + vec2 make_vec2(T x, T y){ return {x, y};} + + template + VECATTR vec2 operator +(const vec2 &a, const vec2 &b){ + return {a.x + b.x, a.y + b.y}; + } + + template + VECATTR vec2 operator +(const vec2 &a, const T &b){ + return {a.x + b, a.y + b}; + } + + template + VECATTR vec2 operator -(const vec2 &a, const vec2 &b){ + return {a.x - b.x, a.y - b.y}; + } + + + template + VECATTR vec2 operator -(const vec2 &a, const T &b){ + return {a.x - b, a.y - b}; + } + + template + VECATTR vec2 operator -(const T &b, const vec2 &a){ + return {b-a.x, b-a.y}; + } + + + template + VECATTR vec2 operator *(const vec2 &a, const vec2 &b){ + return {a.x * b.x, a.y * b.y}; + } + + + template + VECATTR vec2 operator *(const vec2 &a, const T &b){ + return {a.x * b, a.y * b}; + } + + template + VECATTR vec2 operator /(const vec2 &a, const vec2 &b){ + return {a.x / b.x, a.y / b.y}; + } + + template + VECATTR vec2 operator /(const vec2 &a, const T &b){ + return {a.x/b, a.y/b}; + } + + + template + VECATTR vec2 operator /(const T &b, const vec2 &a){ + return {b / a.x, b / a.y}; + } + + template + VECATTR void operator +=(vec2 &a, const vec2 &b){ + a = a + b; + } + + template + VECATTR vec2 operator +(const T &b, const vec2 &a){return a+b;} + + template + VECATTR void operator +=(vec2 &a, const T &b){ + a = a+b; + } + + template + VECATTR void operator -=(vec2 &a, const vec2 &b){ + a = a-b; + } + template + VECATTR void operator -=(vec2 &a, const T &b){ + a=a-b; + } + template + VECATTR void operator *=(vec2 &a, const vec2 &b){ + a = a*b; + } + + template + VECATTR vec2 operator *(const T &b, const vec2 &a){ + return a*b; + } + + template + VECATTR void operator *=(vec2 &a, const T &b){ + a=a*b; + } + + template + VECATTR void operator /=(vec2 &a, const vec2 &b){ + a = a/b; + } + + template + VECATTR void operator /=(vec2 &a, const T &b){ + a = a/b; + } + +////////////////////////////////////VEC3/////////////////////////////////////// + template + struct vec3{ + T x,y,z; + }; + + template + vec3 make_vec3(T x, T y, T z){ return {x, y, z};} + + template + VECATTR vec3 operator +(const vec3 &a, const vec3 &b){ + return {a.x + b.x, a.y + b.y, a.z + b.z}; + } + + template + VECATTR vec3 operator +(const vec3 &a, const T &b){ + return {a.x + b, a.y + b, a.z + b}; + } + + template + VECATTR vec3 operator -(const vec3 &a, const vec3 &b){ + return {a.x - b.x, a.y - b.y, a.z - b.z}; + } + + + template + VECATTR vec3 operator -(const vec3 &a, const T &b){ + return {a.x - b, a.y - b, a.z - b}; + } + + template + VECATTR vec3 operator -(const T &b, const vec3 &a){ + return {b-a.x, b-a.y, b-a.z}; + } + + + template + VECATTR vec3 operator *(const vec3 &a, const vec3 &b){ + return {a.x * b.x, a.y * b.y, a.z * b.z}; + } + + + template + VECATTR vec3 operator *(const vec3 &a, const T &b){ + return {a.x * b, a.y * b, a.z * b}; + } + + template + VECATTR vec3 operator /(const vec3 &a, const vec3 &b){ + return {a.x / b.x, a.y / b.y, a.z / b.z}; + } + + template + VECATTR vec3 operator /(const vec3 &a, const T &b){ + return {a.x/b, a.y/b, a.z/b}; + } + + + template + VECATTR vec3 operator /(const T &b, const vec3 &a){ + return {b / a.x, b / a.y, b / a.z}; + } + + + + template + VECATTR void operator +=(vec3 &a, const vec3 &b){ + a = a + b; + } + + template + VECATTR vec3 operator +(const T &b, const vec3 &a){return a+b;} + + template + VECATTR void operator +=(vec3 &a, const T &b){ + a = a+b; + } + + template + VECATTR void operator -=(vec3 &a, const vec3 &b){ + a = a-b; + } + template + VECATTR void operator -=(vec3 &a, const T &b){ + a=a-b; + } + template + VECATTR void operator *=(vec3 &a, const vec3 &b){ + a = a*b; + } + + template + VECATTR vec3 operator *(const T &b, const vec3 &a){ + return a*b; + } + + template + VECATTR void operator *=(vec3 &a, const T &b){ + a=a*b; + } + + template + VECATTR void operator /=(vec3 &a, const vec3 &b){ + a = a/b; + } + + template + VECATTR void operator /=(vec3 &a, const T &b){ + a = a/b; + } + + /////////////////////////////////VEC4////////////////////////// + + template + struct vec4{ + T x,y,z,w; + }; + + + + template + vec4 make_vec4(T x, T y, T z, T w){ return {x, y, z, w};} + + template + VECATTR vec4 operator +(const vec4 &a, const vec4 &b){ + return {a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w}; + } + + template + VECATTR vec4 operator +(const vec4 &a, const T &b){ + return {a.x + b, a.y + b, a.z + b, a.w + b}; + } + + template + VECATTR vec4 operator -(const vec4 &a, const vec4 &b){ + return {a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w}; + } + + + template + VECATTR vec4 operator -(const vec4 &a, const T &b){ + return {a.x - b, a.y - b, a.z - b, a.w - b}; + } + + template + VECATTR vec4 operator -(const T &b, const vec4 &a){ + return {b-a.x, b-a.y, b-a.z, b-a.w}; + } + + + template + VECATTR vec4 operator *(const vec4 &a, const vec4 &b){ + return {a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w}; + } + + + template + VECATTR vec4 operator *(const vec4 &a, const T &b){ + return {a.x * b, a.y * b, a.z * b, a.w * b}; + } + + template + VECATTR vec4 operator /(const vec4 &a, const vec4 &b){ + return {a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w}; + } + + template + VECATTR vec4 operator /(const vec4 &a, const T &b){ + return {a.x/b, a.y/b, a.z/b, a.w/b}; + } + + + template + VECATTR vec4 operator /(const T &b, const vec4 &a){ + return {b / a.x, b / a.y, b / a.z, b / a.w}; + } + + + + template + VECATTR void operator +=(vec4 &a, const vec4 &b){ + a = a + b; + } + + template + VECATTR vec4 operator +(const T &b, const vec4 &a){return a+b;} + + template + VECATTR void operator +=(vec4 &a, const T &b){ + a = a+b; + } + + template + VECATTR void operator -=(vec4 &a, const vec4 &b){ + a = a-b; + } + template + VECATTR void operator -=(vec4 &a, const T &b){ + a=a-b; + } + template + VECATTR void operator *=(vec4 &a, const vec4 &b){ + a = a*b; + } + + template + VECATTR vec4 operator *(const T &b, const vec4 &a){ + return a*b; + } + + template + VECATTR void operator *=(vec4 &a, const T &b){ + a=a*b; + } + + template + VECATTR void operator /=(vec4 &a, const vec4 &b){ + a = a/b; + } + + template + VECATTR void operator /=(vec4 &a, const T &b){ + a = a/b; + } +} #endif diff --git a/test/misc/bvp/bvp.cu b/test/misc/bvp/bvp.cu index 3f55625f..825da1ae 100644 --- a/test/misc/bvp/bvp.cu +++ b/test/misc/bvp/bvp.cu @@ -202,7 +202,7 @@ auto getParameters(){ } auto createBVP(std::vector klist, Parameters par){ - auto bvp = std::make_shared (klist, + auto bvp = std::make_shared (klist, make_boundary_dispatcher(klist, par.H), make_boundary_dispatcher(klist, par.H), klist.size(), par.H, par.nz);