Skip to content

Commit

Permalink
Alignment check before calling xxx_sparse functions
Browse files Browse the repository at this point in the history
  • Loading branch information
sunqm committed Jan 9, 2023
1 parent 9bbfe84 commit fb038b4
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 18 deletions.
6 changes: 4 additions & 2 deletions pyscf/dft/gen_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def build(self, mol=None, with_non0tab=False, sort_grids=True, **kwargs):
self.coords = self.coords[idx]
self.weights = self.weights[idx]

if self.alignment:
if self.alignment > 1:
padding = _padding_size(self.size, self.alignment)
logger.debug(self, 'Padding %d grids', padding)
if padding > 0:
Expand Down Expand Up @@ -620,7 +620,7 @@ def prune_by_density_(self, rho, threshold=0):
self.weights.size - numpy.count_nonzero(idx))
self.coords = numpy.asarray(self.coords [idx], order='C')
self.weights = numpy.asarray(self.weights[idx], order='C')
if self.alignment:
if self.alignment > 1:
padding = _padding_size(self.size, self.alignment)
logger.debug(self, 'prune_by_density_: %d padding grids', padding)
if padding > 0:
Expand Down Expand Up @@ -668,4 +668,6 @@ def _default_ang(nuc, level=3):
(65, 65, 65, 65, 65, 65, 65 ),)) # 9

def _padding_size(ngrids, alignment):
if alignment <= 1:
return 0
return (ngrids + alignment - 1) // alignment * alignment - ngrids
33 changes: 21 additions & 12 deletions pyscf/dft/numint.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
warnings.warn('XC functional libraries (libxc or XCfun) are not available.')
raise

from pyscf.dft.gen_grid import BLKSIZE, NBINS, CUTOFF, make_mask
from pyscf.dft.gen_grid import BLKSIZE, NBINS, CUTOFF, ALIGNMENT_UNIT, make_mask
from pyscf.dft import xc_deriv
from pyscf import __config__

Expand Down Expand Up @@ -830,8 +830,7 @@ def _sparse_enough(screen_index):
return numpy.count_nonzero(screen_index) < screen_index.size * threshold

def _dot_ao_ao_dense(ao1, ao2, wv, out=None):
'''Returns bra.T.dot(ket) while sparsity is explicitly considered.
Note the return may have ~1e-13 difference to _dot_ao_ao.
'''Returns (bra*wv).T.dot(ket)
'''
assert ao1.flags.f_contiguous
assert ao2.flags.f_contiguous
Expand All @@ -857,13 +856,13 @@ def _dot_ao_ao_sparse(ao1, ao2, wv, nbins, screen_index, pair_mask, ao_loc,
'''Returns (bra*wv).T.dot(ket) while sparsity is explicitly considered.
Note the return may have ~1e-13 difference to _dot_ao_ao.
'''
if screen_index is None or pair_mask is None:
ngrids, nao = ao1.shape
if screen_index is None or pair_mask is None or ngrids % ALIGNMENT_UNIT != 0:
return _dot_ao_ao_dense(ao1, ao2, wv, out)

assert ao1.flags.f_contiguous
assert ao2.flags.f_contiguous
assert ao1.dtype == ao2.dtype == numpy.double
ngrids, nao = ao1.shape
nbas = screen_index.shape[1]
if out is None:
out = numpy.zeros((nao, nao), dtype=ao1.dtype)
Expand Down Expand Up @@ -898,12 +897,12 @@ def _dot_ao_dm_sparse(ao, dm, nbins, screen_index, pair_mask, ao_loc):
ao matrix, (numpy.dot(ao, dm)*ao).sum(axis=1), their value can be matched up
to ~1e-13.
'''
if screen_index is None or pair_mask is None:
ngrids, nao = ao.shape
if screen_index is None or pair_mask is None or ngrids % ALIGNMENT_UNIT != 0:
return lib.dot(dm.T, ao.T).T

assert ao.flags.f_contiguous
assert ao.dtype == dm.dtype == numpy.double
ngrids, nao = ao.shape
nbas = screen_index.shape[1]
dm = numpy.asarray(dm, order='C')
out = _empty_aligned((nao, ngrids)).T
Expand Down Expand Up @@ -936,6 +935,9 @@ def _scale_ao_sparse(ao, wv, screen_index, ao_loc, out=None):
ngrids, nao = ao.shape
comp = 1
nbas = screen_index.shape[1]
if ngrids % ALIGNMENT_UNIT != 0:
return _scale_ao(ao, wv, out=out)

if out is None:
out = _empty_aligned((nao, ngrids)).T
else:
Expand All @@ -954,13 +956,13 @@ def _contract_rho_sparse(bra, ket, screen_index, ao_loc):
'''Returns numpy.einsum('gi,gi->g', bra, ket) while sparsity is explicitly
considered. Note the return may have ~1e-13 difference to _contract_rho.
'''
if screen_index is None:
ngrids, nao = bra.shape
if screen_index is None or ngrids % ALIGNMENT_UNIT != 0:
return _contract_rho(bra, ket)

assert bra.flags.f_contiguous
assert ket.flags.f_contiguous
assert bra.dtype == ket.dtype == numpy.double
ngrids, nao = bra.shape
nbas = screen_index.shape[1]
rho = numpy.empty(ngrids)
libdft.VXCdcontract_rho_sparse(
Expand Down Expand Up @@ -2527,6 +2529,9 @@ def _uks_mgga_wv2(rho0, rho1, fxc, kxc, weight):
return wva, wvb

def _empty_aligned(shape, alignment=8):
if alignment <= 1:
return numpy.empty(shape)

size = numpy.prod(shape)
buf = numpy.empty(size + alignment - 1)
align8 = alignment * 8
Expand Down Expand Up @@ -2642,8 +2647,9 @@ def _block_loop(ni, mol, grids, nao=None, deriv=0, max_memory=2000,
# NOTE to index grids.non0tab, the blksize needs to be an integer
# multiplier of BLKSIZE
if blksize is None:
blksize = int(max_memory*1e6/((comp+1)*nao*8*BLKSIZE))*BLKSIZE
blksize = max(BLKSIZE, min(blksize, ngrids, BLKSIZE*1200))
blksize = int(max_memory*1e6/((comp+1)*nao*8*BLKSIZE))
blksize = max(4, min(blksize, ngrids//BLKSIZE+1, 1200)) * BLKSIZE
assert blksize % BLKSIZE == 0

if non0tab is None and mol is grids.mol:
non0tab = grids.non0tab
Expand All @@ -2653,6 +2659,9 @@ def _block_loop(ni, mol, grids, nao=None, deriv=0, max_memory=2000,
non0tab[:] = NBINS + 1 # Corresponding to AO value ~= 1
screen_index = non0tab

# the xxx_sparse() functions require ngrids 8-byte aligned
allow_sparse = ngrids % ALIGNMENT_UNIT == 0

if buf is None:
buf = _empty_aligned(comp * blksize * nao)
for ip0, ip1 in lib.prange(0, ngrids, blksize):
Expand All @@ -2662,7 +2671,7 @@ def _block_loop(ni, mol, grids, nao=None, deriv=0, max_memory=2000,
# TODO: pass grids.cutoff to eval_ao
ao = ni.eval_ao(mol, coords, deriv=deriv, non0tab=mask,
cutoff=grids.cutoff, out=buf)
if not _sparse_enough(mask):
if not allow_sparse and not _sparse_enough(mask):
# Unset mask for dense AO tensor. It determines which eval_rho
# to be called in make_rho
mask = None
Expand Down
9 changes: 5 additions & 4 deletions pyscf/pbc/dft/numint.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from pyscf.dft.numint import eval_mat, _dot_ao_ao, _dot_ao_dm, _tau_dot
from pyscf.dft.numint import _scale_ao, _contract_rho
from pyscf.dft.numint import OCCDROP
from pyscf.dft.gen_grid import NBINS, CUTOFF
from pyscf.dft.gen_grid import NBINS, CUTOFF, ALIGNMENT_UNIT
from pyscf.pbc.dft.gen_grid import make_mask, BLKSIZE
from pyscf.pbc.lib.kpts_helper import member, is_zero

Expand Down Expand Up @@ -966,10 +966,11 @@ def block_loop(self, cell, grids, nao=None, deriv=0, kpt=numpy.zeros(3),
grids_weights = grids.weights
ngrids = grids_coords.shape[0]
comp = (deriv+1)*(deriv+2)*(deriv+3)//6
# NOTE to index grids.non0tab, the blksize needs to be the integer multiplier of BLKSIZE
# NOTE to index grids.non0tab, blksize needs to be integer multiplier of BLKSIZE
if blksize is None:
blksize = int(max_memory*1e6/(comp*2*nao*16*BLKSIZE))*BLKSIZE
blksize = max(BLKSIZE, min(blksize, ngrids, BLKSIZE*2400))
blksize = int(max_memory*1e6/(comp*2*nao*16*BLKSIZE))
blksize = max(4, min(blksize, ngrids//BLKSIZE+1, 2400)) * BLKSIZE
assert blksize % BLKSIZE == 0
if non0tab is None:
non0tab = grids.non0tab
if non0tab is None:
Expand Down

0 comments on commit fb038b4

Please sign in to comment.