Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented the ECP part and added several missing factors in the TREXIO interface. #68

Merged
merged 7 commits into from
Nov 15, 2024
96 changes: 90 additions & 6 deletions pyscf/tools/test/test_trexio.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,114 @@
import pyscf
from pyscf.tools import trexio

def test_mol():
filename = 'test.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='6-31g**')
def test_mol_ae_6_31g():
filename = 'test_mol_ae_6_31g_sphe.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='6-31g**', cart=False)
ref = mol.intor('int1e_ovlp')
trexio.to_trexio(mol, filename)
mol = trexio.mol_from_trexio(filename)
s1 = mol.intor('int1e_ovlp')
assert abs(ref - s1).max() < 1e-12

filename = 'test_mol_ae_6_31g_cart.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='6-31g**', cart=True)
ref = mol.intor('int1e_ovlp')
trexio.to_trexio(mol, filename)
mol = trexio.mol_from_trexio(filename)
s1 = mol.intor('int1e_ovlp')
assert abs(ref - s1).max() < 1e-12

def test_mf():
filename = 'test1.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='6-31g*')
def test_mol_ccecp_ccpvqz():
filename = 'test_mol_ccecp_ccpvqz_sphe.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='ccecp-cc-pVQZ', ecp='ccECP', cart=False)
# ref = mol.intor('int1e_ovlp')
trexio.to_trexio(mol, filename)
''' Todo: mol_from_trexio for ecp is yet implemented.
mol = trexio.mol_from_trexio(filename)
s1 = mol.intor('int1e_ovlp')
assert abs(ref - s1).max() < 1e-12
'''

filename = 'test_mol_ccecp_ccpvqz_cart.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='ccecp-cc-pVQZ', ecp='ccECP', cart=True)
# ref = mol.intor('int1e_ovlp')
trexio.to_trexio(mol, filename)
''' Todo: mol_from_trexio for ecp is yet implemented.
mol = trexio.mol_from_trexio(filename)
s1 = mol.intor('int1e_ovlp')
assert abs(ref - s1).max() < 1e-12
'''

''' Todo: it does not work due to the internal contraction in PySCF
def test_mol_ae_ccpvdz():
filename = 'test_mol_ae_ccpvdz_sphe.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='ccpvdz', cart=False)
ref = mol.intor('int1e_ovlp')
trexio.to_trexio(mol, filename)
mol = trexio.mol_from_trexio(filename)
s1 = mol.intor('int1e_ovlp')
assert abs(ref - s1).max() < 1e-12

filename = 'test_mol_ae_ccpvdz_cart.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='ccpvdz', cart=True)
ref = mol.intor('int1e_ovlp')
trexio.to_trexio(mol, filename)
mol = trexio.mol_from_trexio(filename)
s1 = mol.intor('int1e_ovlp')
assert abs(ref - s1).max() < 1e-12
'''

def test_mf_ae_6_31g():
filename = 'test_mf_ae_6_31g_sphe.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='6-31g*', cart=False)
mf = mol.RHF().run()
print(mf.mo_coeff)
trexio.to_trexio(mf, filename)
mf1 = trexio.scf_from_trexio(filename)
assert abs(mf1.mo_coeff - mf.mo_coeff).max() < 1e-12
trexio.write_eri(mf._eri, filename)
eri = trexio.read_eri(filename)
assert abs(mf._eri - eri).max() < 1e-12

filename = 'test_mf_ae_6_31g_cart.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='6-31g*', cart=True)
mf = mol.RHF().run()
print(mf.mo_coeff)
trexio.to_trexio(mf, filename)
mf1 = trexio.scf_from_trexio(filename)
assert abs(mf1.mo_coeff - mf.mo_coeff).max() < 1e-12
trexio.write_eri(mf._eri, filename)
eri = trexio.read_eri(filename)
assert abs(mf._eri - eri).max() < 1e-12

def test_mf_ccecp_ccpvqz():
filename = 'test_mf_ccecp_ccpvqz_sphe.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='ccecp-cc-pVQZ', ecp='ccECP', cart=False)
mf = mol.RHF().run()
trexio.to_trexio(mf, filename)
''' Todo: scf_from_trexio for ecp is yet implemented.
mf1 = trexio.scf_from_trexio(filename)
assert abs(mf1.mo_coeff - mf.mo_coeff).max() < 1e-12
trexio.write_eri(mf._eri, filename)
eri = trexio.read_eri(filename)
assert abs(mf._eri - eri).max() < 1e-12
'''

filename = 'test_mf_ccecp_ccpvqz_cart.h5'
mol = pyscf.M(atom='H 0 0 0; F 0 0 1', basis='ccecp-cc-pVQZ', ecp='ccECP', cart=True)
mf = mol.RHF().run()
trexio.to_trexio(mf, filename)
''' Todo: scf_from_trexio for ecp is yet implemented.
mf1 = trexio.scf_from_trexio(filename)
assert abs(mf1.mo_coeff - mf.mo_coeff).max() < 1e-12
trexio.write_eri(mf._eri, filename)
eri = trexio.read_eri(filename)
assert abs(mf._eri - eri).max() < 1e-12
'''

if __name__ == "__main__":
#test_mol_ae_6_31g()
#test_mol_ccecp_ccpvqz()
#test_mol_ae_ccpvdz()
#test_mf_ae_6_31g()
test_mf_ccecp_ccpvqz()
157 changes: 146 additions & 11 deletions pyscf/tools/trexio.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
https://github.com/TREX-CoE/trexio/blob/master/python/README.md
'''

import re
import numpy as np
import pyscf
from pyscf import lib
from pyscf import gto
from pyscf import scf
Expand All @@ -27,15 +29,39 @@ def to_trexio(obj, filename, backend='h5'):
raise NotImplementedError(f'Conversion function for {obj.__class__}')

def _mol_to_trexio(mol, trexio_file):
trexio.write_nucleus_num(trexio_file, mol.natm)
# 1 Metadata
trexio.write_metadata_code_num(trexio_file, 1)
trexio.write_metadata_code(trexio_file, [f'PySCF-v{pyscf.__version__}'])
#trexio.write_metadata_package_version(trexio_file, f'TREXIO-v{trexio.__version__}')

# 2 System
trexio.write_nucleus_num(trexio_file, mol.natm)
nucleus_charge = [mol.atom_charge(i) for i in range(mol.natm)]
trexio.write_nucleus_charge(trexio_file, nucleus_charge)
trexio.write_nucleus_coord(trexio_file, mol.atom_coords())
labels = [mol.atom_pure_symbol(i) for i in range(mol.natm)]
trexio.write_nucleus_label(trexio_file, labels)
if mol._ecp:
raise NotImplementedError
trexio.write_nucleus_charge(trexio_file, mol.atom_charges())
trexio.write_nucleus_coord(trexio_file, mol.atom_coords())
if mol.symmetry:
trexio.write_nucleus_point_group(trexio_file, mol.groupname)

# 2.3 Periodic boundary calculations
try:
kousuke-nakano marked this conversation as resolved.
Show resolved Hide resolved
mol.a
periodic = True
raise NotImplementedError('PBC is not supported yet.')
# 2.2 Cell

except AttributeError:
periodic = False
trexio.write_pbc_periodic(trexio_file, periodic)

# 2.4 Electron (electron group)
electron_up_num, electron_dn_num = mol.nelec
trexio.write_electron_num(trexio_file, electron_up_num + electron_dn_num )
trexio.write_electron_up_num(trexio_file, electron_up_num)
trexio.write_electron_dn_num(trexio_file, electron_dn_num)

# 3.1 Basis set
trexio.write_basis_type(trexio_file, 'Gaussian')
trexio.write_basis_shell_num(trexio_file, mol.nbas)
trexio.write_basis_prim_num(trexio_file, int(mol._bas[:,gto.NPRIM_OF].sum()))
Expand All @@ -45,22 +71,132 @@ def _mol_to_trexio(mol, trexio_file):
prim2sh = [[ib]*nprim for ib, nprim in enumerate(mol._bas[:,gto.NPRIM_OF])]
trexio.write_basis_shell_index(trexio_file, np.hstack(prim2sh))
trexio.write_basis_exponent(trexio_file, np.hstack(mol.bas_exps()))
assert all(mol._bas[:,gto.NCTR_OF] == 1)
if not all(mol._bas[:,gto.NCTR_OF] == 1):
raise NotImplementedError('The generalized contraction is not supported.')
coef = [mol.bas_ctr_coeff(i).ravel() for i in range(mol.nbas)]
trexio.write_basis_coefficient(trexio_file, np.hstack(coef))
prim_norms = [gto.gto_norm(mol.bas_angular(i), mol.bas_exp(i))
for i in range(mol.nbas)]
prim_norms = [gto.gto_norm(mol.bas_angular(i), mol.bas_exp(i)) for i in range(mol.nbas)]
trexio.write_basis_prim_factor(trexio_file, np.hstack(prim_norms))

if mol.symmetry:
trexio.write_nucleus_point_group(trexio_file, mol.groupname)
# 3.2 Effective core potentials (ecp group)
if len(mol._pseudo) > 0:
raise NotImplementedError(
"TREXIO does not support 'pseudo' format. Please use 'ecp'"
)

if mol._ecp:
# internal format of pyscf is described in
# https://pyscf.org/pyscf_api_docs/pyscf.gto.html?highlight=ecp#module-pyscf.gto.ecp

ecp_num = 0
ecp_max_ang_mom_plus_1 = []
ecp_z_core = []
ecp_nucleus_index = []
ecp_ang_mom = []
ecp_coefficient = []
ecp_exponent = []
ecp_power = []

chemical_symbol_list = [mol.atom_pure_symbol(i) for i in range(mol.natm)]
atom_symbol_list = [mol.atom_symbol(i) for i in range(mol.natm)]

for nuc_index, (chemical_symbol, atom_symbol) in enumerate(
zip(chemical_symbol_list, atom_symbol_list)
):
if re.match(r"X-.*", chemical_symbol) or re.match(r"X-.*", atom_symbol):
ecp_num += 1
ecp_max_ang_mom_plus_1.append(1)
ecp_z_core.append(0)
ecp_nucleus_index.append(nuc_index)
ecp_ang_mom.append(0)
ecp_coefficient.append(0.0)
ecp_exponent.append(1.0)
ecp_power.append(0)
continue

# atom_symbol is superior to atom_pure_symbol
try:
z_core, ecp_list = mol._ecp[atom_symbol]
except KeyError:
z_core, ecp_list = mol._ecp[chemical_symbol]

# ecp zcore
ecp_z_core.append(z_core)

# max_ang_mom
max_ang_mom = max([ecp[0] for ecp in ecp_list])
if max_ang_mom == -1:
# Special treatments are needed for H and He.
# PySCF database does not define the ul-s part for these elements.
dummy_ul_s_part = True
max_ang_mom = 0
max_ang_mom_plus_1 = 1
else:
dummy_ul_s_part = False
max_ang_mom_plus_1 = max_ang_mom + 1

ecp_max_ang_mom_plus_1.append(max_ang_mom_plus_1)

for ecp in ecp_list:
ang_mom = ecp[0]
if ang_mom == -1:
ang_mom = max_ang_mom_plus_1
for r, exp_coeff_list in enumerate(ecp[1]):
for exp_coeff in exp_coeff_list:
exp, coeff = exp_coeff
ecp_num += 1
ecp_nucleus_index.append(nuc_index)
ecp_ang_mom.append(ang_mom)
ecp_coefficient.append(coeff)
ecp_exponent.append(exp)
ecp_power.append(r - 2)

if dummy_ul_s_part:
# A dummy ECP is put for the ul-s part here for H and He atoms.
ecp_num += 1
ecp_nucleus_index.append(nuc_index)
ecp_ang_mom.append(0)
ecp_coefficient.append(0.0)
ecp_exponent.append(1.0)
ecp_power.append(0)

trexio.write_ecp_num(trexio_file, ecp_num)
trexio.write_ecp_max_ang_mom_plus_1(trexio_file, ecp_max_ang_mom_plus_1)
trexio.write_ecp_z_core(trexio_file, ecp_z_core)
trexio.write_ecp_nucleus_index(trexio_file, ecp_nucleus_index)
trexio.write_ecp_ang_mom(trexio_file, ecp_ang_mom)
trexio.write_ecp_coefficient(trexio_file, ecp_coefficient)
trexio.write_ecp_exponent(trexio_file, ecp_exponent)
trexio.write_ecp_power(trexio_file, ecp_power)

# 4.1 Atomic orbitals (ao group)
if mol.cart:
trexio.write_ao_cartesian(trexio_file, 1)
ao_shell = []
ao_normalization = []
for i, ang_mom in enumerate(mol._bas[:,gto.ANG_OF]):
ao_shell += [i] * int((ang_mom+1) * (ang_mom+2) / 2)
# note: PySCF(libintc) normalizes s and p only.
if ang_mom == 0 or ang_mom == 1:
ao_normalization += [float(np.sqrt((2*ang_mom+1)/(4*np.pi)))] * int((ang_mom+1) * (ang_mom+2) / 2)
else:
ao_normalization += [1.0] * int((ang_mom+1) * (ang_mom+2) / 2)
else:
trexio.write_ao_cartesian(trexio_file, 0)
ao_shell = []
ao_normalization = []
for i, ang_mom in enumerate(mol._bas[:,gto.ANG_OF]):
ao_shell += [i] * (2*ang_mom+1)
# note: TREXIO employs the solid harmonics notation,; thus, we need these factors.
ao_normalization += [float(np.sqrt((2*ang_mom+1)/(4*np.pi)))] * (2*ang_mom+1)

trexio.write_ao_num(trexio_file, int(mol.nao))
trexio.write_ao_shell(trexio_file, ao_shell)
trexio.write_ao_normalization(trexio_file, ao_normalization)


def _scf_to_trexio(mf, trexio_file):
# 4.2 Molecular orbitals (mo group)
mol = mf.mol
_mol_to_trexio(mol, trexio_file)
if isinstance(mf, scf.uhf.UHF):
Expand All @@ -78,7 +214,6 @@ def _scf_to_trexio(mf, trexio_file):
mo_type = 'RHF'
trexio.write_mo_type(trexio_file, mo_type)
idx = _order_ao_index(mf.mol)
trexio.write_ao_num(trexio_file, int(mol.nao))
trexio.write_mo_num(trexio_file, mo_energy.size)
trexio.write_mo_coefficient(trexio_file, mo[idx].T.ravel())
trexio.write_mo_energy(trexio_file, mo_energy)
Expand Down
Loading