Skip to content

Commit

Permalink
Fix a bug when parsing PROCAR with spin polarsation (#32)
Browse files Browse the repository at this point in the history
* Fix a bug when parsing PROCAR with spin polarsation

The list of parsed kpoints should only be upated after parsing is
completed, otherwise the second set of spin down projections is
completely skipped.

* PROCAR normalisation to be on by default

To make it more consistent

* Add missing PROCAR files

* Some small tidy up

* Remove unused kidx_internal

* Small fix for parsed kpoints handling (so it skips a repeated kpoint in the _same_ PROCAR, if in the same (spin) section)

* Update tests (revert to previous numbers with fix, and explicitly test expected normalisation behaviour)

* Small fix for k-weighting (though rarely used) and add test

---------

Co-authored-by: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
  • Loading branch information
zhubonan and kavanase authored Sep 14, 2023
1 parent 06f3c84 commit 40318e1
Show file tree
Hide file tree
Showing 6 changed files with 18,766 additions and 45 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ POTCAR
*.check
*.cst_esp
KPOINTS
PROCAR
*.json
*.DS_Store

Expand Down
159 changes: 128 additions & 31 deletions easyunfold/procar.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
class Procar(MSONable):
"""Reader for PROCAR file"""

def __init__(self, fobjs_or_paths=None, is_soc=False):
def __init__(self, fobjs_or_paths=None, is_soc=False, normalise=True):
"""
Read the PROCAR file from a handle or path
:param fobjs_or_paths: Either a string or list of file-like objs or paths
:param is_soc: Whether the PROCAR is from a calculation with spin-orbit coupling
:param normalise: Whether to normalise the projection for every band or not
"""
self._is_soc = is_soc
self.eigenvalues = None
Expand All @@ -36,6 +37,7 @@ def __init__(self, fobjs_or_paths=None, is_soc=False):
self.proj_data = None
self.header = None
self.proj_xyz = None
self.normalise = normalise

# Read the PROCAR
if isinstance(fobjs_or_paths, (str, Path)):
Expand All @@ -61,14 +63,25 @@ def _read(self, fobj, parsed_kpoints=None):
fobj.seek(0)

line = fobj.readline()
# Counter for the number of sections in the PROCAR
section_counter = 1
_last_kid = 0
this_procar_parsed_kpoints = set() # set with tuples of parsed (kvec tuple, section_counter) for this PROCAR
while line:
if line.startswith(' k-point'):
line = re.sub(r'(\d)-', r'\1 -', line)
tokens = line.strip().split()
kvec = tuple(round(float(val), 5) for val in # tuple to make it hashable
tokens[-6:-3]) # round to 5 decimal places to ensure proper kpoint matching
if kvec not in parsed_kpoints:
parsed_kpoints.add(kvec)
_kid = int(tokens[1])

# Check if the VASP PROCAR k index decreases – if so we have entered a second section
if _kid < _last_kid:
section_counter += 1
_last_kid = _kid

kvec = tuple(round(float(val), 5) for val in tokens[-6:-3] # tuple to make it hashable
) # round to 5 decimal places to ensure proper kpoint matching
if (kvec not in parsed_kpoints and (kvec, section_counter) not in this_procar_parsed_kpoints):
this_procar_parsed_kpoints.add((kvec, section_counter))
kvecs.append(list(kvec))
kweights.append(float(tokens[-1]))
else:
Expand All @@ -77,7 +90,7 @@ def _read(self, fobj, parsed_kpoints=None):
line = fobj.readline()
continue

elif not re.search(r'[a-zA-Z]', line) and line.strip() and len(line.strip().split()) - 2 == len(self.proj_names):
elif (not re.search(r'[a-zA-Z]', line) and line.strip() and len(line.strip().split()) - 2 == len(self.proj_names)):
# only parse data if line is expected length, in case of LORBIT >= 12
proj_data.append([float(token) for token in line.strip().split()[1:-1]])

Expand Down Expand Up @@ -107,39 +120,68 @@ def _read(self, fobj, parsed_kpoints=None):

proj_data = np.array(proj_data, dtype=float)

# redetermine nkpts in case some were skipped due to already being parsed
nkpts = len(kvecs)
nkpts = len(kvecs) # redetermine nkpts in case some were skipped due to already being parsed

self.nspins = proj_data.shape[0] // (self.nion * nbands * nkpts)
self.nspins //= 4 if self._is_soc else 1
# For spin-polarised calcs, the data from the second (down) spin are located after that of the first (up) spin
# Hence, the number of spins is simply the number of sections
self.nspins = section_counter

# Reshape
occs.resize((self.nspins, nkpts, nbands))
kvecs.resize((self.nspins, nkpts, 3))
kweights.resize((self.nspins, nkpts))
eigenvalues.resize((self.nspins, nkpts, nbands))
occs.resize((self.nspins, nkpts // self.nspins, nbands))
kvecs.resize((self.nspins, nkpts // self.nspins, 3))
kweights.resize((self.nspins, nkpts // self.nspins))
eigenvalues.resize((self.nspins, nkpts // self.nspins, nbands))

# Reshape the array
if self._is_soc is False:
proj_data = proj_data.reshape((self.nspins, nkpts, nbands, self.nion, len(self.proj_names)))
proj_data = proj_data.reshape((
self.nspins,
nkpts // self.nspins,
nbands,
self.nion,
len(self.proj_names),
))
proj_xyz = None
else:
proj_data = proj_data.reshape((self.nspins, nkpts, nbands, 4, self.nion, len(self.proj_names)))
# Split the data into xyz projection and total
proj_xyz = proj_data[:, :, :, 1:, :, :]
proj_data = proj_data[:, :, :, 0, :, :]

# normalise: (for each nspin, nkpt, nband, the sum of the projections over nion and proj_names should be 1)
proj_sum = np.sum(proj_data, axis=(-2, -1), keepdims=True)
proj_sum[proj_sum == 0] = 1 # just in case, avoid division by zero
proj_data /= proj_sum
if self.normalise:
self.normalise_projs(proj_data)

if proj_xyz is not None:
proj_sum = np.sum(proj_xyz, axis=(-3, -2, -1), keepdims=True)
proj_sum[proj_sum == 0] = 1
proj_xyz /= proj_sum

return self.nspins, occs, kvecs, kweights, eigenvalues, proj_data, proj_xyz, parsed_kpoints
# Update the parsed kpoints
parsed_kpoints.update({kvec_section_counter_tuple[0] for kvec_section_counter_tuple in this_procar_parsed_kpoints})

return (
self.nspins,
occs,
kvecs,
kweights,
eigenvalues,
proj_data,
proj_xyz,
parsed_kpoints,
)

def normalise_projs(self, proj_data):
"""
Normalise the projections
For each nspin, nkpt, nband, normalise the sum of projections over nion and proj_names to be 1.
Atomic & orbital projections do not sum to 1 in most cases in VASP, as only those falling inside
the atomic radii and overlapping with spd spherical harmonics are counted.
"""
proj_sum = np.sum(proj_data, axis=(-2, -1), keepdims=True)
proj_sum[proj_sum == 0] = 1 # just in case, avoid division by zero
proj_data /= proj_sum
self.normalise = True

def _read_header_nion_proj_names(self, fobj):
"""Read the header, nion and proj_names from the PROCAR"""
Expand Down Expand Up @@ -174,8 +216,16 @@ def open_file(fobj_or_path):
self._read_header_nion_proj_names(fobj)

current_nspins = self.nspins # check spin consistency between PROCARs
nspins, occs, kvecs, kweights, eigenvalues, proj_data, proj_xyz, parsed_kpoints = self._read(fobj,
parsed_kpoints=parsed_kpoints)
(
nspins,
occs,
kvecs,
kweights,
eigenvalues,
proj_data,
proj_xyz,
parsed_kpoints,
) = self._read(fobj, parsed_kpoints=parsed_kpoints)
if current_nspins is not None and current_nspins != nspins:
raise ValueError(f'Mismatch in number of spins in PROCARs supplied: ({nspins} vs {current_nspins})!')

Expand All @@ -199,12 +249,36 @@ def open_file(fobj_or_path):
for i, arr in enumerate(array_list):
if arr is not None and arr.shape[2] < max_nbands:
if len(arr.shape) == 3: # occs_list, eigenvalues_list
array_list[i] = np.pad(arr, ((0, 0), (0, 0), (0, max_nbands - arr.shape[2])), mode='constant')
array_list[i] = np.pad(
arr,
((0, 0), (0, 0), (0, max_nbands - arr.shape[2])),
mode='constant',
)
elif len(arr.shape) == 5: # proj_xyz_list
array_list[i] = np.pad(arr, ((0, 0), (0, 0), (0, max_nbands - arr.shape[2]), (0, 0), (0, 0)), mode='constant')
array_list[i] = np.pad(
arr,
(
(0, 0),
(0, 0),
(0, max_nbands - arr.shape[2]),
(0, 0),
(0, 0),
),
mode='constant',
)
elif len(arr.shape) == 6: # proj_xyz_list
array_list[i] = np.pad(arr, ((0, 0), (0, 0), (0, max_nbands - arr.shape[2]), (0, 0), (0, 0), (0, 0)),
mode='constant')
array_list[i] = np.pad(
arr,
(
(0, 0),
(0, 0),
(0, max_nbands - arr.shape[2]),
(0, 0),
(0, 0),
(0, 0),
),
mode='constant',
)
else:
raise ValueError('Unexpected array shape encountered!')

Expand Down Expand Up @@ -239,12 +313,19 @@ def get_projection(self, atom_idx: List[int], proj: Union[List[str], str], weigh
proj = [
proj,
]

# replace any instance of "p" with "px,py,pz" and "d" with "dxy,dyz,dz2,dxz,dx2-y2"
def _replace_p_d(single_proj):
if single_proj == 'p':
return ['px', 'py', 'pz']
if single_proj == 'd':
return ['dxy', 'dyz', 'dz2', 'dxz', 'x2-y2'] # dx2-y2 labelled differently in VASP
return [
'dxy',
'dyz',
'dz2',
'dxz',
'x2-y2',
] # dx2-y2 labelled differently in VASP
# PROCAR

return [single_proj]
Expand All @@ -259,15 +340,31 @@ def _replace_p_d(single_proj):

if weight_by_k:
for kidx in range(self.nkpts):
out[:, kidx, :] *= self.kweights[kidx]
out[:, kidx, :] *= self.kweights[:, kidx]
return out

def as_dict(self) -> dict:
"""Convert the object into a dictionary representation (so it can be saved to json)"""
output = {'@module': self.__class__.__module__, '@class': self.__class__.__name__, '@version': __version__}
output = {
'@module': self.__class__.__module__,
'@class': self.__class__.__name__,
'@version': __version__,
}
for key in [
'_is_soc', 'eigenvalues', 'kvecs', 'kweights', 'nbands', 'nkpts', 'nspins', 'nion', 'occs', 'proj_names', 'proj_data',
'header', 'proj_xyz'
'_is_soc',
'eigenvalues',
'kvecs',
'kweights',
'nbands',
'nkpts',
'nspins',
'nion',
'occs',
'proj_names',
'proj_data',
'header',
'proj_xyz',
'normalise',
]:
output[key] = getattr(self, key)
return output
Expand Down
12 changes: 7 additions & 5 deletions easyunfold/unfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def load_procars(self, procars: Union[str, List[str]]):

# Load the procars
# Note that this method should be generalised for non-VASP as well.
self.transient_quantities['procars'] = Procar(procars)
self.transient_quantities['procars'] = Procar(procars, normalise=True)
# Construct mapping from the primitive cell kpoints to those in the PROCAR
self.transient_quantities['procars_kmap'] = self._construct_procar_kmap()

Expand Down Expand Up @@ -507,10 +507,12 @@ def _get_spectral_weights(self,
return sws, e0, spectral_function
return sws

def get_band_weight_sets(self,
atoms_idx: List[int],
orbitals: List[Union[List[str], str]],
procars: Union[None, List[str], str] = None) -> list:
def get_band_weight_sets(
self,
atoms_idx: List[int],
orbitals: List[Union[List[str], str]],
procars: Union[None, List[str], str] = None,
) -> list:
"""
Get weights array sets for bands
Expand Down
Loading

0 comments on commit 40318e1

Please sign in to comment.