From 793d481f5d478baad18bd86cc5e05c4b00727b3b Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Mon, 27 May 2024 17:03:45 +0100 Subject: [PATCH 01/10] Return per site concentrations as percentages in `get_equilibrium_concentrations` --- doped/core.py | 3 ++- doped/thermodynamics.py | 21 +++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/doped/core.py b/doped/core.py index ab84e5d1..8a6c27fe 100644 --- a/doped/core.py +++ b/doped/core.py @@ -1085,7 +1085,8 @@ def equilibrium_concentration( referenced to the VBM. Default is 0 (i.e. the VBM). per_site (bool): Whether to return the concentration as fractional concentration per site, - rather than the default of per cm^3. (default: False) + rather than the default of per cm^3. Multiply by 100 for concentration in + percent. (default: False) symprec (float): Symmetry tolerance for ``spglib`` to use when determining relaxed defect point symmetries and thus orientational degeneracies. Default is ``0.1`` diff --git a/doped/thermodynamics.py b/doped/thermodynamics.py index 5b3dbc59..bc5da28a 100644 --- a/doped/thermodynamics.py +++ b/doped/thermodynamics.py @@ -1227,7 +1227,7 @@ def get_equilibrium_concentrations( states (e.g. ``v_Cd_0``, ``v_Cd_-1``, ``v_Cd_-2`` instead of ``v_Cd``). (default: True) per_site (bool): - Whether to return the concentrations as fractional concentrations per site, + Whether to return the concentrations as percent concentrations per site, rather than the default of per cm^3. (default: False) skip_formatting (bool): Whether to skip formatting the defect charge states and concentrations as @@ -1252,7 +1252,7 @@ def get_equilibrium_concentrations( fermi_level=fermi_level, vbm=defect_entry.calculation_metadata.get("vbm", self.vbm), ) - concentration = defect_entry.equilibrium_concentration( + raw_concentration = defect_entry.equilibrium_concentration( chempots=chempots, limit=limit, el_refs=el_refs, @@ -1261,6 +1261,17 @@ def get_equilibrium_concentrations( temperature=temperature, per_site=per_site, ) + if skip_formatting: + concentration = raw_concentration + + elif per_site: + concentration = ( + f"{raw_concentration:.3%}" # greater than 0.001% + if raw_concentration > 1e-5 + else f"{raw_concentration * 100:.3e} %" + ) + else: + concentration = f"{raw_concentration:.3e}" energy_concentration_list.append( { "Defect": defect_entry.name.rsplit("_", 1)[0], # name without charge @@ -1271,10 +1282,8 @@ def get_equilibrium_concentrations( else f"{'+' if defect_entry.charge_state > 0 else ''}{defect_entry.charge_state}" ), "Formation Energy (eV)": round(formation_energy, 3), - "Raw Concentration": concentration, - "Concentration (per site)" if per_site else "Concentration (cm^-3)": ( - concentration if skip_formatting else f"{concentration:.3e}" - ), + "Raw Concentration": raw_concentration, + "Concentration (per site)" if per_site else "Concentration (cm^-3)": concentration, } ) From 8e0ec578c517fbd68d9fa6592e3516ffcc4e3abd Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Mon, 27 May 2024 14:13:09 -0400 Subject: [PATCH 02/10] Add `bulk_site_concentration` property and tests --- doped/core.py | 15 ++++++++++++--- tests/test_analysis.py | 4 ++++ tests/test_generation.py | 7 +++++++ tests/test_thermodynamics.py | 13 ++++++++++--- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/doped/core.py b/doped/core.py index 8a6c27fe..2c3e9815 100644 --- a/doped/core.py +++ b/doped/core.py @@ -1150,10 +1150,19 @@ def equilibrium_concentration( if per_site: return exp_factor * degeneracy_factor - volume_in_cm3 = self.defect.structure.volume * 1e-24 # convert volume in Å^3 to cm^3 - with np.errstate(over="ignore"): - return self.defect.multiplicity * degeneracy_factor * exp_factor / volume_in_cm3 + return self.bulk_site_concentration * degeneracy_factor * exp_factor + + @property + def bulk_site_concentration(self): + """ + Return the site concentration (in cm^-3) of the corresponding atomic + site of the defect in the pristine bulk material (e.g. if the defect is + V_O in SrTiO3, returns the site concentration of (symmetry-equivalent) + oxygen atoms in SrTiO3). + """ + volume_in_cm3 = self.defect.structure.volume * 1e-24 # convert volume in Å^3 to cm^3 + return self.defect.multiplicity / volume_in_cm3 def __repr__(self): """ diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 98aefa41..782b8fc4 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -970,6 +970,10 @@ def test_SrTiO3_diff_ISYM_bulk_defect(self): assert dp.defect_dict["vac_O_0"].calculation_metadata["relaxed point symmetry"] == "C2v" thermo = dp.get_defect_thermodynamics() + # hardcoded check of bulk_site_concentration property: + for defect_entry in dp.defect_dict.values(): # oxygen site concentration + assert np.isclose(defect_entry.bulk_site_concentration, 5.06752e22, rtol=1e-4) + print(thermo.get_symmetries_and_degeneracies()) return thermo.plot() diff --git a/tests/test_generation.py b/tests/test_generation.py index d58650d3..7fcec6c6 100644 --- a/tests/test_generation.py +++ b/tests/test_generation.py @@ -1772,6 +1772,13 @@ def CdTe_defect_gen_check(self, CdTe_defect_gen, generate_supercell=True): + "\n---------------------------------------------------------\n" + self.CdTe_defect_gen_info ) + for defect_entry in CdTe_defect_gen.defect_entries.values(): + if defect_entry.defect.defect_type != DefectType.Interstitial: + assert np.isclose(defect_entry.bulk_site_concentration, 1e24 / self.prim_cdte.volume) + else: + assert np.isclose( + defect_entry.bulk_site_concentration, 1e24 / self.prim_cdte.volume + ) or np.isclose(defect_entry.bulk_site_concentration, 4e24 / self.prim_cdte.volume) # explicitly test defect entry charge state log: assert CdTe_defect_gen.defect_entries["v_Cd_-1"].charge_state_guessing_log == [ diff --git a/tests/test_thermodynamics.py b/tests/test_thermodynamics.py index d8ddd13f..0f403c33 100644 --- a/tests/test_thermodynamics.py +++ b/tests/test_thermodynamics.py @@ -1445,7 +1445,6 @@ def test_parse_chempots_CdTe(self): self._check_chempots_dict(self.CdTe_defect_thermo.chempots) assert self.CdTe_defect_thermo.chempots == self.CdTe_chempots assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"] - self.CdTe_defect_thermo.el_refs = self.CdTe_chempots["elemental_refs"] self._check_chempots_dict(self.CdTe_defect_thermo.chempots) assert self.CdTe_defect_thermo.chempots == self.CdTe_chempots # the same assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"] # the same @@ -1460,7 +1459,6 @@ def test_parse_chempots_CdTe(self): assert self.CdTe_defect_thermo.chempots == semi_manual_chempots_dict assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"] - self.CdTe_defect_thermo.el_refs = self.CdTe_chempots["elemental_refs"] self._check_chempots_dict(self.CdTe_defect_thermo.chempots) assert self.CdTe_defect_thermo.chempots == semi_manual_chempots_dict # the same assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"] # the same @@ -1477,7 +1475,6 @@ def test_parse_chempots_CdTe(self): ] assert self.CdTe_defect_thermo.chempots == manual_zeroed_rel_chempots_dict assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"] # unchanged - self.CdTe_defect_thermo.el_refs = self.CdTe_chempots["elemental_refs"] self._check_chempots_dict(self.CdTe_defect_thermo.chempots) assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"] # the same assert self.CdTe_defect_thermo.chempots == manual_zeroed_rel_chempots_dict # the same @@ -1834,6 +1831,16 @@ def test_formation_energy_mult_degen(self): ) assert np.isclose(new_conc, orig_conc * 21) + # test per_site and bulk_site_concentration attributes: + new_conc = random_defect_entry.equilibrium_concentration( + chempots=self.CdTe_chempots, + limit="Cd-rich", + fermi_level=fermi_level, + temperature=temperature, + per_site=True, + ) + assert np.isclose(orig_conc, new_conc * random_defect_entry.bulk_site_concentration) + @custom_mpl_image_compare(filename="CdTe_duplicate_entry_names.png") def test_handling_duplicate_entry_names(self): """ From a62743c252ddfccc84a1500ac3b1cce4fb8343cf Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Tue, 28 May 2024 11:45:48 -0400 Subject: [PATCH 03/10] Ensure 'no chempots' warning only shown once for concentration functions, add `per_charge`, `per_site` and `skip_formatting` options to `get_quenched_fermi_level_and_concentrations` as well, plus some general efficiency/robustness updates, and add tests --- doped/thermodynamics.py | 259 ++++++++++++++++++++++++++++------------ tests/test_analysis.py | 158 +++++++++++++++++++++++- 2 files changed, 336 insertions(+), 81 deletions(-) diff --git a/doped/thermodynamics.py b/doped/thermodynamics.py index bc5da28a..f0da71f3 100644 --- a/doped/thermodynamics.py +++ b/doped/thermodynamics.py @@ -1231,7 +1231,7 @@ def get_equilibrium_concentrations( rather than the default of per cm^3. (default: False) skip_formatting (bool): Whether to skip formatting the defect charge states and concentrations as - strings (and keep as ints and floats instead). (default: False) + strings (and keep as ``int``\s and ``float``\s instead). (default: False) Returns: ``pandas`` ``DataFrame`` of defect concentrations (and formation energies) @@ -1261,17 +1261,7 @@ def get_equilibrium_concentrations( temperature=temperature, per_site=per_site, ) - if skip_formatting: - concentration = raw_concentration - - elif per_site: - concentration = ( - f"{raw_concentration:.3%}" # greater than 0.001% - if raw_concentration > 1e-5 - else f"{raw_concentration * 100:.3e} %" - ) - else: - concentration = f"{raw_concentration:.3e}" + energy_concentration_list.append( { "Defect": defect_entry.name.rsplit("_", 1)[0], # name without charge @@ -1283,7 +1273,11 @@ def get_equilibrium_concentrations( ), "Formation Energy (eV)": round(formation_energy, 3), "Raw Concentration": raw_concentration, - "Concentration (per site)" if per_site else "Concentration (cm^-3)": concentration, + ( + "Concentration (per site)" if per_site else "Concentration (cm^-3)" + ): _format_concentration( + raw_concentration, per_site=per_site, skip_formatting=skip_formatting + ), } ) @@ -1294,24 +1288,14 @@ def get_equilibrium_concentrations( "Raw Concentration" ].transform("sum") conc_df["Charge State Population"] = conc_df["Charge State Population"].apply( - lambda x: f"{x:.1%}" + lambda x: f"{x:.2%}" ) conc_df = conc_df.drop(columns=["Raw Charge", "Raw Concentration"]) return conc_df.reset_index(drop=True) # group by defect and sum concentrations: - summed_df = conc_df.groupby("Defect").sum(numeric_only=True) - summed_df[next(k for k in conc_df.columns if k.startswith("Concentration"))] = ( - summed_df["Raw Concentration"] - if skip_formatting - else summed_df["Raw Concentration"].apply(lambda x: f"{x:.3e}") - ) - return summed_df.drop( - columns=[ - i - for i in ["Charge", "Formation Energy (eV)", "Raw Charge", "Raw Concentration"] - if i in summed_df.columns - ] + return _group_defect_charge_state_concentrations( + conc_df, per_site=per_site, skip_formatting=skip_formatting ) def _parse_fermi_dos(self, bulk_dos_vr: Union[str, Vasprun, FermiDos]): @@ -1442,6 +1426,9 @@ def get_equilibrium_fermi_level( chempots, el_refs = self._get_chempots( chempots, el_refs ) # returns self.chempots/self.el_refs if chempots is None + if chempots is None: # handle once here, as otherwise brentq function results in this being + # called many times + _no_chempots_warning() def _get_total_q(fermi_level): conc_df = self.get_equilibrium_concentrations( @@ -1458,6 +1445,8 @@ def _get_total_q(fermi_level): with warnings.catch_warnings(): warnings.filterwarnings("ignore", "overflow") # ignore overflow warnings + warnings.filterwarnings("ignore", "No chemical potentials") # ignore chempots warning, + # as given once above eq_fermi_level: float = brentq(_get_total_q, -1.0, self.band_gap + 1.0) # type: ignore if return_concs: @@ -1477,6 +1466,9 @@ def get_quenched_fermi_level_and_concentrations( annealing_temperature: float = 1000, quenched_temperature: float = 300, delta_gap: float = 0, + per_charge: bool = True, + per_site: bool = False, + skip_formatting: bool = False, **kwargs, ) -> tuple[float, float, float, pd.DataFrame]: r""" @@ -1601,6 +1593,16 @@ def get_quenched_fermi_level_and_concentrations( about the VBM and CBM (i.e. assuming equal up/downshifts of the band-edges around their original eigenvalues) while the defect levels remain fixed. (Default: 0) + per_charge (bool): + Whether to break down the defect concentrations into individual defect charge + states (e.g. ``v_Cd_0``, ``v_Cd_-1``, ``v_Cd_-2`` instead of ``v_Cd``). + (default: True) + per_site (bool): + Whether to return the concentrations as percent concentrations per site, + rather than the default of per cm^3. (default: False) + skip_formatting (bool): + Whether to skip formatting the defect charge states and concentrations as + strings (and keep as ``int``\s and ``float``\s instead). (default: False) **kwargs: Additional keyword arguments to pass to ``scissor_dos`` (if ``delta_gap`` is not 0). @@ -1611,12 +1613,19 @@ def get_quenched_fermi_level_and_concentrations( quenched defect concentrations (in cm^-3). (fermi_level, e_conc, h_conc, conc_df) """ + if kwargs and any(i not in ["verbose", "tol"] for i in kwargs): + raise ValueError(f"Invalid keyword arguments: {', '.join(kwargs.keys())}") + # TODO: Update docstrings after `py-sc-fermi` interface written, to point toward it for more # advanced analysis fermi_dos = self._parse_fermi_dos(bulk_dos_vr) chempots, el_refs = self._get_chempots( chempots, el_refs ) # returns self.chempots/self.el_refs if chempots is None + if chempots is None: # handle once here, as otherwise brentq function results in this being + # called many times + _no_chempots_warning() + annealing_dos = ( fermi_dos if delta_gap == 0 @@ -1627,67 +1636,117 @@ def get_quenched_fermi_level_and_concentrations( tol=kwargs.get("tol", 1e-8), ) ) - annealing_fermi_level = self.get_equilibrium_fermi_level( - annealing_dos, - chempots=chempots, - limit=limit, - el_refs=el_refs, - temperature=annealing_temperature, - return_concs=False, - ) - annealing_defect_concentrations = self.get_equilibrium_concentrations( - chempots=chempots, - limit=limit, - el_refs=el_refs, - fermi_level=annealing_fermi_level, # type: ignore - temperature=annealing_temperature, - per_charge=False, # give total concentrations for each defect - skip_formatting=True, - ) - annealing_defect_concentrations = annealing_defect_concentrations.rename( - columns={"Concentration (cm^-3)": "Total Concentration (cm^-3)"} - ) - def _get_constrained_total_q(fermi_level, return_conc_df=False): - conc_df = self.get_equilibrium_concentrations( + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "overflow") # ignore overflow warnings + warnings.filterwarnings("ignore", "No chemical potentials") # ignore chempots warning, + # as given once above + + annealing_fermi_level = self.get_equilibrium_fermi_level( + annealing_dos, chempots=chempots, limit=limit, el_refs=el_refs, - temperature=quenched_temperature, - fermi_level=fermi_level, + temperature=annealing_temperature, + return_concs=False, + ) + annealing_defect_concentrations = self.get_equilibrium_concentrations( + chempots=chempots, + limit=limit, + el_refs=el_refs, + fermi_level=annealing_fermi_level, # type: ignore + temperature=annealing_temperature, + per_charge=False, # give total concentrations for each defect skip_formatting=True, ) + annealing_defect_concentrations = annealing_defect_concentrations.rename( + columns={"Concentration (cm^-3)": "Total Concentration (cm^-3)"} + ) + + def _get_constrained_concentrations( + fermi_level, per_charge=True, per_site=False, skip_formatting=False + ): + conc_df = self.get_equilibrium_concentrations( + chempots=chempots, + limit=limit, + el_refs=el_refs, + temperature=quenched_temperature, + fermi_level=fermi_level, + skip_formatting=True, + ) + + conc_df = conc_df.merge(annealing_defect_concentrations, on="Defect") + conc_df["Concentration (cm^-3)"] = ( # set total concentration to match annealing conc + conc_df["Concentration (cm^-3)"] # but with same relative concentrations + / conc_df.groupby("Defect")["Concentration (cm^-3)"].transform("sum") + ) * conc_df["Total Concentration (cm^-3)"] + + if not per_charge: + conc_df = _group_defect_charge_state_concentrations( + conc_df, per_site, skip_formatting=True + ) + + if per_site: + cm3_conc_df = self.get_equilibrium_concentrations( + chempots=chempots, + limit=limit, + el_refs=el_refs, + temperature=quenched_temperature, + fermi_level=fermi_level, + skip_formatting=True, + per_charge=per_charge, + ) + per_site_conc_df = self.get_equilibrium_concentrations( + chempots=chempots, + limit=limit, + el_refs=el_refs, + temperature=quenched_temperature, + fermi_level=fermi_level, + skip_formatting=True, + per_site=True, + per_charge=per_charge, + ) + per_site_factors = ( + per_site_conc_df["Concentration (per site)"] / cm3_conc_df["Concentration (cm^-3)"] + ) + conc_df["Concentration (per site)"] = ( + conc_df["Concentration (cm^-3)"] * per_site_factors + ) + conc_df = conc_df.drop(columns=["Concentration (cm^-3)"]) + + if not skip_formatting: + conc_df["Concentration (per site)"] = conc_df["Concentration (per site)"].apply( + _format_per_site_concentration + ) - conc_df = conc_df.merge(annealing_defect_concentrations, on="Defect") - conc_df["Concentration (cm^-3)"] = ( # set total concentration to match annealing conc - conc_df["Concentration (cm^-3)"] # but with same relative concentrations - / conc_df.groupby("Defect")["Concentration (cm^-3)"].transform("sum") - ) * conc_df["Total Concentration (cm^-3)"] + elif not skip_formatting: + conc_df["Concentration (cm^-3)"] = conc_df["Concentration (cm^-3)"].apply( + lambda x: f"{x:.3e}" + ) - if return_conc_df: return conc_df - qd_tot = (conc_df["Charge"] * conc_df["Concentration (cm^-3)"]).sum() - qd_tot += fermi_dos.get_doping( - fermi_level=fermi_level + self.vbm, temperature=quenched_temperature - ) - return qd_tot - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "overflow") # ignore overflow warnings + def _get_constrained_total_q(fermi_level, return_conc_df=False): + conc_df = _get_constrained_concentrations(fermi_level, skip_formatting=True) + qd_tot = (conc_df["Charge"] * conc_df["Concentration (cm^-3)"]).sum() + qd_tot += fermi_dos.get_doping( + fermi_level=fermi_level + self.vbm, temperature=quenched_temperature + ) + return qd_tot eq_fermi_level: float = brentq( _get_constrained_total_q, -1.0, self.band_gap + 1.0 # type: ignore ) + e_conc, h_conc = get_e_h_concs( + fermi_dos, eq_fermi_level + self.vbm, quenched_temperature # type: ignore + ) - e_conc, h_conc = get_e_h_concs( - fermi_dos, eq_fermi_level + self.vbm, quenched_temperature # type: ignore - ) - return ( - eq_fermi_level, - e_conc, - h_conc, - _get_constrained_total_q(eq_fermi_level, return_conc_df=True), - ) + return ( + eq_fermi_level, + e_conc, + h_conc, + _get_constrained_concentrations(eq_fermi_level, per_charge, per_site, skip_formatting), + ) def get_formation_energy( self, @@ -2568,7 +2627,7 @@ def get_formation_energies( If None (default), set to the mid-gap Fermi level (E_g/2). skip_formatting (bool): Whether to skip formatting the defect charge states as - strings (and keep as ints and floats instead). + strings (and keep as ``int``\s and ``float``\s instead). (default: False) Returns: @@ -2617,7 +2676,7 @@ def _single_formation_energy_table( fermi_level: float = 0, skip_formatting: bool = False, ) -> pd.DataFrame: - """ + r""" Returns a defect formation energy table for a single chemical potential limit as a pandas ``DataFrame``. @@ -2659,7 +2718,7 @@ def _single_formation_energy_table( used, or if not present, ``self.vbm``). skip_formatting (bool): Whether to skip formatting the defect charge states as - strings (and keep as ints and floats instead). + strings (and keep as ``int``\s and ``float``\s instead). (default: False) Returns: @@ -2791,7 +2850,7 @@ def get_symmetries_and_degeneracies( Args: skip_formatting (bool): Whether to skip formatting the defect charge states as - strings (and keep as ints and floats instead). + strings (and keep as ``int``\s and ``float``\s instead). (default: False) symprec (float): Symmetry tolerance for ``spglib`` to use when determining @@ -2870,6 +2929,54 @@ def __repr__(self): ) +def _group_defect_charge_state_concentrations(conc_df, per_site=False, skip_formatting=False): + summed_df = conc_df.groupby("Defect").sum(numeric_only=True) + raw_concentrations = ( + summed_df["Raw Concentration"] + if "Raw Concentration" in summed_df.columns + else summed_df[next(k for k in conc_df.columns if k.startswith("Concentration"))] + ) + summed_df[next(k for k in conc_df.columns if k.startswith("Concentration"))] = ( + raw_concentrations.apply( + lambda x: _format_concentration(x, per_site=per_site, skip_formatting=skip_formatting) + ) + ) + return summed_df.drop( + columns=[ + i + for i in [ + "Charge", + "Formation Energy (eV)", + "Raw Charge", + "Raw Concentration", + "Charge State Population", + ] + if i in summed_df.columns + ] + ) + + +def _format_concentration(raw_concentration, per_site=False, skip_formatting=False): + """ + Format concentration values for ``DataFrame`` outputs. + """ + if skip_formatting: + return raw_concentration + if per_site: + return _format_per_site_concentration(raw_concentration) + + return f"{raw_concentration:.3e}" + + +def _format_per_site_concentration(raw_concentration): + """ + Format per-site concentrations for ``DataFrame`` outputs. + """ + if raw_concentration > 1e-5: + return f"{raw_concentration:.3%}" + return f"{raw_concentration * 100:.3e} %" + + def get_e_h_concs(fermi_dos: FermiDos, fermi_level: float, temperature: float) -> tuple[float, float]: """ Get the corresponding electron and hole concentrations (in cm^-3) for a diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 782b8fc4..1df04384 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -1,6 +1,7 @@ """ -Tests for the `doped.analysis` module, which also implicitly tests most of the -`doped.utils.parsing` module. +Tests for the ``doped.analysis`` module, which also implicitly tests most of +the ``doped.utils.parsing`` module, and some ``doped.thermodynamics`` +functions. """ import gzip @@ -920,10 +921,12 @@ def test_V2O5_same_named_defects(self): return thermo.plot(limit="V2O5-O2") @custom_mpl_image_compare(filename="SrTiO3_v_O.png") - def test_SrTiO3_diff_ISYM_bulk_defect(self): + def test_SrTiO3_diff_ISYM_bulk_defect_and_concentration_funcs(self): """ Test parsing SrTiO3 defect calculations, where a different ISYM was - used for the bulk (= 3) compared to the defect (= 0) calculations. + used for the bulk (= 3) compared to the defect (= 0) calculations, as + well as the ``DefectThermodynamics`` concentration functions with + various options. Previously this failed because the bulk/defect kpoints could not be properly compared. @@ -971,11 +974,156 @@ def test_SrTiO3_diff_ISYM_bulk_defect(self): thermo = dp.get_defect_thermodynamics() # hardcoded check of bulk_site_concentration property: + sto_O_site_conc = 5.067520709900586e22 for defect_entry in dp.defect_dict.values(): # oxygen site concentration - assert np.isclose(defect_entry.bulk_site_concentration, 5.06752e22, rtol=1e-4) + assert np.isclose(defect_entry.bulk_site_concentration, sto_O_site_conc, rtol=1e-4) print(thermo.get_symmetries_and_degeneracies()) + conc_df = thermo.get_equilibrium_concentrations() # no chempots or Fermi level + print("conc_df", conc_df) # for debugging + srtio3_V_O_conc_lists = [ # with no chempots or Fermi level (so using Eg/2) + ["vac_O", "+2", 9.742, "4.456e-141", "100.00%"], + ["vac_O", "+1", 11.043, "2.497e-162", "0.00%"], + ["vac_O", "0", 12.635, "1.109e-189", "0.00%"], + ] + for i, row in enumerate(srtio3_V_O_conc_lists): + print(i, row) + assert list(conc_df.iloc[i]) == row + + per_site_conc_df = thermo.get_equilibrium_concentrations(per_site=True) + print("per_site_conc_df", per_site_conc_df) # for debugging + for i, row in enumerate(srtio3_V_O_conc_lists): + print(i, row) + for j, val in enumerate(row): + if j != 3: + assert per_site_conc_df.iloc[i, j] == val + else: + assert np.isclose( + float(per_site_conc_df.iloc[i, j][:-2]), 100 * float(row[3]) / sto_O_site_conc + ) + + per_site_conc_df = thermo.get_equilibrium_concentrations(per_site=True, skip_formatting=True) + print("per_site_conc_df", per_site_conc_df) # for debugging + for i, row in enumerate(srtio3_V_O_conc_lists): + print(i, row) + for j, val in enumerate(row): + if j not in [1, 3]: + assert per_site_conc_df.iloc[i, j] == val + elif j == 3: + assert np.isclose( + per_site_conc_df.iloc[i, j], float(row[3]) / sto_O_site_conc, rtol=1e-5 + ) + elif j == 1: + assert per_site_conc_df.iloc[i, j] == int(val) # charge states + + assert thermo.get_equilibrium_concentrations(per_charge=False).to_numpy().tolist() == [ + ["4.456e-141"] + ] + assert np.isclose( + float( + thermo.get_equilibrium_concentrations( + per_charge=False, per_site=True, skip_formatting=True + ) + .to_numpy() + .tolist()[0][0] + ), + 4.456e-141 / sto_O_site_conc, + ) + assert np.isclose( + float( + thermo.get_equilibrium_concentrations(per_charge=False, per_site=True) + .to_numpy() + .tolist()[0][0][:-2] + ), + 100 * 4.456e-141 / sto_O_site_conc, + rtol=1e-3, + ) + + assert next( + iter( + thermo.get_equilibrium_concentrations(per_charge=False, fermi_level=1.710795) + .to_numpy() + .tolist() + ) + ) == ["2.004e-142"] + + per_site_conc_df = thermo.get_equilibrium_concentrations(per_site=True, fermi_level=1.710795) + print("per_site_conc_df", per_site_conc_df) # for debugging + custom_fermi_concs = ["3.954e-163 %", "1.045e-183 %", "2.189e-210 %"] + for i, val in enumerate(custom_fermi_concs): + print(i, val) + assert per_site_conc_df.iloc[i, 3] == val + + # test get_quenched_fermi_level_and_concentrations + fermi_level, e_conc, h_conc, conc_df = thermo.get_quenched_fermi_level_and_concentrations( + bulk_dos_vr=f"{self.SrTiO3_DATA_DIR}/bulk_sp333/vasprun.xml", + annealing_temperature=300, + per_charge=False, + ) + print("conc_df", conc_df) # for debugging + assert np.isclose(fermi_level, 1.710795, rtol=1e-4) + assert np.isclose(e_conc, h_conc, rtol=1e-4) # same because defect concentration negligible + # without chempots + assert np.isclose(e_conc, 6.129e-7, rtol=1e-3) + assert conc_df.to_numpy().tolist() == [["2.004e-142", 6.010973640124676e-142]] + assert conc_df.index[0] == "vac_O" + assert conc_df.index.name == "Defect" + + fermi_level, e_conc, h_conc, conc_df = thermo.get_quenched_fermi_level_and_concentrations( + bulk_dos_vr=f"{self.SrTiO3_DATA_DIR}/bulk_sp333/vasprun.xml", + annealing_temperature=300, + skip_formatting=True, + ) + print("quenched conc_df", conc_df) # for debugging + assert np.isclose(fermi_level, 1.710795, rtol=1e-4) + assert np.isclose(e_conc, h_conc, rtol=1e-4) # same because defect concentration negligible + # without chempots + assert np.isclose(e_conc, 6.129e-7, rtol=1e-3) + quenched_conc_df_lists = [ + ["vac_O", 2, 9.822, 2.0036578800415587e-142, "100.00%", 2.0036578800415587e-142], + ["vac_O", 1, 11.083, 5.2947236022031443e-163, "0.00%", 2.0036578800415587e-142], + ["vac_O", 0, 12.635, 1.1092122529257341e-189, "0.00%", 2.0036578800415587e-142], + ] + for i, row in enumerate(quenched_conc_df_lists): + print(i, row) + assert list(conc_df.iloc[i]) == row + + fermi_level, e_conc, h_conc, conc_df = thermo.get_quenched_fermi_level_and_concentrations( + bulk_dos_vr=f"{self.SrTiO3_DATA_DIR}/bulk_sp333/vasprun.xml", + annealing_temperature=300, + per_site=True, + ) + print("per_site quenched conc_df", conc_df) # for debugging + assert np.isclose(fermi_level, 1.710795, rtol=1e-4) + assert np.isclose(e_conc, h_conc, rtol=1e-4) # same because defect concentration negligible + # without chempots + assert np.isclose(e_conc, 6.129e-7, rtol=1e-3) + quenched_per_site_conc_df_lists = [ + ["vac_O", 2, 9.822, "100.00%", 2.0036578800415587e-142, "3.954e-163 %"], + ["vac_O", 1, 11.083, "0.00%", 2.0036578800415587e-142, "1.045e-183 %"], + ["vac_O", 0, 12.635, "0.00%", 2.0036578800415587e-142, "2.189e-210 %"], + ] + for i, row in enumerate(quenched_per_site_conc_df_lists): + print(i, row) + assert list(conc_df.iloc[i]) == row + + fermi_level, e_conc, h_conc, conc_df = thermo.get_quenched_fermi_level_and_concentrations( + bulk_dos_vr=f"{self.SrTiO3_DATA_DIR}/bulk_sp333/vasprun.xml", + annealing_temperature=300, + per_site=True, + per_charge=False, + skip_formatting=True, + ) + print("per_site not per_charge quenched conc_df", conc_df) # for debugging + assert np.isclose(fermi_level, 1.710795, rtol=1e-4) + assert np.isclose(e_conc, h_conc, rtol=1e-4) # same because defect concentration negligible + # without chempots + assert np.isclose(e_conc, 6.129e-7, rtol=1e-3) + assert conc_df.to_numpy().tolist() == [[6.010973640124676e-142, 3.953921443531439e-165]] + assert conc_df.index[0] == "vac_O" + assert conc_df.index.name == "Defect" + return thermo.plot() @custom_mpl_image_compare(filename="ZnS_defects.png") From 47a88b8d462faa7291d97762f4ebf8a69b90103b Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Fri, 31 May 2024 17:06:57 -0400 Subject: [PATCH 04/10] Remove user potcar functional typing import (moved to `TYPE_CHECKING` block in new `pymatgen`) --- docs/Troubleshooting.rst | 2 +- doped/vasp.py | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/Troubleshooting.rst b/docs/Troubleshooting.rst index 5af2ab75..9ed26a38 100644 --- a/docs/Troubleshooting.rst +++ b/docs/Troubleshooting.rst @@ -14,7 +14,7 @@ message about the origin of the problem, it is likely to be an issue with your v .. code:: bash - pip install pymatgen pymatgen-analysis-defects --upgrade + pip install pymatgen pymatgen-analysis-defects monty --upgrade pip install doped --upgrade If this does not solve your issue, please check the specific cases noted below. If your issue still isn't diff --git a/doped/vasp.py b/doped/vasp.py index 62fca734..80d2ea10 100644 --- a/doped/vasp.py +++ b/doped/vasp.py @@ -20,7 +20,7 @@ from pymatgen.core import SETTINGS from pymatgen.core.structure import Structure from pymatgen.io.vasp.inputs import BadIncarWarning, Kpoints, Poscar, Potcar -from pymatgen.io.vasp.sets import DictSet, UserPotcarFunctional +from pymatgen.io.vasp.sets import DictSet from tqdm import tqdm from doped import _doped_obj_properties_methods, _ignore_pmg_warnings @@ -69,9 +69,7 @@ def deep_dict_update(d: dict, u: dict) -> dict: } -def _test_potcar_functional_choice( - potcar_functional: UserPotcarFunctional = "PBE", symbols: Optional[list] = None -): +def _test_potcar_functional_choice(potcar_functional: str = "PBE", symbols: Optional[list] = None): """ Check if the potcar functional choice needs to be changed to match those available. @@ -138,7 +136,7 @@ def __init__( structure: Structure, user_incar_settings: Optional[dict] = None, user_kpoints_settings: Optional[Union[dict, Kpoints]] = None, - user_potcar_functional: UserPotcarFunctional = "PBE", + user_potcar_functional: str = "PBE", user_potcar_settings: Optional[dict] = None, auto_kpar: bool = True, poscar_comment: Optional[str] = None, @@ -251,7 +249,7 @@ def potcar(self) -> Potcar: Redefined to intelligently handle ``pymatgen`` ``POTCAR`` issues. """ if any("VASP_PSP_DIR" in i for i in SETTINGS): - self.user_potcar_functional: UserPotcarFunctional = _test_potcar_functional_choice( + self.user_potcar_functional: str = _test_potcar_functional_choice( self.user_potcar_functional, self.potcar_symbols ) @@ -319,7 +317,7 @@ def __init__( charge_state: int = 0, user_incar_settings: Optional[dict] = None, user_kpoints_settings: Optional[Union[dict, Kpoints]] = None, - user_potcar_functional: UserPotcarFunctional = "PBE", + user_potcar_functional: str = "PBE", user_potcar_settings: Optional[dict] = None, poscar_comment: Optional[str] = None, **kwargs, @@ -598,7 +596,7 @@ def __init__( soc: Optional[bool] = None, user_incar_settings: Optional[dict] = None, user_kpoints_settings: Optional[Union[dict, Kpoints]] = None, - user_potcar_functional: UserPotcarFunctional = "PBE", + user_potcar_functional: str = "PBE", user_potcar_settings: Optional[dict] = None, **kwargs, ): @@ -1873,7 +1871,7 @@ def __init__( soc: Optional[bool] = None, user_incar_settings: Optional[dict] = None, user_kpoints_settings: Optional[Union[dict, Kpoints]] = None, - user_potcar_functional: UserPotcarFunctional = "PBE", + user_potcar_functional: str = "PBE", user_potcar_settings: Optional[dict] = None, **kwargs, # to allow POTCAR testing on GH Actions ): From abe4ef525d65cfd72a606d29f1cb6ee0446972df Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Fri, 31 May 2024 17:08:10 -0400 Subject: [PATCH 05/10] Temporarily add typing extensions dependency to fix pymatgen bug --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 82ba059e..974d17ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,9 @@ dependencies = [ "vise>=0.9.0", # for eigenvalue analysis, >=0.9.0 required for SOC compatibility "easyunfold>=0.3.6", # for efficient PROCAR parsing (and supporting SOC), # if any dependency issues arise can make easyunfold an optional dependency + "typing-extensions" # to fix `pymatgen` typing issues + # (https://github.com/materialsproject/pymatgen/pull/3752) + # TODO: Remove once new pymatgen released ] [project.urls] From 697e4898fb8d1ee1ed1c4349840c9d17b28fd2a9 Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Fri, 31 May 2024 17:21:09 -0400 Subject: [PATCH 06/10] Use new `Structure.from_ase_atoms()` method instead of `AseAtomsAdaptor` (nice one @Andrew-S-Rosen) --- examples/GGA_workflow_tutorial.ipynb | 16 +++++++++------- tests/test_generation.py | 4 +--- tests/test_vasp.py | 4 +--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/examples/GGA_workflow_tutorial.ipynb b/examples/GGA_workflow_tutorial.ipynb index 43ea55fe..75f3a8cc 100644 --- a/examples/GGA_workflow_tutorial.ipynb +++ b/examples/GGA_workflow_tutorial.ipynb @@ -387,9 +387,11 @@ "from kgrid.series import cutoff_series\n", "from kgrid import calc_kpt_tuple\n", "import numpy as np\n", + "from pymatgen.core.structure import Structure\n", + "from pymatgen.io.ase import AseAtomsAdaptor\n", "\n", "def generate_kgrids_cutoffs(\n", - " structure: pymatgen.core.structure.Structure,\n", + " structure: Structure,\n", " kmin: int = 4,\n", " kmax: int = 20,\n", ") -> list:\n", @@ -414,7 +416,6 @@ " l_min=kmin,\n", " l_max=kmax,\n", " )\n", - " kspacing = [np.pi / c for c in kpoint_cutoffs]\n", " kgrid_samples = [\n", " calc_kpt_tuple(atoms, cutoff_length=(cutoff - 1e-4))\n", " for cutoff in kpoint_cutoffs\n", @@ -4694,17 +4695,19 @@ ] }, { - "cell_type": "code", - "execution_count": 48, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": 9, "source": [ "from kgrid.series import cutoff_series\n", "from kgrid import calc_kpt_tuple\n", "import numpy as np\n", + "from pymatgen.core.structure import Structure\n", + "from pymatgen.io.ase import AseAtomsAdaptor\n", "\n", "def generate_kgrids_cutoffs(\n", - " structure: pymatgen.core.structure.Structure,\n", + " structure: Structure,\n", " kmin: int = 4,\n", " kmax: int = 20,\n", ") -> list:\n", @@ -4718,7 +4721,7 @@ " kmax (int, optional): _description_. Defaults to 20.\n", "\n", " Returns:\n", - " _type_: _description_\n", + " list: list of kgrids\n", " \"\"\"\n", " # Transform struct to atoms\n", " aaa = AseAtomsAdaptor()\n", @@ -4729,7 +4732,6 @@ " l_min=kmin,\n", " l_max=kmax,\n", " )\n", - " kspacing = [np.pi / c for c in kpoint_cutoffs]\n", " kgrid_samples = [\n", " calc_kpt_tuple(atoms, cutoff_length=(cutoff - 1e-4))\n", " for cutoff in kpoint_cutoffs\n", diff --git a/tests/test_generation.py b/tests/test_generation.py index 7fcec6c6..a1b95ec0 100644 --- a/tests/test_generation.py +++ b/tests/test_generation.py @@ -27,7 +27,6 @@ from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher from pymatgen.core.structure import PeriodicSite, Structure from pymatgen.entries.computed_entries import ComputedStructureEntry -from pymatgen.io.ase import AseAtomsAdaptor from pymatgen.io.vasp import Poscar from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from pymatgen.util.coord import pbc_diff @@ -295,8 +294,7 @@ def setUp(self): atoms = bulk("Cu") atoms = make_supercell(atoms, [[2, 0, 0], [0, 2, 0], [0, 0, 2]]) atoms.set_chemical_symbols(["Cu", "Ag"] * 4) - aaa = AseAtomsAdaptor() - self.agcu = aaa.get_structure(atoms) + self.agcu = Structure.from_ase_atoms(atoms) self.agcu_defect_gen_string = ( "DefectsGenerator for input composition AgCu, space group R-3m with 28 defect entries created." ) diff --git a/tests/test_vasp.py b/tests/test_vasp.py index f0265d78..02ca1b56 100644 --- a/tests/test_vasp.py +++ b/tests/test_vasp.py @@ -19,7 +19,6 @@ from monty.serialization import loadfn from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher from pymatgen.core.structure import Structure -from pymatgen.io.ase import AseAtomsAdaptor from pymatgen.io.vasp.inputs import BadIncarWarning, Incar, Kpoints, Poscar, Potcar from test_generation import if_present_rm @@ -106,8 +105,7 @@ def setUp(self): atoms = bulk("Cu") atoms = make_supercell(atoms, [[2, 0, 0], [0, 2, 0], [0, 0, 2]]) atoms.set_chemical_symbols(["Cu", "Ag"] * 4) - aaa = AseAtomsAdaptor() - self.agcu = aaa.get_structure(atoms) + self.agcu = Structure.from_ase_atoms(atoms) self.sqs_agsbte2 = Structure.from_file(f"{self.data_dir}/AgSbTe2_SQS_POSCAR") self.neutral_def_incar_min = { From 9a0b60d9deec34b3ec8a350f106c0c235596df14 Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Sun, 2 Jun 2024 17:33:49 -0400 Subject: [PATCH 07/10] Overhaul oxi state guessing; use `BVAnalyzer` if possible (faster and better), make oxi-guessing functions public, use `SimpleQueue` to imporove timeout efficiency... --- doped/analysis.py | 24 ++--- doped/core.py | 224 ++++++++++++++++++++++++++++----------- doped/generation.py | 24 ++--- tests/test_generation.py | 64 +++++------ 4 files changed, 214 insertions(+), 122 deletions(-) diff --git a/doped/analysis.py b/doped/analysis.py index b5f970ec..160a5cb0 100644 --- a/doped/analysis.py +++ b/doped/analysis.py @@ -10,7 +10,7 @@ import contextlib import os import warnings -from multiprocessing import Pool, Queue, cpu_count +from multiprocessing import Pool, cpu_count from typing import TYPE_CHECKING, Optional, Union import numpy as np @@ -26,7 +26,7 @@ from tqdm import tqdm from doped import _doped_obj_properties_methods, _ignore_pmg_warnings -from doped.core import DefectEntry, _guess_and_set_oxi_states_with_timeout, _rough_oxi_state_cost_from_comp +from doped.core import DefectEntry, guess_and_set_oxi_states_with_timeout from doped.generation import get_defect_name_from_defect, get_defect_name_from_entry, name_defect_entries from doped.thermodynamics import DefectThermodynamics from doped.utils.parsing import ( @@ -706,18 +706,14 @@ def __init__( # try parsing the bulk oxidation states first, for later assigning defect "oxi_state"s (i.e. # fully ionised charge states): - if _rough_oxi_state_cost_from_comp(self.bulk_vr.final_structure.composition) > 1e6: - self._bulk_oxi_states: Union[dict, bool] = False # will take very long to guess oxi_state - else: - queue: Queue = Queue() - self._bulk_oxi_states = _guess_and_set_oxi_states_with_timeout( - self.bulk_vr.final_structure, queue=queue - ) - if self._bulk_oxi_states: - self.bulk_vr.final_structure = queue.get() # oxi-state decorated structure - self._bulk_oxi_states = { - el.symbol: el.oxi_state for el in self.bulk_vr.final_structure.composition.elements - } + self._bulk_oxi_states: Union[dict, bool] = False + if bulk_struct_w_oxi := guess_and_set_oxi_states_with_timeout( + self.bulk_vr.final_structure, break_early_if_expensive=True + ): + self.bulk_vr.final_structure = bulk_struct_w_oxi + self._bulk_oxi_states = { + el.symbol: el.oxi_state for el in self.bulk_vr.final_structure.composition.elements # type: ignore + } self.defect_dict = {} self.bulk_corrections_data = { # so we only load and parse bulk data once diff --git a/doped/core.py b/doped/core.py index 2c3e9815..9b38ebc6 100644 --- a/doped/core.py +++ b/doped/core.py @@ -7,12 +7,12 @@ import warnings from dataclasses import asdict, dataclass, field from functools import reduce -from itertools import combinations_with_replacement -from multiprocessing import Process, Queue, current_process +from multiprocessing import Process, SimpleQueue, current_process from typing import TYPE_CHECKING, Any, Optional, Union import numpy as np from monty.serialization import dumpfn, loadfn +from pymatgen.analysis.bond_valence import BVAnalyzer from pymatgen.analysis.defects import core, thermo from pymatgen.analysis.defects.utils import CorrectionResult from pymatgen.core.composition import Composition, Element @@ -1270,9 +1270,33 @@ def _get_dft_chempots(chempots, el_refs, limit): return chempots -def _guess_and_set_struct_oxi_states(structure, try_without_max_sites=False, queue=None): +def _guess_and_set_struct_oxi_states(structure): """ - Tries to guess (and set) the oxidation states of the input structure. + Tries to guess (and set) the oxidation states of the input structure, using + the ``pymatgen`` ``BVAnalyzer`` class. + + Args: + structure (Structure): The structure for which to guess the oxidation states. + + Returns: + Structure: The structure with oxidation states guessed and set, or ``False`` + if oxidation states could not be guessed. + """ + bv_analyzer = BVAnalyzer() + with contextlib.suppress(ValueError): # ValueError raised if oxi states can't be assigned + oxi_dec_structure = bv_analyzer.get_oxi_state_decorated_structure(structure) + if all( + np.isclose(int(specie.oxi_state), specie.oxi_state) for specie in oxi_dec_structure.species + ): + return oxi_dec_structure + + return False # if oxi states could not be guessed + + +def _guess_and_set_struct_oxi_states_icsd_prob(structure, try_without_max_sites=False): + """ + Tries to guess (and set) the oxidation states of the input structure, using + the ``pymatgen``-tabulated ICSD oxidation state probabilities. Args: structure (Structure): The structure for which to guess the oxidation states. @@ -1280,26 +1304,24 @@ def _guess_and_set_struct_oxi_states(structure, try_without_max_sites=False, que Whether to try to guess the oxidation states without using the ``max_sites=-1`` argument (``True``)(which attempts to use the reduced composition for guessing oxi states) or not (``False``). - queue (Queue): - A multiprocessing queue to put the guessed structure in, if provided. - Only really intended for use internally in ``doped``, during defect - generation. + + Returns: + Structure: The structure with oxidation states guessed and set, or ``False`` + if oxidation states could not be guessed. """ + structure = structure.copy() # don't modify original structure if try_without_max_sites: with contextlib.suppress(Exception): structure.add_oxidation_state_by_guess() # check all oxidation states are whole numbers: if all(np.isclose(int(specie.oxi_state), specie.oxi_state) for specie in structure.species): - if queue is not None: - queue.put(structure) - return + return structure # else try to use the reduced cell since oxidation state assignment scales poorly with system size: try: attempt = 0 structure.add_oxidation_state_by_guess(max_sites=-1) - # check oxi_states assigned and not all zero: - while ( + while ( # check oxi_states assigned and not all zero: attempt < 3 and all(specie.oxi_state == 0 for specie in structure.species) or not all(np.isclose(int(specie.oxi_state), specie.oxi_state) for specie in structure.species) @@ -1312,16 +1334,112 @@ def _guess_and_set_struct_oxi_states(structure, try_without_max_sites=False, que except Exception: structure.add_oxidation_state_by_guess() - if queue is not None: - queue.put(structure) + if all(hasattr(site.specie, "oxi_state") for site in structure.sites) and all( + isinstance(site.specie.oxi_state, (int, float)) for site in structure.sites + ): + return structure + + return False -def _guess_and_set_oxi_states_with_timeout(structure, timeout_1=10, timeout_2=15, queue=None) -> bool: +def guess_and_set_struct_oxi_states(structure, try_without_max_sites=False): + """ + Tries to guess (and set) the oxidation states of the input structure, first + using the ``pymatgen`` ``BVAnalyzer`` class, and if that fails, using the + ICSD oxidation state probabilities to guess. + + Args: + structure (Structure): The structure for which to guess the oxidation states. + try_without_max_sites (bool): + Whether to try to guess the oxidation states + without using the ``max_sites=-1`` argument (``True``)(which attempts + to use the reduced composition for guessing oxi states) or not (``False``), + when using the ICSD oxidation state probability guessing. + + Returns: + Structure: The structure with oxidation states guessed and set, or ``False`` + if oxidation states could not be guessed. + """ + if structure_with_oxi := _guess_and_set_struct_oxi_states(structure): + return structure_with_oxi + + return _guess_and_set_struct_oxi_states_icsd_prob(structure, try_without_max_sites) + + +def guess_and_set_oxi_states_with_timeout( + structure, timeout_1=10, timeout_2=15, break_early_if_expensive=False +) -> bool: """ Tries to guess (and set) the oxidation states of the input structure, with a timeout catch for cases where the structure is complex and oxi state guessing will take a very very long time. + Tries first without using the ``pymatgen`` ``BVAnalyzer`` class, and if + this fails, tries using the ICSD oxidation state probabilities (with + timeouts) to guess. + + Args: + structure (Structure): The structure for which to guess the oxidation states. + timeout_1 (float): + Timeout in seconds for the second attempt to guess the oxidation states, + using ICSD oxidation state probabilities (with ``max_sites=-1``). + Default is 10 seconds. + timeout_2 (float): + Timeout in seconds for the third attempt to guess the oxidation states, + using ICSD oxidation state probabilities (without ``max_sites=-1``). + Default is 15 seconds. + break_early_if_expensive (bool): + Whether to stop the function if the first oxi state guessing attempt + (with ``BVAnalyzer``) fails and the cost estimate for the ICSD probability + guessing is high (expected to take a long time; > 10 seconds). + Default is ``False``. + + Returns: + Structure: The structure with oxidation states guessed and set, or ``False`` + if oxidation states could not be guessed. + """ + if structure_with_oxi := _guess_and_set_struct_oxi_states(structure): + return structure_with_oxi # BVAnalyzer succeeded + + if ( # if BVAnalyzer failed and cost estimate is high, break early: + ( + break_early_if_expensive or current_process().daemon + ) # if in a daemon process, can't spawn new `Process`s + and _rough_oxi_state_cost_icsd_prob_from_comp(structure.composition) > 1e6 + ): + return False + + if current_process().daemon: # if in a daemon process, can't spawn new `Process`s + return _guess_and_set_struct_oxi_states_icsd_prob(structure) + + return _guess_and_set_oxi_states_with_timeout_icsd_prob(structure, timeout_1, timeout_2) + + +def _guess_and_set_struct_oxi_states_icsd_prob_process(structure, queue, try_without_max_sites=False): + """ + Implements the ``_guess_and_set_struct_oxi_states_icsd_prob`` function + above, but also putting the results into the supplied ``multiprocessing`` + queue object (for use with timeouts via ``Process``). + + For internal ``doped`` usage. + """ + if structure_with_oxi := _guess_and_set_struct_oxi_states_icsd_prob(structure, try_without_max_sites): + queue.put(structure_with_oxi) + else: + queue.put(False) + + +def _guess_and_set_oxi_states_with_timeout_icsd_prob( + structure, + timeout_1=10, + timeout_2=15, +) -> bool: + """ + Tries to guess (and set) the oxidation states of the input structure using + the ICSD oxidation state probabilities approach, with a timeout catch for + cases where the structure is complex and oxi state guessing will take a + very very long time. + Tries first without using the ``max_sites=-1`` argument with ``pymatgen``'s oxidation state guessing functions (which attempts to use the reduced composition for guessing oxi states, but can be a little less reliable for tricky @@ -1329,25 +1447,21 @@ def _guess_and_set_oxi_states_with_timeout(structure, timeout_1=10, timeout_2=15 Args: structure (Structure): The structure for which to guess the oxidation states. - queue (Queue): - A multiprocessing queue to put the guessed structure in, if provided. - Only really intended for use internally in ``doped``, during defect - generation. timeout_1 (float): Timeout in seconds for the first attempt to guess the oxidation states (with ``max_sites=-1``). Default is 10 seconds. timeout_2 (float): Timeout in seconds for the second attempt to guess the oxidation states (without ``max_sites=-1``). Default is 15 seconds. + + Returns: + Structure: The structure with oxidation states guessed and set, or ``False`` + if oxidation states could not be guessed. """ - if queue is None: - queued_struct = False - queue = Queue() - else: - queued_struct = True + queue: SimpleQueue = SimpleQueue() guess_oxi_process_wout_max_sites = Process( - target=_guess_and_set_struct_oxi_states, args=(structure, True, queue) + target=_guess_and_set_struct_oxi_states_icsd_prob_process, args=(structure, queue, True) ) # try without max sites first, if fails, try with max sites guess_oxi_process_wout_max_sites.start() guess_oxi_process_wout_max_sites.join(timeout=timeout_1) @@ -1357,8 +1471,8 @@ def _guess_and_set_oxi_states_with_timeout(structure, timeout_1=10, timeout_2=15 guess_oxi_process_wout_max_sites.join() guess_oxi_process = Process( - target=_guess_and_set_struct_oxi_states, - args=(structure, False, queue), + target=_guess_and_set_struct_oxi_states_icsd_prob_process, + args=(structure, queue, False), ) guess_oxi_process.start() guess_oxi_process.join(timeout=timeout_2) # wait for pymatgen to guess oxi states, @@ -1370,20 +1484,15 @@ def _guess_and_set_oxi_states_with_timeout(structure, timeout_1=10, timeout_2=15 return False - if not queued_struct: - # apply oxi states to structure: - structure_from_subprocess = queue.get() - structure.add_oxidation_state_by_element( - {el.symbol: el.oxi_state for el in structure_from_subprocess.composition.elements} - ) - - return True + # apply oxi states to structure: + return queue.get() -def _rough_oxi_state_cost_from_comp(comp: Union[str, Composition], max_sites=True) -> float: +def _rough_oxi_state_cost_icsd_prob_from_comp(comp: Union[str, Composition], max_sites=True) -> float: """ A cost function which roughly estimates the computational cost of guessing - the oxidation states of a given composition. + the oxidation states of a given composition, using the ICSD oxidation state + probabilities approach. """ if isinstance(comp, str): comp = Composition(comp) @@ -1393,13 +1502,16 @@ def _rough_oxi_state_cost_from_comp(comp: Union[str, Composition], max_sites=Tru el_amt = comp.get_el_amt_dict() elements = list(el_amt) + + def num_possible_combinations(n, r): + from math import factorial + + return factorial(n + r - 1) / factorial(r) / factorial(n - 1) + return np.prod( [ - sum( - 1 - for _i in combinations_with_replacement( - Element(el).icsd_oxidation_states or Element(el).oxidation_states, int(el_amt[el]) - ) + num_possible_combinations( + len(Element(el).icsd_oxidation_states or Element(el).oxidation_states), int(el_amt[el]) ) for el in elements ] @@ -1488,26 +1600,16 @@ def _set_oxi_state(self): all(hasattr(site.specie, "oxi_state") for site in self.structure.sites) and all(isinstance(site.specie.oxi_state, (int, float)) for site in self.structure.sites) ): - if _rough_oxi_state_cost_from_comp(self.structure.composition) > 1e6: - self.oxi_state = "Undetermined" # likely will take very long to guess oxi_state - return - - if current_process().daemon: # if in a daemon process, can't spawn new `Process`s - _guess_and_set_struct_oxi_states(self.structure) - self.oxi_state = self._guess_oxi_state() + # try guess oxi-states but with timeout: + if struct_w_oxi := guess_and_set_oxi_states_with_timeout( + self.structure, timeout_1=5, timeout_2=5, break_early_if_expensive=True + ): + self.structure = struct_w_oxi + else: + self.oxi_state = "Undetermined" return - # else try guess oxi-states but with timeout, using new `Process`s: - _guess_and_set_oxi_states_with_timeout(self.structure, timeout_1=5, timeout_2=5) - - if not ( # oxi states unable to be parsed, set to "Undetermined" - all(hasattr(site.specie, "oxi_state") for site in self.structure.sites) - and all(isinstance(site.specie.oxi_state, (int, float)) for site in self.structure.sites) - ): - self.oxi_state = "Undetermined" - - else: - self.oxi_state = self._guess_oxi_state() + self.oxi_state = self._guess_oxi_state() @classmethod def _from_pmg_defect(cls, defect: core.Defect, bulk_oxi_states=False, **doped_kwargs) -> "Defect": diff --git a/doped/generation.py b/doped/generation.py index 49d9c505..b0aa7ab0 100644 --- a/doped/generation.py +++ b/doped/generation.py @@ -9,7 +9,7 @@ import warnings from functools import partial, reduce from itertools import chain -from multiprocessing import Pool, Queue, cpu_count +from multiprocessing import Pool, cpu_count from typing import Optional, Union, cast from unittest.mock import MagicMock @@ -39,8 +39,8 @@ Interstitial, Substitution, Vacancy, - _guess_and_set_oxi_states_with_timeout, doped_defect_from_pmg_defect, + guess_and_set_oxi_states_with_timeout, ) from doped.utils import parsing, supercells, symmetry @@ -789,10 +789,9 @@ def guess_defect_charge_states( possible_oxi_states, orig_oxi, max_host_oxi_magnitude, return_log=True ) - charge_state_list = [ + if charge_state_list := [ k for k, v in possible_charge_states.items() if v["probability"] > probability_threshold - ] - if charge_state_list: + ]: charge_state_range = (int(min(charge_state_list)), int(max(charge_state_list))) else: charge_state_range = (0, 0) @@ -844,10 +843,9 @@ def guess_defect_charge_states( sorted(possible_charge_states.items(), key=lambda x: x[1]["probability"], reverse=True) ) - charge_state_list = [ + if charge_state_list := [ k for k, v in sorted_charge_state_dict.items() if v["probability"] > probability_threshold - ] - if charge_state_list: + ]: charge_state_range = (int(min(charge_state_list)), int(max(charge_state_list))) else: # if no charge states are included, take most probable (if probability > 0.1*threshold) @@ -1332,7 +1330,7 @@ class (such as ``clustering_tol``, ``stol``, ``min_dist`` etc), or to ) self._bulk_oxi_states: Union[bool, dict] = ( - True # to check if pymatgen can guess the bulk oxidation states + False # to check if pymatgen can guess the bulk oxidation states ) # if input structure was oxi-state-decorated, use these oxi states for defect generation: if all(hasattr(site.specie, "oxi_state") for site in self.structure.sites) and all( @@ -1343,12 +1341,8 @@ class (such as ``clustering_tol``, ``stol``, ``min_dist`` etc), or to } else: # guess & set oxidation states now, to speed up oxi state handling in defect generation - queue: Queue = Queue() - self._bulk_oxi_states = _guess_and_set_oxi_states_with_timeout( - self.primitive_structure, queue=queue - ) - if self._bulk_oxi_states: - self.primitive_structure = queue.get() + if prim_struct_w_oxi := guess_and_set_oxi_states_with_timeout(self.primitive_structure): + self.primitive_structure = prim_struct_w_oxi self._bulk_oxi_states = { el.symbol: el.oxi_state for el in self.primitive_structure.composition.elements } diff --git a/tests/test_generation.py b/tests/test_generation.py index a1b95ec0..8413c460 100644 --- a/tests/test_generation.py +++ b/tests/test_generation.py @@ -605,28 +605,28 @@ def setUp(self): self.sb2si2te6 = Structure.from_file(f"{self.data_dir}/Sb2Si2Te6_POSCAR") self.sb2si2te6_defect_gen_info = ( - """Vacancies Guessed Charges Conv. Cell Coords Wyckoff ------------ ------------------ ------------------- --------- -v_Si [+1,0,-1,-2,-3,-4] [0.000,0.000,0.445] 6c -v_Sb [+1,0,-1,-2] [0.000,0.000,0.166] 6c -v_Te [+2,+1,0,-1] [0.335,0.003,0.073] 18f + """Vacancies Guessed Charges Conv. Cell Coords Wyckoff +----------- ----------------- ------------------- --------- +v_Si [+1,0,-1,-2,-3] [0.000,0.000,0.445] 6c +v_Sb [+1,0,-1,-2,-3] [0.000,0.000,0.166] 6c +v_Te [+2,+1,0,-1] [0.335,0.003,0.073] 18f Substitutions Guessed Charges Conv. Cell Coords Wyckoff --------------- --------------------------- ------------------- --------- -Si_Sb [+2,+1,0] [0.000,0.000,0.166] 6c +Si_Sb [+1,0,-1] [0.000,0.000,0.166] 6c Si_Te [+6,+5,+4,+3,+2,+1,0,-1,-2] [0.335,0.003,0.073] 18f -Sb_Si [+1,0,-1,-2,-3,-4,-5,-6,-7] [0.000,0.000,0.445] 6c +Sb_Si [+2,+1,0,-1,-2,-3,-4,-5,-6] [0.000,0.000,0.445] 6c Sb_Te [+7,+6,+5,+4,+3,+2,+1,0,-1] [0.335,0.003,0.073] 18f -Te_Si [+2,+1,0,-1,-2,-3,-4,-5,-6] [0.000,0.000,0.445] 6c -Te_Sb [+4,+3,+2,+1,0,-1,-2,-3,-4] [0.000,0.000,0.166] 6c +Te_Si [+3,+2,+1,0,-1,-2,-3,-4,-5] [0.000,0.000,0.445] 6c +Te_Sb [+3,+2,+1,0,-1,-2,-3,-4,-5] [0.000,0.000,0.166] 6c Interstitials Guessed Charges Conv. Cell Coords Wyckoff --------------- --------------------------- ------------------- --------- -Si_i_C1_Si2.20 [+4,+3,+2,+1,0,-1,-2,-3,-4] [0.179,0.359,0.167] 18f -Si_i_C1_Si2.43 [+4,+3,+2,+1,0,-1,-2,-3,-4] [0.341,0.341,0.458] 18f -Si_i_C1_Te2.44 [+4,+3,+2,+1,0,-1,-2,-3,-4] [0.001,0.336,0.289] 18f -Si_i_C3_Si2.64 [+4,+3,+2,+1,0,-1,-2,-3,-4] [0.000,0.000,0.318] 6c -Si_i_C3i_Te2.81 [+4,+3,+2,+1,0,-1,-2,-3,-4] [0.000,0.000,0.000] 3a +Si_i_C1_Si2.20 [+4,+3,+2,+1,0] [0.179,0.359,0.167] 18f +Si_i_C1_Si2.43 [+4,+3,+2,+1,0] [0.341,0.341,0.458] 18f +Si_i_C1_Te2.44 [+4,+3,+2,+1,0] [0.001,0.336,0.289] 18f +Si_i_C3_Si2.64 [+4,+3,+2,+1,0] [0.000,0.000,0.318] 6c +Si_i_C3i_Te2.81 [+4,+3,+2,+1,0] [0.000,0.000,0.000] 3a Sb_i_C1_Si2.20 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.179,0.359,0.167] 18f Sb_i_C1_Si2.43 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.341,0.341,0.458] 18f Sb_i_C1_Te2.44 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.001,0.336,0.289] 18f @@ -1001,25 +1001,25 @@ def _generate_and_test_no_warnings(self, structure, min_image_distance=None, **k with warnings.catch_warnings(record=True) as w: warnings.resetwarnings() defect_gen = DefectsGenerator(structure, **kwargs) - if w: - print([str(warning.message) for warning in w]) # for debugging - if min_image_distance is None: - assert not w - else: - assert len(w) == 1 - assert issubclass(w[-1].category, UserWarning) - assert ( - f"Input structure is <10 Å in at least one direction (minimum image distance =" - f" {min_image_distance:.2f} Å, which is usually too small for accurate defect " - f"calculations, but generate_supercell = False, so using input structure as " - f"defect & bulk supercells. Caution advised!" in str(w[-1].message) - ) output = sys.stdout.getvalue() # Return a str containing the printed output finally: sys.stdout = original_stdout # Reset standard output to its original value. if w: print([str(warning.message) for warning in w]) # for debugging + print(output) # for debugging + + if min_image_distance is None: + assert not w + else: + assert len(w) == 1 + assert issubclass(w[-1].category, UserWarning) + assert ( + f"Input structure is <10 Å in at least one direction (minimum image distance =" + f" {min_image_distance:.2f} Å, which is usually too small for accurate defect " + f"calculations, but generate_supercell = False, so using input structure as " + f"defect & bulk supercells. Caution advised!" in str(w[-1].message) + ) return defect_gen, output @@ -3209,11 +3209,11 @@ def test_charge_state_gen_kwargs(self): assert ( # different charge states than when max_sites = -1 is used: ( - """Vacancies Guessed Charges Conv. Cell Coords Wyckoff ------------ --------------------- ------------------- --------- -v_Si [+2,+1,0,-1,-2,-3,-4] [0.000,0.000,0.445] 6c -v_Sb [+2,+1,0,-1,-2] [0.000,0.000,0.166] 6c -v_Te [+2,+1,0,-1,-2] [0.335,0.003,0.073] 18f + """Vacancies Guessed Charges Conv. Cell Coords Wyckoff +----------- ------------------ ------------------- --------- +v_Si [+2,+1,0,-1,-2,-3] [0.000,0.000,0.445] 6c +v_Sb [+2,+1,0,-1,-2,-3] [0.000,0.000,0.166] 6c +v_Te [+2,+1,0,-1,-2] [0.335,0.003,0.073] 18f \n""" ) in output From d98252e6de4c941c54f30d074a0395355fd60d3d Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Sun, 2 Jun 2024 21:20:09 -0400 Subject: [PATCH 08/10] Update tests --- doped/vasp.py | 138 +++++++++++++++++++------------------- tests/data/VO2_POSCAR | 14 ++++ tests/test_analysis.py | 5 ++ tests/test_corrections.py | 9 ++- 4 files changed, 92 insertions(+), 74 deletions(-) create mode 100644 tests/data/VO2_POSCAR diff --git a/doped/vasp.py b/doped/vasp.py index 80d2ea10..f50cb967 100644 --- a/doped/vasp.py +++ b/doped/vasp.py @@ -20,7 +20,7 @@ from pymatgen.core import SETTINGS from pymatgen.core.structure import Structure from pymatgen.io.vasp.inputs import BadIncarWarning, Kpoints, Poscar, Potcar -from pymatgen.io.vasp.sets import DictSet +from pymatgen.io.vasp.sets import VaspInputSet from tqdm import tqdm from doped import _doped_obj_properties_methods, _ignore_pmg_warnings @@ -123,9 +123,9 @@ def __repr__(self): return super().__repr__() -class DopedDictSet(DictSet): +class DopedDictSet(VaspInputSet): """ - Modified version of ``pymatgen`` ``DictSet``, to have more robust + Modified version of ``pymatgen`` ``VaspInputSet``, to have more robust ``POTCAR`` handling, expedited I/O (particularly for ``POTCAR`` generation, which can be slow when generating many folders), ensure ``POSCAR`` atom sorting, avoid encoding issues with ``KPOINTS`` comments etc. @@ -152,7 +152,7 @@ def __init__( quotation marks (e.g. ``{"ALGO": "All"}``). (default: None) user_kpoints_settings (dict or Kpoints): - Dictionary of user ``KPOINTS`` settings (in ``pymatgen`` ``DictSet`` + Dictionary of user ``KPOINTS`` settings (in ``pymatgen`` ``VaspInputSet`` format) e.g., ``{"reciprocal_density": 123}``, or a ``Kpoints`` object. Default is Gamma-only. user_potcar_functional (str): @@ -170,7 +170,7 @@ def __init__( Default is ``True``. poscar_comment (str): Comment line to use for ``POSCAR`` file. Default is structure formula. - **kwargs: Additional kwargs to pass to ``DictSet``. + **kwargs: Additional kwargs to pass to ``VaspInputSet``. """ _ignore_pmg_warnings() self.auto_kpar = auto_kpar @@ -205,7 +205,7 @@ def __init__( user_potcar_settings = user_potcar_settings["POTCAR"] potcar_settings.update(user_potcar_settings or {}) - base_config_dict = {"POTCAR": potcar_settings} # needs to be set with ``DictSet`` + base_config_dict = {"POTCAR": potcar_settings} # needs to be set with ``VaspInputSet`` config_dict = deep_dict_update(base_config_dict, kwargs.pop("config_dict", {})) config_dict["INCAR"] = user_incar_settings or {} @@ -221,8 +221,8 @@ def __init__( @property def incar(self): """ - Returns the ``Incar`` object generated from the ``DictSet`` config, - with a warning if ``KPAR > 1`` and only one k-point. + Returns the ``Incar`` object generated from the ``VaspInputSet`` + config, with a warning if ``KPAR > 1`` and only one k-point. """ incar_obj = super().incar @@ -307,7 +307,7 @@ def __repr__(self): class DefectDictSet(DopedDictSet): """ - Extension to ``pymatgen`` ``DictSet`` object for ``VASP`` defect + Extension to ``pymatgen`` ``VaspInputSet`` object for ``VASP`` defect calculations. """ @@ -335,7 +335,7 @@ def __init__( as strings with quotation marks (e.g. ``{"ALGO": "All"}``). (default: None) user_kpoints_settings (dict or Kpoints): - Dictionary of user ``KPOINTS`` settings (in ``pymatgen`` ``DictSet`` format) e.g., + Dictionary of user ``KPOINTS`` settings (in ``pymatgen`` ``VaspInputSet`` format) e.g., ``{"reciprocal_density": 123}``, or a ``Kpoints`` object. Default is Gamma-centred, reciprocal_density = 100 [Å⁻³]. user_potcar_functional (str): @@ -347,7 +347,7 @@ def __init__( poscar_comment (str): Comment line to use for ``POSCAR`` files. Default is defect name, fractional coordinates of initial site and charge state. - **kwargs: Additional kwargs to pass to ``DictSet``. + **kwargs: Additional kwargs to pass to ``VaspInputSet``. """ _ignore_pmg_warnings() self.charge_state = charge_state @@ -491,7 +491,7 @@ def write_input( ): """ Writes out all input to a directory. Refactored slightly from - ``pymatgen`` ``DictSet.write_input()`` to allow checking of user + ``pymatgen`` ``VaspInputSet.write_input()`` to allow checking of user ``POTCAR`` setup. Args: @@ -520,7 +520,7 @@ def write_input( else: potcars = self._check_user_potcars(unperturbed_poscar=unperturbed_poscar, snb=snb) - if unperturbed_poscar and potcars: # write everything, use DictSet.write_input() + if unperturbed_poscar and potcars: # write everything, use VaspInputSet.write_input() try: super().write_input( output_path, @@ -627,7 +627,7 @@ def __init__( is only generated for systems with a max atomic number (Z) >= 31 (i.e. further down the periodic table than Zn). - where ``DefectDictSet`` is an extension of ``pymatgen``'s ``DictSet`` class for + where ``DefectDictSet`` is an extension of ``pymatgen``'s ``VaspInputSet`` class for defect calculations, with ``incar``, ``poscar``, ``kpoints`` and ``potcar`` attributes for the corresponding VASP defect calculations (see docstring). Also creates the corresponding ``bulk_vasp_...`` attributes for singlepoint @@ -669,10 +669,10 @@ def __init__( as strings with quotation marks (e.g. ``{"ALGO": "All"}``). (default: None) user_kpoints_settings (dict or Kpoints): - Dictionary of user KPOINTS settings (in pymatgen DictSet format) + Dictionary of user KPOINTS settings (in ``pymatgen`` ``VaspInputSet`` format) e.g. {"reciprocal_density": 123}, or a Kpoints object, to use for the - ``vasp_std``, ``vasp_nkred_std`` and ``vasp_ncl`` DefectDictSets (Γ-only for - ``vasp_gam``). Default is Gamma-centred, reciprocal_density = 100 [Å⁻³]. + ``vasp_std``, ``vasp_nkred_std`` and ``vasp_ncl`` ``DefectDictSet``\s (Γ-only + for ``vasp_gam``). Default is Gamma-centred, reciprocal_density = 100 [Å⁻³]. user_potcar_functional (str): POTCAR functional to use. Default is "PBE" and if this fails, tries "PBE_52", then "PBE_54". @@ -683,22 +683,22 @@ def __init__( Attributes: vasp_gam (DefectDictSet): - DefectDictSet for Gamma-point only relaxation. Usually not needed + ``DefectDictSet`` for Gamma-point only relaxation. Usually not needed if ShakeNBreak structure searching has been performed (recommended), unless only Γ-point `k`-point sampling is required (converged) for your system, and no vasp_std calculations with multiple `k`-points are required (determined from kpoints settings). vasp_nkred_std (DefectDictSet): - DefectDictSet for relaxation with a non-Γ-only kpoint mesh, using + ``DefectDictSet`` for relaxation with a non-Γ-only kpoint mesh, using ``NKRED(X,Y,Z)`` INCAR tag(s) to downsample kpoints for the HF exchange part of the hybrid DFT calculation. Not generated for GGA calculations (if ``LHFCALC`` is set to ``False`` in user_incar_settings) or if only Gamma kpoint sampling is required. vasp_std (DefectDictSet): - DefectDictSet for relaxation with a non-Γ-only kpoint mesh, not using + ``DefectDictSet`` for relaxation with a non-Γ-only kpoint mesh, not using ``NKRED``. Not generated if only Gamma kpoint sampling is required. vasp_ncl (DefectDictSet): - DefectDictSet for singlepoint (static) energy calculation with SOC + ``DefectDictSet`` for singlepoint (static) energy calculation with SOC included. Generated if ``soc=True``. If ``soc`` is not set, then by default is only generated for systems with a max atomic number (Z) >= 31 (i.e. further down the periodic table than Zn). @@ -719,23 +719,23 @@ def __init__( structure and charge state (if inputting a Structure object), for defects. For the bulk supercell, it's "{formula} - Bulk". bulk_vasp_gam (DefectDictSet): - DefectDictSet for a `bulk` Γ-point-only singlepoint (static) + ``DefectDictSet`` for a `bulk` Γ-point-only singlepoint (static) supercell calculation. Often not used, as the bulk supercell only needs to be calculated once with the same settings as the final defect calculations, which may be with ``vasp_std`` or ``vasp_ncl``. bulk_vasp_nkred_std (DefectDictSet): - DefectDictSet for a singlepoint (static) `bulk` ``vasp_std`` supercell + ``DefectDictSet`` for a singlepoint (static) `bulk` ``vasp_std`` supercell calculation (i.e. with a non-Γ-only kpoint mesh) and ``NKRED(X,Y,Z)`` INCAR tag(s) to downsample kpoints for the HF exchange part of the hybrid DFT calculation. Not generated for GGA calculations (if ``LHFCALC`` is set to ``False`` in user_incar_settings) or if only Gamma kpoint sampling is required. bulk_vasp_std (DefectDictSet): - DefectDictSet for a singlepoint (static) `bulk` ``vasp_std`` supercell + ``DefectDictSet`` for a singlepoint (static) `bulk` ``vasp_std`` supercell calculation with a non-Γ-only kpoint mesh, not using ``NKRED``. Not generated if only Gamma kpoint sampling is required. bulk_vasp_ncl (DefectDictSet): - DefectDictSet for singlepoint (static) energy calculation of the `bulk` + ``DefectDictSet`` for singlepoint (static) energy calculation of the `bulk` supercell with SOC included. Generated if ``soc=True``. If ``soc`` is not set, then by default is only generated for systems with a max atomic number (Z) >= 31 (i.e. further down the periodic table than Zn). @@ -801,12 +801,12 @@ def vasp_gam( self, ) -> DefectDictSet: """ - Returns a DefectDictSet object for a VASP Γ-point-only (``vasp_gam``) - defect supercell relaxation. Typically not needed if ShakeNBreak - structure searching has been performed (recommended), unless only - Γ-point `k`-point sampling is required (converged) for your system, and - no vasp_std calculations with multiple `k`-points are required - (determined from kpoints settings). + Returns a ``DefectDictSet`` object for a VASP Γ-point-only + (``vasp_gam``) defect supercell relaxation. Typically not needed if + ShakeNBreak structure searching has been performed (recommended), + unless only Γ-point `k`-point sampling is required (converged) for your + system, and no vasp_std calculations with multiple `k`-points are + required (determined from kpoints settings). See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the ``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings, @@ -859,10 +859,10 @@ def _check_vstd_kpoints(self, vasp_std_defect_set: DefectDictSet) -> Optional[De @property def vasp_std(self) -> Optional[DefectDictSet]: """ - Returns a DefectDictSet object for a VASP defect supercell relaxation - using ``vasp_std`` (i.e. with a non-Γ-only kpoint mesh). Returns None - and a warning if the input kpoint settings correspond to a Γ-only - kpoint mesh (in which case ``vasp_gam`` should be used). + Returns a ``DefectDictSet`` object for a VASP defect supercell + relaxation using ``vasp_std`` (i.e. with a non-Γ-only kpoint mesh). + Returns None and a warning if the input kpoint settings correspond to a + Γ-only kpoint mesh (in which case ``vasp_gam`` should be used). See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the ``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings, @@ -893,8 +893,8 @@ def vasp_std(self) -> Optional[DefectDictSet]: @property def vasp_nkred_std(self) -> Optional[DefectDictSet]: """ - Returns a DefectDictSet object for a VASP defect supercell relaxation - using ``vasp_std`` (i.e. with a non-Γ-only kpoint mesh) and + Returns a ``DefectDictSet`` object for a VASP defect supercell + relaxation using ``vasp_std`` (i.e. with a non-Γ-only kpoint mesh) and ``NKRED(X,Y,Z)`` INCAR tag(s) to downsample kpoints for the HF exchange part of hybrid DFT calculations, following the doped recommended defect calculation workflow (see docs). By default, sets ``NKRED(X,Y,Z)`` to 2 @@ -974,13 +974,13 @@ def vasp_nkred_std(self) -> Optional[DefectDictSet]: @property def vasp_ncl(self) -> Optional[DefectDictSet]: """ - Returns a DefectDictSet object for a VASP defect supercell singlepoint - calculation with spin-orbit coupling (SOC) included (LSORBIT = True), - using ``vasp_ncl``. If ``DefectRelaxSet.soc`` is False, then this - returns None and a warning. If the ``soc`` parameter is not set when - initializing ``DefectRelaxSet``, then this is set to True for systems - with a max atomic number (Z) >= 31 (i.e. further down the periodic - table than Zn), otherwise False. + Returns a ``DefectDictSet`` object for a VASP defect supercell + singlepoint calculation with spin-orbit coupling (SOC) included + (LSORBIT = True), using ``vasp_ncl``. If ``DefectRelaxSet.soc`` is + False, then this returns None and a warning. If the ``soc`` parameter + is not set when initializing ``DefectRelaxSet``, then this is set to + True for systems with a max atomic number (Z) >= 31 (i.e. further down + the periodic table than Zn), otherwise False. See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the ``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings, @@ -1028,7 +1028,7 @@ def _check_bulk_supercell_and_warn(self): @property def bulk_vasp_gam(self) -> Optional[DefectDictSet]: """ - Returns a DefectDictSet object for a VASP `bulk` Γ-point-only + Returns a ``DefectDictSet`` object for a VASP `bulk` Γ-point-only (``vasp_gam``) singlepoint (static) supercell calculation. Often not used, as the bulk supercell only needs to be calculated once with the same settings as the final defect calculations, which is ``vasp_std`` @@ -1081,7 +1081,7 @@ def bulk_vasp_gam(self) -> Optional[DefectDictSet]: @property def bulk_vasp_std(self) -> Optional[DefectDictSet]: """ - Returns a DefectDictSet object for a singlepoint (static) `bulk` + Returns a ``DefectDictSet`` object for a singlepoint (static) `bulk` ``vasp_std`` supercell calculation. Returns None and a warning if the input kpoint settings correspond to a Γ-only kpoint mesh (in which case ``(bulk_)vasp_gam`` should be used). @@ -1135,7 +1135,7 @@ def bulk_vasp_std(self) -> Optional[DefectDictSet]: @property def bulk_vasp_nkred_std(self) -> Optional[DefectDictSet]: """ - Returns a DefectDictSet object for a singlepoint (static) `bulk` + Returns a ``DefectDictSet`` object for a singlepoint (static) `bulk` ``vasp_std`` supercell calculation (i.e. with a non-Γ-only kpoint mesh) and ``NKRED(X,Y,Z)`` INCAR tag(s) to downsample kpoints for the HF exchange part of the hybrid DFT calculation. By default, sets @@ -1205,13 +1205,13 @@ def bulk_vasp_nkred_std(self) -> Optional[DefectDictSet]: @property def bulk_vasp_ncl(self) -> Optional[DefectDictSet]: """ - Returns a DefectDictSet object for VASP `bulk` supercell singlepoint - calculations with spin-orbit coupling (SOC) included (LSORBIT = True), - using ``vasp_ncl``. If ``DefectRelaxSet.soc`` is False, then this - returns None and a warning. If the ``soc`` parameter is not set when - initializing ``DefectRelaxSet``, then this is set to True for systems - with a max atomic number (Z) >= 31 (i.e. further down the periodic - table than Zn), otherwise False. + Returns a ``DefectDictSet`` object for VASP `bulk` supercell + singlepoint calculations with spin-orbit coupling (SOC) included + (LSORBIT = True), using ``vasp_ncl``. If ``DefectRelaxSet.soc`` is + False, then this returns None and a warning. If the ``soc`` parameter + is not set when initializing ``DefectRelaxSet``, then this is set to + True for systems with a max atomic number (Z) >= 31 (i.e. further down + the periodic table than Zn), otherwise False. See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the ``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings, @@ -1899,13 +1899,13 @@ def __init__( is only generated for systems with a max atomic number (Z) >= 31 (i.e. further down the periodic table than Zn). - where ``DefectDictSet`` is an extension of ``pymatgen``'s ``DictSet`` class for - defect calculations, with ``incar``, ``poscar``, ``kpoints`` and ``potcar`` - attributes for the corresponding VASP defect calculations (see docstring). - Also creates the corresponding ``bulk_vasp_...`` attributes for singlepoint - (static) energy calculations of the bulk (pristine, defect-free) supercell. - This needs to be calculated once with the same settings as the final defect - calculations, for the later calculation of defect formation energies. + where ``DefectDictSet`` is an extension of ``pymatgen``'s ``VaspInputSet`` + class for defect calculations, with ``incar``, ``poscar``, ``kpoints`` and + ``potcar`` attributes for the corresponding VASP defect calculations (see + docstring). Also creates the corresponding ``bulk_vasp_...`` attributes for + singlepoint (static) energy calculations of the bulk (pristine, defect-free) + supercell. This needs to be calculated once with the same settings as the + final defect calculations, for the later calculation of defect formation energies. See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the ``doped/VASP_sets`` folder for the default ``INCAR`` settings, and @@ -1925,10 +1925,10 @@ def __init__( set equal to ``defect_species``. If a list or single ``DefectEntry`` object is provided, the defect folder names will be set equal to ``DefectEntry.name`` if the ``name`` attribute is set, otherwise - generated according to the ``doped`` convention (see doped.generation). + generated according to the ``doped`` convention (see ``doped.generation``). Defect charge states are taken from ``DefectEntry.charge_state``. soc (bool): - Whether to generate ``vasp_ncl`` DefectDictSet attribute for spin-orbit + Whether to generate ``vasp_ncl`` ``DefectDictSet`` attribute for spin-orbit coupling singlepoint (static) energy calculations. If not set, then by default is set to True if the max atomic number (Z) in the structure is >= 31 (i.e. further down the periodic table than Zn). @@ -1941,9 +1941,9 @@ def __init__( with quotation marks (e.g. ``{"ALGO": "All"}``). (default: None) user_kpoints_settings (dict or Kpoints): - Dictionary of user KPOINTS settings (in pymatgen DictSet format) + Dictionary of user KPOINTS settings (in ``pymatgen`` ``VaspInputSet`` format) e.g. {"reciprocal_density": 123}, or a Kpoints object, to use for the - ``vasp_std``, ``vasp_nkred_std`` and ``vasp_ncl`` DefectDictSets (Γ-only for + ``vasp_std``, ``vasp_nkred_std`` and ``vasp_ncl`` ``DefectVaspInputSet``\s (Γ-only for ``vasp_gam``). Default is Gamma-centred, reciprocal_density = 100 [Å⁻³]. user_potcar_functional (str): POTCAR functional to use. Default is "PBE" and if this fails, tries @@ -1960,23 +1960,23 @@ def __init__( Dictionary of {defect_species: DefectEntry} for the input defect species, for which to generate VASP input files. bulk_vasp_gam (DefectDictSet): - DefectDictSet for a `bulk` Γ-point-only singlepoint (static) + ``DefectDictSet`` for a `bulk` Γ-point-only singlepoint (static) supercell calculation. Often not used, as the bulk supercell only needs to be calculated once with the same settings as the final defect calculations, which may be with ``vasp_std`` or ``vasp_ncl``. bulk_vasp_nkred_std (DefectDictSet): - DefectDictSet for a singlepoint (static) `bulk` ``vasp_std`` supercell + ``DefectDictSet`` for a singlepoint (static) `bulk` ``vasp_std`` supercell calculation (i.e. with a non-Γ-only kpoint mesh) and ``NKRED(X,Y,Z)`` INCAR tag(s) to downsample kpoints for the HF exchange part of the hybrid DFT calculation. Not generated for GGA calculations (if ``LHFCALC`` is set to ``False`` in user_incar_settings) or if only Gamma kpoint sampling is required. bulk_vasp_std (DefectDictSet): - DefectDictSet for a singlepoint (static) `bulk` ``vasp_std`` supercell + ``DefectDictSet`` for a singlepoint (static) `bulk` ``vasp_std`` supercell calculation with a non-Γ-only kpoint mesh, not using ``NKRED``. Not generated if only Gamma kpoint sampling is required. bulk_vasp_ncl (DefectDictSet): - DefectDictSet for singlepoint (static) energy calculation of the `bulk` + ``DefectDictSet`` for singlepoint (static) energy calculation of the `bulk` supercell with SOC included. Generated if ``soc=True``. If ``soc`` is not set, then by default is only generated for systems with a max atomic number (Z) >= 31 (i.e. further down the periodic table than Zn). diff --git a/tests/data/VO2_POSCAR b/tests/data/VO2_POSCAR new file mode 100644 index 00000000..4e50cd4c --- /dev/null +++ b/tests/data/VO2_POSCAR @@ -0,0 +1,14 @@ +V2 O4 +1.0 + 3.0354292200000002 0.0000000000000000 0.0000000000000000 + 0.0000000000000000 4.5150226299999998 0.0000000000000000 + 0.0000000000000000 0.0000000000000000 4.5150226299999998 +O V +4 2 +direct + 0.0000000000000000 0.7006565100000000 0.7006565100000000 O + 0.4999999300000000 0.7993435900000000 0.2006572000000000 O + 0.0000000000000000 0.2993421100000000 0.2993421100000000 O + 0.4999999300000000 0.2006572000000000 0.7993435900000000 O + 0.4999999300000000 0.4999993100000000 0.4999993100000000 V + 0.0000000000000000 0.0000000000000000 0.0000000000000000 V diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 1df04384..c8abd757 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -2691,6 +2691,7 @@ def test_bulk_defect_compatibility_checks(self): skip_corrections=True, parse_projected_eigen=False, ) + print([str(warning.message) for warning in w]) # for debugging assert len(w) == 1 assert all( i in str(w[-1].message) @@ -2731,6 +2732,7 @@ def test_bulk_defect_compatibility_checks(self): skip_corrections=True, parse_projected_eigen=False, ) + print([str(warning.message) for warning in w]) # for debugging assert len(w) == 1 assert all( i in str(w[-1].message) @@ -2758,6 +2760,7 @@ def test_bulk_defect_compatibility_checks(self): skip_corrections=True, parse_projected_eigen=False, ) + print([str(warning.message) for warning in w]) # for debugging assert len(w) == 2 # now INCAR and KPOINTS warnings! assert any( all( @@ -2789,6 +2792,7 @@ def test_bulk_defect_compatibility_checks(self): charge_state=+1, # manually specify charge state here, as our edited POTCAR doesn't exist parse_projected_eigen=False, ) + print([str(warning.message) for warning in w]) # for debugging assert len(w) == 3 # now INCAR and KPOINTS and POTCAR warnings! assert any( all( @@ -2821,6 +2825,7 @@ def test_bulk_defect_compatibility_checks(self): charge_state=+1, # manually specify charge state here, as our edited POTCAR doesn't exist parse_projected_eigen=False, ) + print([str(warning.message) for warning in w]) # for debugging assert any( "The defect and bulk supercells are not the same size, having volumes of 513790.5 and " "2241.3 Å^3 respectively." in str(warning.message) diff --git a/tests/test_corrections.py b/tests/test_corrections.py index 61c02dfc..5e9a17f3 100644 --- a/tests/test_corrections.py +++ b/tests/test_corrections.py @@ -14,9 +14,8 @@ import matplotlib as mpl import numpy as np import pytest -from pymatgen.core.sites import PeriodicSite +from pymatgen.core.structure import PeriodicSite, Structure from pymatgen.entries.computed_entries import ComputedStructureEntry -from pymatgen.util.testing import PymatgenTest from test_analysis import if_present_rm from doped import analysis @@ -30,15 +29,15 @@ data_dir = os.path.join(module_path, "data") -class FiniteSizeChargeCorrectionTest(PymatgenTest): +class FiniteSizeChargeCorrectionTest(unittest.TestCase): """ - Test functions for getting freysoldt and kumagai corrections. + Test functions for getting Freysoldt (FNV) and Kumagai (eFNV) corrections. """ def setUp(self): self.dielectric = 15.0 - struct = PymatgenTest.get_structure("VO2") + struct = Structure.from_file(os.path.join(data_dir, "VO2_POSCAR")) struct.make_supercell(3) vac = Vacancy(struct, struct.sites[0], charge=-3) From 2b0ca974234d265f99dca6e11a66cc61cfca8f23 Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Tue, 4 Jun 2024 13:48:37 -0400 Subject: [PATCH 09/10] Remove `typing_extensions` requirement now new `pymatgen` released --- docs/Dev_ToDo.md | 1 - pyproject.toml | 3 --- 2 files changed, 4 deletions(-) diff --git a/docs/Dev_ToDo.md b/docs/Dev_ToDo.md index 4375ce75..289a24f3 100644 --- a/docs/Dev_ToDo.md +++ b/docs/Dev_ToDo.md @@ -87,7 +87,6 @@ there's any useful functionality we want to add! ## SK To-Do for next update: -- Remove `typing-extensions` requirement from `pyproject.toml`, once new `pymatgen-analysis-defects` released. - `doped` repo/docs cleanup `TODO`s above - Quick-start tutorial suggested by Alex G - Add chempot grid plotting tool, shown in `JOSS_plots` using Alex's chemical potential grid, and test (and remove TODO from JOSS plots notebook). diff --git a/pyproject.toml b/pyproject.toml index 974d17ab..82ba059e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,9 +35,6 @@ dependencies = [ "vise>=0.9.0", # for eigenvalue analysis, >=0.9.0 required for SOC compatibility "easyunfold>=0.3.6", # for efficient PROCAR parsing (and supporting SOC), # if any dependency issues arise can make easyunfold an optional dependency - "typing-extensions" # to fix `pymatgen` typing issues - # (https://github.com/materialsproject/pymatgen/pull/3752) - # TODO: Remove once new pymatgen released ] [project.urls] From ff209d39c844d823fbdff648bc363fcf6d5151f6 Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Wed, 5 Jun 2024 12:59:09 -0400 Subject: [PATCH 10/10] Bump changelog and version numbers for release --- CHANGELOG.rst | 6 ++++++ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 25547693..b1474c55 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,12 @@ Change Log ========== +v.2.4.4 +---------- +- Make oxidation state guessing more efficient, semi-significant speed up in generation/parsing for tough cases. +- Add `bulk_site_concentration` property to `DefectEntry`, giving the concentration of the corresponding lattice site of that defect in the pristine bulk. +- Minor updates to ensure compatibility with recent ``pymatgen`` and ``ASE`` releases. + v.2.4.3 ---------- - Remove ``spglib<=2.0.2`` dependency (set to avoid unnecessary warnings), and update installation instructions accordingly. diff --git a/pyproject.toml b/pyproject.toml index 82ba059e..59bdb5f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "doped" -version = "2.4.3" +version = "2.4.4" description = "Python package to setup, process and analyse solid-state defect calculations with VASP" authors = [{name = "Seán Kavanagh", email = "skavanagh@seas.harvard.edu"}] readme = "README.md"