From 793d481f5d478baad18bd86cc5e05c4b00727b3b Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Mon, 27 May 2024 17:03:45 +0100
Subject: [PATCH 01/10] Return per site concentrations as percentages in
 `get_equilibrium_concentrations`

---
 doped/core.py           |  3 ++-
 doped/thermodynamics.py | 21 +++++++++++++++------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/doped/core.py b/doped/core.py
index ab84e5d1..8a6c27fe 100644
--- a/doped/core.py
+++ b/doped/core.py
@@ -1085,7 +1085,8 @@ def equilibrium_concentration(
                 referenced to the VBM. Default is 0 (i.e. the VBM).
             per_site (bool):
                 Whether to return the concentration as fractional concentration per site,
-                rather than the default of per cm^3. (default: False)
+                rather than the default of per cm^3. Multiply by 100 for concentration in
+                percent. (default: False)
             symprec (float):
                 Symmetry tolerance for ``spglib`` to use when determining relaxed defect
                 point symmetries and thus orientational degeneracies. Default is ``0.1``
diff --git a/doped/thermodynamics.py b/doped/thermodynamics.py
index 5b3dbc59..bc5da28a 100644
--- a/doped/thermodynamics.py
+++ b/doped/thermodynamics.py
@@ -1227,7 +1227,7 @@ def get_equilibrium_concentrations(
                 states (e.g. ``v_Cd_0``, ``v_Cd_-1``, ``v_Cd_-2`` instead of ``v_Cd``).
                 (default: True)
             per_site (bool):
-                Whether to return the concentrations as fractional concentrations per site,
+                Whether to return the concentrations as percent concentrations per site,
                 rather than the default of per cm^3. (default: False)
             skip_formatting (bool):
                 Whether to skip formatting the defect charge states and concentrations as
@@ -1252,7 +1252,7 @@ def get_equilibrium_concentrations(
                 fermi_level=fermi_level,
                 vbm=defect_entry.calculation_metadata.get("vbm", self.vbm),
             )
-            concentration = defect_entry.equilibrium_concentration(
+            raw_concentration = defect_entry.equilibrium_concentration(
                 chempots=chempots,
                 limit=limit,
                 el_refs=el_refs,
@@ -1261,6 +1261,17 @@ def get_equilibrium_concentrations(
                 temperature=temperature,
                 per_site=per_site,
             )
+            if skip_formatting:
+                concentration = raw_concentration
+
+            elif per_site:
+                concentration = (
+                    f"{raw_concentration:.3%}"  # greater than 0.001%
+                    if raw_concentration > 1e-5
+                    else f"{raw_concentration * 100:.3e} %"
+                )
+            else:
+                concentration = f"{raw_concentration:.3e}"
             energy_concentration_list.append(
                 {
                     "Defect": defect_entry.name.rsplit("_", 1)[0],  # name without charge
@@ -1271,10 +1282,8 @@ def get_equilibrium_concentrations(
                         else f"{'+' if defect_entry.charge_state > 0 else ''}{defect_entry.charge_state}"
                     ),
                     "Formation Energy (eV)": round(formation_energy, 3),
-                    "Raw Concentration": concentration,
-                    "Concentration (per site)" if per_site else "Concentration (cm^-3)": (
-                        concentration if skip_formatting else f"{concentration:.3e}"
-                    ),
+                    "Raw Concentration": raw_concentration,
+                    "Concentration (per site)" if per_site else "Concentration (cm^-3)": concentration,
                 }
             )
 

From 8e0ec578c517fbd68d9fa6592e3516ffcc4e3abd Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Mon, 27 May 2024 14:13:09 -0400
Subject: [PATCH 02/10] Add `bulk_site_concentration` property and tests

---
 doped/core.py                | 15 ++++++++++++---
 tests/test_analysis.py       |  4 ++++
 tests/test_generation.py     |  7 +++++++
 tests/test_thermodynamics.py | 13 ++++++++++---
 4 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/doped/core.py b/doped/core.py
index 8a6c27fe..2c3e9815 100644
--- a/doped/core.py
+++ b/doped/core.py
@@ -1150,10 +1150,19 @@ def equilibrium_concentration(
         if per_site:
             return exp_factor * degeneracy_factor
 
-        volume_in_cm3 = self.defect.structure.volume * 1e-24  # convert volume in Å^3 to cm^3
-
         with np.errstate(over="ignore"):
-            return self.defect.multiplicity * degeneracy_factor * exp_factor / volume_in_cm3
+            return self.bulk_site_concentration * degeneracy_factor * exp_factor
+
+    @property
+    def bulk_site_concentration(self):
+        """
+        Return the site concentration (in cm^-3) of the corresponding atomic
+        site of the defect in the pristine bulk material (e.g. if the defect is
+        V_O in SrTiO3, returns the site concentration of (symmetry-equivalent)
+        oxygen atoms in SrTiO3).
+        """
+        volume_in_cm3 = self.defect.structure.volume * 1e-24  # convert volume in Å^3 to cm^3
+        return self.defect.multiplicity / volume_in_cm3
 
     def __repr__(self):
         """
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 98aefa41..782b8fc4 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -970,6 +970,10 @@ def test_SrTiO3_diff_ISYM_bulk_defect(self):
         assert dp.defect_dict["vac_O_0"].calculation_metadata["relaxed point symmetry"] == "C2v"
         thermo = dp.get_defect_thermodynamics()
 
+        # hardcoded check of bulk_site_concentration property:
+        for defect_entry in dp.defect_dict.values():  # oxygen site concentration
+            assert np.isclose(defect_entry.bulk_site_concentration, 5.06752e22, rtol=1e-4)
+
         print(thermo.get_symmetries_and_degeneracies())
 
         return thermo.plot()
diff --git a/tests/test_generation.py b/tests/test_generation.py
index d58650d3..7fcec6c6 100644
--- a/tests/test_generation.py
+++ b/tests/test_generation.py
@@ -1772,6 +1772,13 @@ def CdTe_defect_gen_check(self, CdTe_defect_gen, generate_supercell=True):
             + "\n---------------------------------------------------------\n"
             + self.CdTe_defect_gen_info
         )
+        for defect_entry in CdTe_defect_gen.defect_entries.values():
+            if defect_entry.defect.defect_type != DefectType.Interstitial:
+                assert np.isclose(defect_entry.bulk_site_concentration, 1e24 / self.prim_cdte.volume)
+            else:
+                assert np.isclose(
+                    defect_entry.bulk_site_concentration, 1e24 / self.prim_cdte.volume
+                ) or np.isclose(defect_entry.bulk_site_concentration, 4e24 / self.prim_cdte.volume)
 
         # explicitly test defect entry charge state log:
         assert CdTe_defect_gen.defect_entries["v_Cd_-1"].charge_state_guessing_log == [
diff --git a/tests/test_thermodynamics.py b/tests/test_thermodynamics.py
index d8ddd13f..0f403c33 100644
--- a/tests/test_thermodynamics.py
+++ b/tests/test_thermodynamics.py
@@ -1445,7 +1445,6 @@ def test_parse_chempots_CdTe(self):
         self._check_chempots_dict(self.CdTe_defect_thermo.chempots)
         assert self.CdTe_defect_thermo.chempots == self.CdTe_chempots
         assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"]
-        self.CdTe_defect_thermo.el_refs = self.CdTe_chempots["elemental_refs"]
         self._check_chempots_dict(self.CdTe_defect_thermo.chempots)
         assert self.CdTe_defect_thermo.chempots == self.CdTe_chempots  # the same
         assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"]  # the same
@@ -1460,7 +1459,6 @@ def test_parse_chempots_CdTe(self):
         assert self.CdTe_defect_thermo.chempots == semi_manual_chempots_dict
         assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"]
 
-        self.CdTe_defect_thermo.el_refs = self.CdTe_chempots["elemental_refs"]
         self._check_chempots_dict(self.CdTe_defect_thermo.chempots)
         assert self.CdTe_defect_thermo.chempots == semi_manual_chempots_dict  # the same
         assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"]  # the same
@@ -1477,7 +1475,6 @@ def test_parse_chempots_CdTe(self):
         ]
         assert self.CdTe_defect_thermo.chempots == manual_zeroed_rel_chempots_dict
         assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"]  # unchanged
-        self.CdTe_defect_thermo.el_refs = self.CdTe_chempots["elemental_refs"]
         self._check_chempots_dict(self.CdTe_defect_thermo.chempots)
         assert self.CdTe_defect_thermo.el_refs == self.CdTe_chempots["elemental_refs"]  # the same
         assert self.CdTe_defect_thermo.chempots == manual_zeroed_rel_chempots_dict  # the same
@@ -1834,6 +1831,16 @@ def test_formation_energy_mult_degen(self):
                     )
                     assert np.isclose(new_conc, orig_conc * 21)
 
+                    # test per_site and bulk_site_concentration attributes:
+                    new_conc = random_defect_entry.equilibrium_concentration(
+                        chempots=self.CdTe_chempots,
+                        limit="Cd-rich",
+                        fermi_level=fermi_level,
+                        temperature=temperature,
+                        per_site=True,
+                    )
+                    assert np.isclose(orig_conc, new_conc * random_defect_entry.bulk_site_concentration)
+
     @custom_mpl_image_compare(filename="CdTe_duplicate_entry_names.png")
     def test_handling_duplicate_entry_names(self):
         """

From a62743c252ddfccc84a1500ac3b1cce4fb8343cf Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Tue, 28 May 2024 11:45:48 -0400
Subject: [PATCH 03/10] Ensure 'no chempots' warning only shown once for
 concentration functions, add `per_charge`, `per_site` and `skip_formatting`
 options to `get_quenched_fermi_level_and_concentrations` as well, plus some
 general efficiency/robustness updates, and add tests

---
 doped/thermodynamics.py | 259 ++++++++++++++++++++++++++++------------
 tests/test_analysis.py  | 158 +++++++++++++++++++++++-
 2 files changed, 336 insertions(+), 81 deletions(-)

diff --git a/doped/thermodynamics.py b/doped/thermodynamics.py
index bc5da28a..f0da71f3 100644
--- a/doped/thermodynamics.py
+++ b/doped/thermodynamics.py
@@ -1231,7 +1231,7 @@ def get_equilibrium_concentrations(
                 rather than the default of per cm^3. (default: False)
             skip_formatting (bool):
                 Whether to skip formatting the defect charge states and concentrations as
-                strings (and keep as ints and floats instead). (default: False)
+                strings (and keep as ``int``\s and ``float``\s instead). (default: False)
 
         Returns:
             ``pandas`` ``DataFrame`` of defect concentrations (and formation energies)
@@ -1261,17 +1261,7 @@ def get_equilibrium_concentrations(
                 temperature=temperature,
                 per_site=per_site,
             )
-            if skip_formatting:
-                concentration = raw_concentration
-
-            elif per_site:
-                concentration = (
-                    f"{raw_concentration:.3%}"  # greater than 0.001%
-                    if raw_concentration > 1e-5
-                    else f"{raw_concentration * 100:.3e} %"
-                )
-            else:
-                concentration = f"{raw_concentration:.3e}"
+
             energy_concentration_list.append(
                 {
                     "Defect": defect_entry.name.rsplit("_", 1)[0],  # name without charge
@@ -1283,7 +1273,11 @@ def get_equilibrium_concentrations(
                     ),
                     "Formation Energy (eV)": round(formation_energy, 3),
                     "Raw Concentration": raw_concentration,
-                    "Concentration (per site)" if per_site else "Concentration (cm^-3)": concentration,
+                    (
+                        "Concentration (per site)" if per_site else "Concentration (cm^-3)"
+                    ): _format_concentration(
+                        raw_concentration, per_site=per_site, skip_formatting=skip_formatting
+                    ),
                 }
             )
 
@@ -1294,24 +1288,14 @@ def get_equilibrium_concentrations(
                 "Raw Concentration"
             ].transform("sum")
             conc_df["Charge State Population"] = conc_df["Charge State Population"].apply(
-                lambda x: f"{x:.1%}"
+                lambda x: f"{x:.2%}"
             )
             conc_df = conc_df.drop(columns=["Raw Charge", "Raw Concentration"])
             return conc_df.reset_index(drop=True)
 
         # group by defect and sum concentrations:
-        summed_df = conc_df.groupby("Defect").sum(numeric_only=True)
-        summed_df[next(k for k in conc_df.columns if k.startswith("Concentration"))] = (
-            summed_df["Raw Concentration"]
-            if skip_formatting
-            else summed_df["Raw Concentration"].apply(lambda x: f"{x:.3e}")
-        )
-        return summed_df.drop(
-            columns=[
-                i
-                for i in ["Charge", "Formation Energy (eV)", "Raw Charge", "Raw Concentration"]
-                if i in summed_df.columns
-            ]
+        return _group_defect_charge_state_concentrations(
+            conc_df, per_site=per_site, skip_formatting=skip_formatting
         )
 
     def _parse_fermi_dos(self, bulk_dos_vr: Union[str, Vasprun, FermiDos]):
@@ -1442,6 +1426,9 @@ def get_equilibrium_fermi_level(
         chempots, el_refs = self._get_chempots(
             chempots, el_refs
         )  # returns self.chempots/self.el_refs if chempots is None
+        if chempots is None:  # handle once here, as otherwise brentq function results in this being
+            # called many times
+            _no_chempots_warning()
 
         def _get_total_q(fermi_level):
             conc_df = self.get_equilibrium_concentrations(
@@ -1458,6 +1445,8 @@ def _get_total_q(fermi_level):
 
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", "overflow")  # ignore overflow warnings
+            warnings.filterwarnings("ignore", "No chemical potentials")  # ignore chempots warning,
+            # as given once above
 
             eq_fermi_level: float = brentq(_get_total_q, -1.0, self.band_gap + 1.0)  # type: ignore
             if return_concs:
@@ -1477,6 +1466,9 @@ def get_quenched_fermi_level_and_concentrations(
         annealing_temperature: float = 1000,
         quenched_temperature: float = 300,
         delta_gap: float = 0,
+        per_charge: bool = True,
+        per_site: bool = False,
+        skip_formatting: bool = False,
         **kwargs,
     ) -> tuple[float, float, float, pd.DataFrame]:
         r"""
@@ -1601,6 +1593,16 @@ def get_quenched_fermi_level_and_concentrations(
                 about the VBM and CBM (i.e. assuming equal up/downshifts of the band-edges
                 around their original eigenvalues) while the defect levels remain fixed.
                 (Default: 0)
+            per_charge (bool):
+                Whether to break down the defect concentrations into individual defect charge
+                states (e.g. ``v_Cd_0``, ``v_Cd_-1``, ``v_Cd_-2`` instead of ``v_Cd``).
+                (default: True)
+            per_site (bool):
+                Whether to return the concentrations as percent concentrations per site,
+                rather than the default of per cm^3. (default: False)
+            skip_formatting (bool):
+                Whether to skip formatting the defect charge states and concentrations as
+                strings (and keep as ``int``\s and ``float``\s instead). (default: False)
             **kwargs:
                 Additional keyword arguments to pass to ``scissor_dos`` (if ``delta_gap``
                 is not 0).
@@ -1611,12 +1613,19 @@ def get_quenched_fermi_level_and_concentrations(
             quenched defect concentrations (in cm^-3).
             (fermi_level, e_conc, h_conc, conc_df)
         """
+        if kwargs and any(i not in ["verbose", "tol"] for i in kwargs):
+            raise ValueError(f"Invalid keyword arguments: {', '.join(kwargs.keys())}")
+
         # TODO: Update docstrings after `py-sc-fermi` interface written, to point toward it for more
         #  advanced analysis
         fermi_dos = self._parse_fermi_dos(bulk_dos_vr)
         chempots, el_refs = self._get_chempots(
             chempots, el_refs
         )  # returns self.chempots/self.el_refs if chempots is None
+        if chempots is None:  # handle once here, as otherwise brentq function results in this being
+            # called many times
+            _no_chempots_warning()
+
         annealing_dos = (
             fermi_dos
             if delta_gap == 0
@@ -1627,67 +1636,117 @@ def get_quenched_fermi_level_and_concentrations(
                 tol=kwargs.get("tol", 1e-8),
             )
         )
-        annealing_fermi_level = self.get_equilibrium_fermi_level(
-            annealing_dos,
-            chempots=chempots,
-            limit=limit,
-            el_refs=el_refs,
-            temperature=annealing_temperature,
-            return_concs=False,
-        )
-        annealing_defect_concentrations = self.get_equilibrium_concentrations(
-            chempots=chempots,
-            limit=limit,
-            el_refs=el_refs,
-            fermi_level=annealing_fermi_level,  # type: ignore
-            temperature=annealing_temperature,
-            per_charge=False,  # give total concentrations for each defect
-            skip_formatting=True,
-        )
-        annealing_defect_concentrations = annealing_defect_concentrations.rename(
-            columns={"Concentration (cm^-3)": "Total Concentration (cm^-3)"}
-        )
 
-        def _get_constrained_total_q(fermi_level, return_conc_df=False):
-            conc_df = self.get_equilibrium_concentrations(
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", "overflow")  # ignore overflow warnings
+            warnings.filterwarnings("ignore", "No chemical potentials")  # ignore chempots warning,
+            # as given once above
+
+            annealing_fermi_level = self.get_equilibrium_fermi_level(
+                annealing_dos,
                 chempots=chempots,
                 limit=limit,
                 el_refs=el_refs,
-                temperature=quenched_temperature,
-                fermi_level=fermi_level,
+                temperature=annealing_temperature,
+                return_concs=False,
+            )
+            annealing_defect_concentrations = self.get_equilibrium_concentrations(
+                chempots=chempots,
+                limit=limit,
+                el_refs=el_refs,
+                fermi_level=annealing_fermi_level,  # type: ignore
+                temperature=annealing_temperature,
+                per_charge=False,  # give total concentrations for each defect
                 skip_formatting=True,
             )
+            annealing_defect_concentrations = annealing_defect_concentrations.rename(
+                columns={"Concentration (cm^-3)": "Total Concentration (cm^-3)"}
+            )
+
+            def _get_constrained_concentrations(
+                fermi_level, per_charge=True, per_site=False, skip_formatting=False
+            ):
+                conc_df = self.get_equilibrium_concentrations(
+                    chempots=chempots,
+                    limit=limit,
+                    el_refs=el_refs,
+                    temperature=quenched_temperature,
+                    fermi_level=fermi_level,
+                    skip_formatting=True,
+                )
+
+                conc_df = conc_df.merge(annealing_defect_concentrations, on="Defect")
+                conc_df["Concentration (cm^-3)"] = (  # set total concentration to match annealing conc
+                    conc_df["Concentration (cm^-3)"]  # but with same relative concentrations
+                    / conc_df.groupby("Defect")["Concentration (cm^-3)"].transform("sum")
+                ) * conc_df["Total Concentration (cm^-3)"]
+
+                if not per_charge:
+                    conc_df = _group_defect_charge_state_concentrations(
+                        conc_df, per_site, skip_formatting=True
+                    )
+
+                if per_site:
+                    cm3_conc_df = self.get_equilibrium_concentrations(
+                        chempots=chempots,
+                        limit=limit,
+                        el_refs=el_refs,
+                        temperature=quenched_temperature,
+                        fermi_level=fermi_level,
+                        skip_formatting=True,
+                        per_charge=per_charge,
+                    )
+                    per_site_conc_df = self.get_equilibrium_concentrations(
+                        chempots=chempots,
+                        limit=limit,
+                        el_refs=el_refs,
+                        temperature=quenched_temperature,
+                        fermi_level=fermi_level,
+                        skip_formatting=True,
+                        per_site=True,
+                        per_charge=per_charge,
+                    )
+                    per_site_factors = (
+                        per_site_conc_df["Concentration (per site)"] / cm3_conc_df["Concentration (cm^-3)"]
+                    )
+                    conc_df["Concentration (per site)"] = (
+                        conc_df["Concentration (cm^-3)"] * per_site_factors
+                    )
+                    conc_df = conc_df.drop(columns=["Concentration (cm^-3)"])
+
+                    if not skip_formatting:
+                        conc_df["Concentration (per site)"] = conc_df["Concentration (per site)"].apply(
+                            _format_per_site_concentration
+                        )
 
-            conc_df = conc_df.merge(annealing_defect_concentrations, on="Defect")
-            conc_df["Concentration (cm^-3)"] = (  # set total concentration to match annealing conc
-                conc_df["Concentration (cm^-3)"]  # but with same relative concentrations
-                / conc_df.groupby("Defect")["Concentration (cm^-3)"].transform("sum")
-            ) * conc_df["Total Concentration (cm^-3)"]
+                elif not skip_formatting:
+                    conc_df["Concentration (cm^-3)"] = conc_df["Concentration (cm^-3)"].apply(
+                        lambda x: f"{x:.3e}"
+                    )
 
-            if return_conc_df:
                 return conc_df
-            qd_tot = (conc_df["Charge"] * conc_df["Concentration (cm^-3)"]).sum()
-            qd_tot += fermi_dos.get_doping(
-                fermi_level=fermi_level + self.vbm, temperature=quenched_temperature
-            )
-            return qd_tot
 
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", "overflow")  # ignore overflow warnings
+            def _get_constrained_total_q(fermi_level, return_conc_df=False):
+                conc_df = _get_constrained_concentrations(fermi_level, skip_formatting=True)
+                qd_tot = (conc_df["Charge"] * conc_df["Concentration (cm^-3)"]).sum()
+                qd_tot += fermi_dos.get_doping(
+                    fermi_level=fermi_level + self.vbm, temperature=quenched_temperature
+                )
+                return qd_tot
 
             eq_fermi_level: float = brentq(
                 _get_constrained_total_q, -1.0, self.band_gap + 1.0  # type: ignore
             )
+            e_conc, h_conc = get_e_h_concs(
+                fermi_dos, eq_fermi_level + self.vbm, quenched_temperature  # type: ignore
+            )
 
-        e_conc, h_conc = get_e_h_concs(
-            fermi_dos, eq_fermi_level + self.vbm, quenched_temperature  # type: ignore
-        )
-        return (
-            eq_fermi_level,
-            e_conc,
-            h_conc,
-            _get_constrained_total_q(eq_fermi_level, return_conc_df=True),
-        )
+            return (
+                eq_fermi_level,
+                e_conc,
+                h_conc,
+                _get_constrained_concentrations(eq_fermi_level, per_charge, per_site, skip_formatting),
+            )
 
     def get_formation_energy(
         self,
@@ -2568,7 +2627,7 @@ def get_formation_energies(
                 If None (default), set to the mid-gap Fermi level (E_g/2).
             skip_formatting (bool):
                 Whether to skip formatting the defect charge states as
-                strings (and keep as ints and floats instead).
+                strings (and keep as ``int``\s and ``float``\s instead).
                 (default: False)
 
         Returns:
@@ -2617,7 +2676,7 @@ def _single_formation_energy_table(
         fermi_level: float = 0,
         skip_formatting: bool = False,
     ) -> pd.DataFrame:
-        """
+        r"""
         Returns a defect formation energy table for a single chemical potential
         limit as a pandas ``DataFrame``.
 
@@ -2659,7 +2718,7 @@ def _single_formation_energy_table(
                 used, or if not present, ``self.vbm``).
             skip_formatting (bool):
                 Whether to skip formatting the defect charge states as
-                strings (and keep as ints and floats instead).
+                strings (and keep as ``int``\s and ``float``\s instead).
                 (default: False)
 
         Returns:
@@ -2791,7 +2850,7 @@ def get_symmetries_and_degeneracies(
         Args:
             skip_formatting (bool):
                 Whether to skip formatting the defect charge states as
-                strings (and keep as ints and floats instead).
+                strings (and keep as ``int``\s and ``float``\s instead).
                 (default: False)
             symprec (float):
                 Symmetry tolerance for ``spglib`` to use when determining
@@ -2870,6 +2929,54 @@ def __repr__(self):
         )
 
 
+def _group_defect_charge_state_concentrations(conc_df, per_site=False, skip_formatting=False):
+    summed_df = conc_df.groupby("Defect").sum(numeric_only=True)
+    raw_concentrations = (
+        summed_df["Raw Concentration"]
+        if "Raw Concentration" in summed_df.columns
+        else summed_df[next(k for k in conc_df.columns if k.startswith("Concentration"))]
+    )
+    summed_df[next(k for k in conc_df.columns if k.startswith("Concentration"))] = (
+        raw_concentrations.apply(
+            lambda x: _format_concentration(x, per_site=per_site, skip_formatting=skip_formatting)
+        )
+    )
+    return summed_df.drop(
+        columns=[
+            i
+            for i in [
+                "Charge",
+                "Formation Energy (eV)",
+                "Raw Charge",
+                "Raw Concentration",
+                "Charge State Population",
+            ]
+            if i in summed_df.columns
+        ]
+    )
+
+
+def _format_concentration(raw_concentration, per_site=False, skip_formatting=False):
+    """
+    Format concentration values for ``DataFrame`` outputs.
+    """
+    if skip_formatting:
+        return raw_concentration
+    if per_site:
+        return _format_per_site_concentration(raw_concentration)
+
+    return f"{raw_concentration:.3e}"
+
+
+def _format_per_site_concentration(raw_concentration):
+    """
+    Format per-site concentrations for ``DataFrame`` outputs.
+    """
+    if raw_concentration > 1e-5:
+        return f"{raw_concentration:.3%}"
+    return f"{raw_concentration * 100:.3e} %"
+
+
 def get_e_h_concs(fermi_dos: FermiDos, fermi_level: float, temperature: float) -> tuple[float, float]:
     """
     Get the corresponding electron and hole concentrations (in cm^-3) for a
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 782b8fc4..1df04384 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -1,6 +1,7 @@
 """
-Tests for the `doped.analysis` module, which also implicitly tests most of the
-`doped.utils.parsing` module.
+Tests for the ``doped.analysis`` module, which also implicitly tests most of
+the ``doped.utils.parsing`` module, and some ``doped.thermodynamics``
+functions.
 """
 
 import gzip
@@ -920,10 +921,12 @@ def test_V2O5_same_named_defects(self):
         return thermo.plot(limit="V2O5-O2")
 
     @custom_mpl_image_compare(filename="SrTiO3_v_O.png")
-    def test_SrTiO3_diff_ISYM_bulk_defect(self):
+    def test_SrTiO3_diff_ISYM_bulk_defect_and_concentration_funcs(self):
         """
         Test parsing SrTiO3 defect calculations, where a different ISYM was
-        used for the bulk (= 3) compared to the defect (= 0) calculations.
+        used for the bulk (= 3) compared to the defect (= 0) calculations, as
+        well as the ``DefectThermodynamics`` concentration functions with
+        various options.
 
         Previously this failed because the bulk/defect kpoints could not be
         properly compared.
@@ -971,11 +974,156 @@ def test_SrTiO3_diff_ISYM_bulk_defect(self):
         thermo = dp.get_defect_thermodynamics()
 
         # hardcoded check of bulk_site_concentration property:
+        sto_O_site_conc = 5.067520709900586e22
         for defect_entry in dp.defect_dict.values():  # oxygen site concentration
-            assert np.isclose(defect_entry.bulk_site_concentration, 5.06752e22, rtol=1e-4)
+            assert np.isclose(defect_entry.bulk_site_concentration, sto_O_site_conc, rtol=1e-4)
 
         print(thermo.get_symmetries_and_degeneracies())
 
+        conc_df = thermo.get_equilibrium_concentrations()  # no chempots or Fermi level
+        print("conc_df", conc_df)  # for debugging
+        srtio3_V_O_conc_lists = [  # with no chempots or Fermi level (so using Eg/2)
+            ["vac_O", "+2", 9.742, "4.456e-141", "100.00%"],
+            ["vac_O", "+1", 11.043, "2.497e-162", "0.00%"],
+            ["vac_O", "0", 12.635, "1.109e-189", "0.00%"],
+        ]
+        for i, row in enumerate(srtio3_V_O_conc_lists):
+            print(i, row)
+            assert list(conc_df.iloc[i]) == row
+
+        per_site_conc_df = thermo.get_equilibrium_concentrations(per_site=True)
+        print("per_site_conc_df", per_site_conc_df)  # for debugging
+        for i, row in enumerate(srtio3_V_O_conc_lists):
+            print(i, row)
+            for j, val in enumerate(row):
+                if j != 3:
+                    assert per_site_conc_df.iloc[i, j] == val
+                else:
+                    assert np.isclose(
+                        float(per_site_conc_df.iloc[i, j][:-2]), 100 * float(row[3]) / sto_O_site_conc
+                    )
+
+        per_site_conc_df = thermo.get_equilibrium_concentrations(per_site=True, skip_formatting=True)
+        print("per_site_conc_df", per_site_conc_df)  # for debugging
+        for i, row in enumerate(srtio3_V_O_conc_lists):
+            print(i, row)
+            for j, val in enumerate(row):
+                if j not in [1, 3]:
+                    assert per_site_conc_df.iloc[i, j] == val
+                elif j == 3:
+                    assert np.isclose(
+                        per_site_conc_df.iloc[i, j], float(row[3]) / sto_O_site_conc, rtol=1e-5
+                    )
+                elif j == 1:
+                    assert per_site_conc_df.iloc[i, j] == int(val)  # charge states
+
+        assert thermo.get_equilibrium_concentrations(per_charge=False).to_numpy().tolist() == [
+            ["4.456e-141"]
+        ]
+        assert np.isclose(
+            float(
+                thermo.get_equilibrium_concentrations(
+                    per_charge=False, per_site=True, skip_formatting=True
+                )
+                .to_numpy()
+                .tolist()[0][0]
+            ),
+            4.456e-141 / sto_O_site_conc,
+        )
+        assert np.isclose(
+            float(
+                thermo.get_equilibrium_concentrations(per_charge=False, per_site=True)
+                .to_numpy()
+                .tolist()[0][0][:-2]
+            ),
+            100 * 4.456e-141 / sto_O_site_conc,
+            rtol=1e-3,
+        )
+
+        assert next(
+            iter(
+                thermo.get_equilibrium_concentrations(per_charge=False, fermi_level=1.710795)
+                .to_numpy()
+                .tolist()
+            )
+        ) == ["2.004e-142"]
+
+        per_site_conc_df = thermo.get_equilibrium_concentrations(per_site=True, fermi_level=1.710795)
+        print("per_site_conc_df", per_site_conc_df)  # for debugging
+        custom_fermi_concs = ["3.954e-163 %", "1.045e-183 %", "2.189e-210 %"]
+        for i, val in enumerate(custom_fermi_concs):
+            print(i, val)
+            assert per_site_conc_df.iloc[i, 3] == val
+
+        # test get_quenched_fermi_level_and_concentrations
+        fermi_level, e_conc, h_conc, conc_df = thermo.get_quenched_fermi_level_and_concentrations(
+            bulk_dos_vr=f"{self.SrTiO3_DATA_DIR}/bulk_sp333/vasprun.xml",
+            annealing_temperature=300,
+            per_charge=False,
+        )
+        print("conc_df", conc_df)  # for debugging
+        assert np.isclose(fermi_level, 1.710795, rtol=1e-4)
+        assert np.isclose(e_conc, h_conc, rtol=1e-4)  # same because defect concentration negligible
+        # without chempots
+        assert np.isclose(e_conc, 6.129e-7, rtol=1e-3)
+        assert conc_df.to_numpy().tolist() == [["2.004e-142", 6.010973640124676e-142]]
+        assert conc_df.index[0] == "vac_O"
+        assert conc_df.index.name == "Defect"
+
+        fermi_level, e_conc, h_conc, conc_df = thermo.get_quenched_fermi_level_and_concentrations(
+            bulk_dos_vr=f"{self.SrTiO3_DATA_DIR}/bulk_sp333/vasprun.xml",
+            annealing_temperature=300,
+            skip_formatting=True,
+        )
+        print("quenched conc_df", conc_df)  # for debugging
+        assert np.isclose(fermi_level, 1.710795, rtol=1e-4)
+        assert np.isclose(e_conc, h_conc, rtol=1e-4)  # same because defect concentration negligible
+        # without chempots
+        assert np.isclose(e_conc, 6.129e-7, rtol=1e-3)
+        quenched_conc_df_lists = [
+            ["vac_O", 2, 9.822, 2.0036578800415587e-142, "100.00%", 2.0036578800415587e-142],
+            ["vac_O", 1, 11.083, 5.2947236022031443e-163, "0.00%", 2.0036578800415587e-142],
+            ["vac_O", 0, 12.635, 1.1092122529257341e-189, "0.00%", 2.0036578800415587e-142],
+        ]
+        for i, row in enumerate(quenched_conc_df_lists):
+            print(i, row)
+            assert list(conc_df.iloc[i]) == row
+
+        fermi_level, e_conc, h_conc, conc_df = thermo.get_quenched_fermi_level_and_concentrations(
+            bulk_dos_vr=f"{self.SrTiO3_DATA_DIR}/bulk_sp333/vasprun.xml",
+            annealing_temperature=300,
+            per_site=True,
+        )
+        print("per_site quenched conc_df", conc_df)  # for debugging
+        assert np.isclose(fermi_level, 1.710795, rtol=1e-4)
+        assert np.isclose(e_conc, h_conc, rtol=1e-4)  # same because defect concentration negligible
+        # without chempots
+        assert np.isclose(e_conc, 6.129e-7, rtol=1e-3)
+        quenched_per_site_conc_df_lists = [
+            ["vac_O", 2, 9.822, "100.00%", 2.0036578800415587e-142, "3.954e-163 %"],
+            ["vac_O", 1, 11.083, "0.00%", 2.0036578800415587e-142, "1.045e-183 %"],
+            ["vac_O", 0, 12.635, "0.00%", 2.0036578800415587e-142, "2.189e-210 %"],
+        ]
+        for i, row in enumerate(quenched_per_site_conc_df_lists):
+            print(i, row)
+            assert list(conc_df.iloc[i]) == row
+
+        fermi_level, e_conc, h_conc, conc_df = thermo.get_quenched_fermi_level_and_concentrations(
+            bulk_dos_vr=f"{self.SrTiO3_DATA_DIR}/bulk_sp333/vasprun.xml",
+            annealing_temperature=300,
+            per_site=True,
+            per_charge=False,
+            skip_formatting=True,
+        )
+        print("per_site not per_charge quenched conc_df", conc_df)  # for debugging
+        assert np.isclose(fermi_level, 1.710795, rtol=1e-4)
+        assert np.isclose(e_conc, h_conc, rtol=1e-4)  # same because defect concentration negligible
+        # without chempots
+        assert np.isclose(e_conc, 6.129e-7, rtol=1e-3)
+        assert conc_df.to_numpy().tolist() == [[6.010973640124676e-142, 3.953921443531439e-165]]
+        assert conc_df.index[0] == "vac_O"
+        assert conc_df.index.name == "Defect"
+
         return thermo.plot()
 
     @custom_mpl_image_compare(filename="ZnS_defects.png")

From 47a88b8d462faa7291d97762f4ebf8a69b90103b Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Fri, 31 May 2024 17:06:57 -0400
Subject: [PATCH 04/10] Remove user potcar functional typing import (moved to
 `TYPE_CHECKING` block in new `pymatgen`)

---
 docs/Troubleshooting.rst |  2 +-
 doped/vasp.py            | 16 +++++++---------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/docs/Troubleshooting.rst b/docs/Troubleshooting.rst
index 5af2ab75..9ed26a38 100644
--- a/docs/Troubleshooting.rst
+++ b/docs/Troubleshooting.rst
@@ -14,7 +14,7 @@ message about the origin of the problem, it is likely to be an issue with your v
 
 .. code:: bash
 
-  pip install pymatgen pymatgen-analysis-defects --upgrade
+  pip install pymatgen pymatgen-analysis-defects monty --upgrade
   pip install doped --upgrade
 
 If this does not solve your issue, please check the specific cases noted below. If your issue still isn't
diff --git a/doped/vasp.py b/doped/vasp.py
index 62fca734..80d2ea10 100644
--- a/doped/vasp.py
+++ b/doped/vasp.py
@@ -20,7 +20,7 @@
 from pymatgen.core import SETTINGS
 from pymatgen.core.structure import Structure
 from pymatgen.io.vasp.inputs import BadIncarWarning, Kpoints, Poscar, Potcar
-from pymatgen.io.vasp.sets import DictSet, UserPotcarFunctional
+from pymatgen.io.vasp.sets import DictSet
 from tqdm import tqdm
 
 from doped import _doped_obj_properties_methods, _ignore_pmg_warnings
@@ -69,9 +69,7 @@ def deep_dict_update(d: dict, u: dict) -> dict:
 }
 
 
-def _test_potcar_functional_choice(
-    potcar_functional: UserPotcarFunctional = "PBE", symbols: Optional[list] = None
-):
+def _test_potcar_functional_choice(potcar_functional: str = "PBE", symbols: Optional[list] = None):
     """
     Check if the potcar functional choice needs to be changed to match those
     available.
@@ -138,7 +136,7 @@ def __init__(
         structure: Structure,
         user_incar_settings: Optional[dict] = None,
         user_kpoints_settings: Optional[Union[dict, Kpoints]] = None,
-        user_potcar_functional: UserPotcarFunctional = "PBE",
+        user_potcar_functional: str = "PBE",
         user_potcar_settings: Optional[dict] = None,
         auto_kpar: bool = True,
         poscar_comment: Optional[str] = None,
@@ -251,7 +249,7 @@ def potcar(self) -> Potcar:
         Redefined to intelligently handle ``pymatgen`` ``POTCAR`` issues.
         """
         if any("VASP_PSP_DIR" in i for i in SETTINGS):
-            self.user_potcar_functional: UserPotcarFunctional = _test_potcar_functional_choice(
+            self.user_potcar_functional: str = _test_potcar_functional_choice(
                 self.user_potcar_functional, self.potcar_symbols
             )
 
@@ -319,7 +317,7 @@ def __init__(
         charge_state: int = 0,
         user_incar_settings: Optional[dict] = None,
         user_kpoints_settings: Optional[Union[dict, Kpoints]] = None,
-        user_potcar_functional: UserPotcarFunctional = "PBE",
+        user_potcar_functional: str = "PBE",
         user_potcar_settings: Optional[dict] = None,
         poscar_comment: Optional[str] = None,
         **kwargs,
@@ -598,7 +596,7 @@ def __init__(
         soc: Optional[bool] = None,
         user_incar_settings: Optional[dict] = None,
         user_kpoints_settings: Optional[Union[dict, Kpoints]] = None,
-        user_potcar_functional: UserPotcarFunctional = "PBE",
+        user_potcar_functional: str = "PBE",
         user_potcar_settings: Optional[dict] = None,
         **kwargs,
     ):
@@ -1873,7 +1871,7 @@ def __init__(
         soc: Optional[bool] = None,
         user_incar_settings: Optional[dict] = None,
         user_kpoints_settings: Optional[Union[dict, Kpoints]] = None,
-        user_potcar_functional: UserPotcarFunctional = "PBE",
+        user_potcar_functional: str = "PBE",
         user_potcar_settings: Optional[dict] = None,
         **kwargs,  # to allow POTCAR testing on GH Actions
     ):

From abe4ef525d65cfd72a606d29f1cb6ee0446972df Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Fri, 31 May 2024 17:08:10 -0400
Subject: [PATCH 05/10] Temporarily add typing extensions dependency to fix
 pymatgen bug

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 82ba059e..974d17ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,9 @@ dependencies = [
     "vise>=0.9.0",  # for eigenvalue analysis, >=0.9.0 required for SOC compatibility
     "easyunfold>=0.3.6",  # for efficient PROCAR parsing (and supporting SOC),
     # if any dependency issues arise can make easyunfold an optional dependency
+    "typing-extensions"  # to fix `pymatgen` typing issues
+    # (https://github.com/materialsproject/pymatgen/pull/3752)
+    # TODO: Remove once new pymatgen released
 ]
 
 [project.urls]

From 697e4898fb8d1ee1ed1c4349840c9d17b28fd2a9 Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Fri, 31 May 2024 17:21:09 -0400
Subject: [PATCH 06/10] Use new `Structure.from_ase_atoms()` method instead of
 `AseAtomsAdaptor` (nice one @Andrew-S-Rosen)

---
 examples/GGA_workflow_tutorial.ipynb | 16 +++++++++-------
 tests/test_generation.py             |  4 +---
 tests/test_vasp.py                   |  4 +---
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/examples/GGA_workflow_tutorial.ipynb b/examples/GGA_workflow_tutorial.ipynb
index 43ea55fe..75f3a8cc 100644
--- a/examples/GGA_workflow_tutorial.ipynb
+++ b/examples/GGA_workflow_tutorial.ipynb
@@ -387,9 +387,11 @@
     "from kgrid.series import cutoff_series\n",
     "from kgrid import calc_kpt_tuple\n",
     "import numpy as np\n",
+    "from pymatgen.core.structure import Structure\n",
+    "from pymatgen.io.ase import AseAtomsAdaptor\n",
     "\n",
     "def generate_kgrids_cutoffs(\n",
-    "    structure: pymatgen.core.structure.Structure,\n",
+    "    structure: Structure,\n",
     "    kmin: int = 4,\n",
     "    kmax: int = 20,\n",
     ") -> list:\n",
@@ -414,7 +416,6 @@
     "        l_min=kmin,\n",
     "        l_max=kmax,\n",
     "    )\n",
-    "    kspacing = [np.pi / c for c in kpoint_cutoffs]\n",
     "    kgrid_samples = [\n",
     "        calc_kpt_tuple(atoms, cutoff_length=(cutoff - 1e-4))\n",
     "        for cutoff in kpoint_cutoffs\n",
@@ -4694,17 +4695,19 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 48,
    "metadata": {},
+   "cell_type": "code",
    "outputs": [],
+   "execution_count": 9,
    "source": [
     "from kgrid.series import cutoff_series\n",
     "from kgrid import calc_kpt_tuple\n",
     "import numpy as np\n",
+    "from pymatgen.core.structure import Structure\n",
+    "from pymatgen.io.ase import AseAtomsAdaptor\n",
     "\n",
     "def generate_kgrids_cutoffs(\n",
-    "    structure: pymatgen.core.structure.Structure,\n",
+    "    structure: Structure,\n",
     "    kmin: int = 4,\n",
     "    kmax: int = 20,\n",
     ") -> list:\n",
@@ -4718,7 +4721,7 @@
     "        kmax (int, optional): _description_. Defaults to 20.\n",
     "\n",
     "    Returns:\n",
-    "        _type_: _description_\n",
+    "        list: list of kgrids\n",
     "    \"\"\"\n",
     "    # Transform struct to atoms\n",
     "    aaa = AseAtomsAdaptor()\n",
@@ -4729,7 +4732,6 @@
     "        l_min=kmin,\n",
     "        l_max=kmax,\n",
     "    )\n",
-    "    kspacing = [np.pi / c for c in kpoint_cutoffs]\n",
     "    kgrid_samples = [\n",
     "        calc_kpt_tuple(atoms, cutoff_length=(cutoff - 1e-4))\n",
     "        for cutoff in kpoint_cutoffs\n",
diff --git a/tests/test_generation.py b/tests/test_generation.py
index 7fcec6c6..a1b95ec0 100644
--- a/tests/test_generation.py
+++ b/tests/test_generation.py
@@ -27,7 +27,6 @@
 from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
 from pymatgen.core.structure import PeriodicSite, Structure
 from pymatgen.entries.computed_entries import ComputedStructureEntry
-from pymatgen.io.ase import AseAtomsAdaptor
 from pymatgen.io.vasp import Poscar
 from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
 from pymatgen.util.coord import pbc_diff
@@ -295,8 +294,7 @@ def setUp(self):
         atoms = bulk("Cu")
         atoms = make_supercell(atoms, [[2, 0, 0], [0, 2, 0], [0, 0, 2]])
         atoms.set_chemical_symbols(["Cu", "Ag"] * 4)
-        aaa = AseAtomsAdaptor()
-        self.agcu = aaa.get_structure(atoms)
+        self.agcu = Structure.from_ase_atoms(atoms)
         self.agcu_defect_gen_string = (
             "DefectsGenerator for input composition AgCu, space group R-3m with 28 defect entries created."
         )
diff --git a/tests/test_vasp.py b/tests/test_vasp.py
index f0265d78..02ca1b56 100644
--- a/tests/test_vasp.py
+++ b/tests/test_vasp.py
@@ -19,7 +19,6 @@
 from monty.serialization import loadfn
 from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
 from pymatgen.core.structure import Structure
-from pymatgen.io.ase import AseAtomsAdaptor
 from pymatgen.io.vasp.inputs import BadIncarWarning, Incar, Kpoints, Poscar, Potcar
 from test_generation import if_present_rm
 
@@ -106,8 +105,7 @@ def setUp(self):
         atoms = bulk("Cu")
         atoms = make_supercell(atoms, [[2, 0, 0], [0, 2, 0], [0, 0, 2]])
         atoms.set_chemical_symbols(["Cu", "Ag"] * 4)
-        aaa = AseAtomsAdaptor()
-        self.agcu = aaa.get_structure(atoms)
+        self.agcu = Structure.from_ase_atoms(atoms)
         self.sqs_agsbte2 = Structure.from_file(f"{self.data_dir}/AgSbTe2_SQS_POSCAR")
 
         self.neutral_def_incar_min = {

From 9a0b60d9deec34b3ec8a350f106c0c235596df14 Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Sun, 2 Jun 2024 17:33:49 -0400
Subject: [PATCH 07/10] Overhaul oxi state guessing; use `BVAnalyzer` if
 possible (faster and better), make oxi-guessing functions public, use
 `SimpleQueue` to imporove timeout efficiency...

---
 doped/analysis.py        |  24 ++---
 doped/core.py            | 224 ++++++++++++++++++++++++++++-----------
 doped/generation.py      |  24 ++---
 tests/test_generation.py |  64 +++++------
 4 files changed, 214 insertions(+), 122 deletions(-)

diff --git a/doped/analysis.py b/doped/analysis.py
index b5f970ec..160a5cb0 100644
--- a/doped/analysis.py
+++ b/doped/analysis.py
@@ -10,7 +10,7 @@
 import contextlib
 import os
 import warnings
-from multiprocessing import Pool, Queue, cpu_count
+from multiprocessing import Pool, cpu_count
 from typing import TYPE_CHECKING, Optional, Union
 
 import numpy as np
@@ -26,7 +26,7 @@
 from tqdm import tqdm
 
 from doped import _doped_obj_properties_methods, _ignore_pmg_warnings
-from doped.core import DefectEntry, _guess_and_set_oxi_states_with_timeout, _rough_oxi_state_cost_from_comp
+from doped.core import DefectEntry, guess_and_set_oxi_states_with_timeout
 from doped.generation import get_defect_name_from_defect, get_defect_name_from_entry, name_defect_entries
 from doped.thermodynamics import DefectThermodynamics
 from doped.utils.parsing import (
@@ -706,18 +706,14 @@ def __init__(
 
         # try parsing the bulk oxidation states first, for later assigning defect "oxi_state"s (i.e.
         # fully ionised charge states):
-        if _rough_oxi_state_cost_from_comp(self.bulk_vr.final_structure.composition) > 1e6:
-            self._bulk_oxi_states: Union[dict, bool] = False  # will take very long to guess oxi_state
-        else:
-            queue: Queue = Queue()
-            self._bulk_oxi_states = _guess_and_set_oxi_states_with_timeout(
-                self.bulk_vr.final_structure, queue=queue
-            )
-            if self._bulk_oxi_states:
-                self.bulk_vr.final_structure = queue.get()  # oxi-state decorated structure
-                self._bulk_oxi_states = {
-                    el.symbol: el.oxi_state for el in self.bulk_vr.final_structure.composition.elements
-                }
+        self._bulk_oxi_states: Union[dict, bool] = False
+        if bulk_struct_w_oxi := guess_and_set_oxi_states_with_timeout(
+            self.bulk_vr.final_structure, break_early_if_expensive=True
+        ):
+            self.bulk_vr.final_structure = bulk_struct_w_oxi
+            self._bulk_oxi_states = {
+                el.symbol: el.oxi_state for el in self.bulk_vr.final_structure.composition.elements  # type: ignore
+            }
 
         self.defect_dict = {}
         self.bulk_corrections_data = {  # so we only load and parse bulk data once
diff --git a/doped/core.py b/doped/core.py
index 2c3e9815..9b38ebc6 100644
--- a/doped/core.py
+++ b/doped/core.py
@@ -7,12 +7,12 @@
 import warnings
 from dataclasses import asdict, dataclass, field
 from functools import reduce
-from itertools import combinations_with_replacement
-from multiprocessing import Process, Queue, current_process
+from multiprocessing import Process, SimpleQueue, current_process
 from typing import TYPE_CHECKING, Any, Optional, Union
 
 import numpy as np
 from monty.serialization import dumpfn, loadfn
+from pymatgen.analysis.bond_valence import BVAnalyzer
 from pymatgen.analysis.defects import core, thermo
 from pymatgen.analysis.defects.utils import CorrectionResult
 from pymatgen.core.composition import Composition, Element
@@ -1270,9 +1270,33 @@ def _get_dft_chempots(chempots, el_refs, limit):
     return chempots
 
 
-def _guess_and_set_struct_oxi_states(structure, try_without_max_sites=False, queue=None):
+def _guess_and_set_struct_oxi_states(structure):
     """
-    Tries to guess (and set) the oxidation states of the input structure.
+    Tries to guess (and set) the oxidation states of the input structure, using
+    the ``pymatgen`` ``BVAnalyzer`` class.
+
+    Args:
+        structure (Structure): The structure for which to guess the oxidation states.
+
+    Returns:
+        Structure: The structure with oxidation states guessed and set, or ``False``
+        if oxidation states could not be guessed.
+    """
+    bv_analyzer = BVAnalyzer()
+    with contextlib.suppress(ValueError):  # ValueError raised if oxi states can't be assigned
+        oxi_dec_structure = bv_analyzer.get_oxi_state_decorated_structure(structure)
+        if all(
+            np.isclose(int(specie.oxi_state), specie.oxi_state) for specie in oxi_dec_structure.species
+        ):
+            return oxi_dec_structure
+
+    return False  # if oxi states could not be guessed
+
+
+def _guess_and_set_struct_oxi_states_icsd_prob(structure, try_without_max_sites=False):
+    """
+    Tries to guess (and set) the oxidation states of the input structure, using
+    the ``pymatgen``-tabulated ICSD oxidation state probabilities.
 
     Args:
         structure (Structure): The structure for which to guess the oxidation states.
@@ -1280,26 +1304,24 @@ def _guess_and_set_struct_oxi_states(structure, try_without_max_sites=False, que
             Whether to try to guess the oxidation states
             without using the ``max_sites=-1`` argument (``True``)(which attempts
             to use the reduced composition for guessing oxi states) or not (``False``).
-        queue (Queue):
-            A multiprocessing queue to put the guessed structure in, if provided.
-            Only really intended for use internally in ``doped``, during defect
-            generation.
+
+    Returns:
+        Structure: The structure with oxidation states guessed and set, or ``False``
+        if oxidation states could not be guessed.
     """
+    structure = structure.copy()  # don't modify original structure
     if try_without_max_sites:
         with contextlib.suppress(Exception):
             structure.add_oxidation_state_by_guess()
             # check all oxidation states are whole numbers:
             if all(np.isclose(int(specie.oxi_state), specie.oxi_state) for specie in structure.species):
-                if queue is not None:
-                    queue.put(structure)
-                return
+                return structure
 
     # else try to use the reduced cell since oxidation state assignment scales poorly with system size:
     try:
         attempt = 0
         structure.add_oxidation_state_by_guess(max_sites=-1)
-        # check oxi_states assigned and not all zero:
-        while (
+        while (  # check oxi_states assigned and not all zero:
             attempt < 3
             and all(specie.oxi_state == 0 for specie in structure.species)
             or not all(np.isclose(int(specie.oxi_state), specie.oxi_state) for specie in structure.species)
@@ -1312,16 +1334,112 @@ def _guess_and_set_struct_oxi_states(structure, try_without_max_sites=False, que
     except Exception:
         structure.add_oxidation_state_by_guess()
 
-    if queue is not None:
-        queue.put(structure)
+    if all(hasattr(site.specie, "oxi_state") for site in structure.sites) and all(
+        isinstance(site.specie.oxi_state, (int, float)) for site in structure.sites
+    ):
+        return structure
+
+    return False
 
 
-def _guess_and_set_oxi_states_with_timeout(structure, timeout_1=10, timeout_2=15, queue=None) -> bool:
+def guess_and_set_struct_oxi_states(structure, try_without_max_sites=False):
+    """
+    Tries to guess (and set) the oxidation states of the input structure, first
+    using the ``pymatgen`` ``BVAnalyzer`` class, and if that fails, using the
+    ICSD oxidation state probabilities to guess.
+
+    Args:
+        structure (Structure): The structure for which to guess the oxidation states.
+        try_without_max_sites (bool):
+            Whether to try to guess the oxidation states
+            without using the ``max_sites=-1`` argument (``True``)(which attempts
+            to use the reduced composition for guessing oxi states) or not (``False``),
+            when using the ICSD oxidation state probability guessing.
+
+    Returns:
+        Structure: The structure with oxidation states guessed and set, or ``False``
+        if oxidation states could not be guessed.
+    """
+    if structure_with_oxi := _guess_and_set_struct_oxi_states(structure):
+        return structure_with_oxi
+
+    return _guess_and_set_struct_oxi_states_icsd_prob(structure, try_without_max_sites)
+
+
+def guess_and_set_oxi_states_with_timeout(
+    structure, timeout_1=10, timeout_2=15, break_early_if_expensive=False
+) -> bool:
     """
     Tries to guess (and set) the oxidation states of the input structure, with
     a timeout catch for cases where the structure is complex and oxi state
     guessing will take a very very long time.
 
+    Tries first without using the ``pymatgen`` ``BVAnalyzer`` class, and if
+    this fails, tries using the ICSD oxidation state probabilities (with
+    timeouts) to guess.
+
+    Args:
+        structure (Structure): The structure for which to guess the oxidation states.
+        timeout_1 (float):
+            Timeout in seconds for the second attempt to guess the oxidation states,
+            using ICSD oxidation state probabilities (with ``max_sites=-1``).
+            Default is 10 seconds.
+        timeout_2 (float):
+            Timeout in seconds for the third attempt to guess the oxidation states,
+            using ICSD oxidation state probabilities (without ``max_sites=-1``).
+            Default is 15 seconds.
+        break_early_if_expensive (bool):
+            Whether to stop the function if the first oxi state guessing attempt
+            (with ``BVAnalyzer``) fails and the cost estimate for the ICSD probability
+            guessing is high (expected to take a long time; > 10 seconds).
+            Default is ``False``.
+
+    Returns:
+        Structure: The structure with oxidation states guessed and set, or ``False``
+        if oxidation states could not be guessed.
+    """
+    if structure_with_oxi := _guess_and_set_struct_oxi_states(structure):
+        return structure_with_oxi  # BVAnalyzer succeeded
+
+    if (  # if BVAnalyzer failed and cost estimate is high, break early:
+        (
+            break_early_if_expensive or current_process().daemon
+        )  # if in a daemon process, can't spawn new `Process`s
+        and _rough_oxi_state_cost_icsd_prob_from_comp(structure.composition) > 1e6
+    ):
+        return False
+
+    if current_process().daemon:  # if in a daemon process, can't spawn new `Process`s
+        return _guess_and_set_struct_oxi_states_icsd_prob(structure)
+
+    return _guess_and_set_oxi_states_with_timeout_icsd_prob(structure, timeout_1, timeout_2)
+
+
+def _guess_and_set_struct_oxi_states_icsd_prob_process(structure, queue, try_without_max_sites=False):
+    """
+    Implements the ``_guess_and_set_struct_oxi_states_icsd_prob`` function
+    above, but also putting the results into the supplied ``multiprocessing``
+    queue object (for use with timeouts via ``Process``).
+
+    For internal ``doped`` usage.
+    """
+    if structure_with_oxi := _guess_and_set_struct_oxi_states_icsd_prob(structure, try_without_max_sites):
+        queue.put(structure_with_oxi)
+    else:
+        queue.put(False)
+
+
+def _guess_and_set_oxi_states_with_timeout_icsd_prob(
+    structure,
+    timeout_1=10,
+    timeout_2=15,
+) -> bool:
+    """
+    Tries to guess (and set) the oxidation states of the input structure using
+    the ICSD oxidation state probabilities approach, with a timeout catch for
+    cases where the structure is complex and oxi state guessing will take a
+    very very long time.
+
     Tries first without using the ``max_sites=-1`` argument with ``pymatgen``'s
     oxidation state guessing functions (which attempts to use the reduced
     composition for guessing oxi states, but can be a little less reliable for tricky
@@ -1329,25 +1447,21 @@ def _guess_and_set_oxi_states_with_timeout(structure, timeout_1=10, timeout_2=15
 
     Args:
         structure (Structure): The structure for which to guess the oxidation states.
-        queue (Queue):
-            A multiprocessing queue to put the guessed structure in, if provided.
-            Only really intended for use internally in ``doped``, during defect
-            generation.
         timeout_1 (float):
             Timeout in seconds for the first attempt to guess the oxidation states
             (with ``max_sites=-1``). Default is 10 seconds.
         timeout_2 (float):
             Timeout in seconds for the second attempt to guess the oxidation states
             (without ``max_sites=-1``). Default is 15 seconds.
+
+    Returns:
+        Structure: The structure with oxidation states guessed and set, or ``False``
+        if oxidation states could not be guessed.
     """
-    if queue is None:
-        queued_struct = False
-        queue = Queue()
-    else:
-        queued_struct = True
+    queue: SimpleQueue = SimpleQueue()
 
     guess_oxi_process_wout_max_sites = Process(
-        target=_guess_and_set_struct_oxi_states, args=(structure, True, queue)
+        target=_guess_and_set_struct_oxi_states_icsd_prob_process, args=(structure, queue, True)
     )  # try without max sites first, if fails, try with max sites
     guess_oxi_process_wout_max_sites.start()
     guess_oxi_process_wout_max_sites.join(timeout=timeout_1)
@@ -1357,8 +1471,8 @@ def _guess_and_set_oxi_states_with_timeout(structure, timeout_1=10, timeout_2=15
         guess_oxi_process_wout_max_sites.join()
 
         guess_oxi_process = Process(
-            target=_guess_and_set_struct_oxi_states,
-            args=(structure, False, queue),
+            target=_guess_and_set_struct_oxi_states_icsd_prob_process,
+            args=(structure, queue, False),
         )
         guess_oxi_process.start()
         guess_oxi_process.join(timeout=timeout_2)  # wait for pymatgen to guess oxi states,
@@ -1370,20 +1484,15 @@ def _guess_and_set_oxi_states_with_timeout(structure, timeout_1=10, timeout_2=15
 
             return False
 
-    if not queued_struct:
-        # apply oxi states to structure:
-        structure_from_subprocess = queue.get()
-        structure.add_oxidation_state_by_element(
-            {el.symbol: el.oxi_state for el in structure_from_subprocess.composition.elements}
-        )
-
-    return True
+    # apply oxi states to structure:
+    return queue.get()
 
 
-def _rough_oxi_state_cost_from_comp(comp: Union[str, Composition], max_sites=True) -> float:
+def _rough_oxi_state_cost_icsd_prob_from_comp(comp: Union[str, Composition], max_sites=True) -> float:
     """
     A cost function which roughly estimates the computational cost of guessing
-    the oxidation states of a given composition.
+    the oxidation states of a given composition, using the ICSD oxidation state
+    probabilities approach.
     """
     if isinstance(comp, str):
         comp = Composition(comp)
@@ -1393,13 +1502,16 @@ def _rough_oxi_state_cost_from_comp(comp: Union[str, Composition], max_sites=Tru
 
     el_amt = comp.get_el_amt_dict()
     elements = list(el_amt)
+
+    def num_possible_combinations(n, r):
+        from math import factorial
+
+        return factorial(n + r - 1) / factorial(r) / factorial(n - 1)
+
     return np.prod(
         [
-            sum(
-                1
-                for _i in combinations_with_replacement(
-                    Element(el).icsd_oxidation_states or Element(el).oxidation_states, int(el_amt[el])
-                )
+            num_possible_combinations(
+                len(Element(el).icsd_oxidation_states or Element(el).oxidation_states), int(el_amt[el])
             )
             for el in elements
         ]
@@ -1488,26 +1600,16 @@ def _set_oxi_state(self):
             all(hasattr(site.specie, "oxi_state") for site in self.structure.sites)
             and all(isinstance(site.specie.oxi_state, (int, float)) for site in self.structure.sites)
         ):
-            if _rough_oxi_state_cost_from_comp(self.structure.composition) > 1e6:
-                self.oxi_state = "Undetermined"  # likely will take very long to guess oxi_state
-                return
-
-            if current_process().daemon:  # if in a daemon process, can't spawn new `Process`s
-                _guess_and_set_struct_oxi_states(self.structure)
-                self.oxi_state = self._guess_oxi_state()
+            # try guess oxi-states but with timeout:
+            if struct_w_oxi := guess_and_set_oxi_states_with_timeout(
+                self.structure, timeout_1=5, timeout_2=5, break_early_if_expensive=True
+            ):
+                self.structure = struct_w_oxi
+            else:
+                self.oxi_state = "Undetermined"
                 return
 
-            # else try guess oxi-states but with timeout, using new `Process`s:
-            _guess_and_set_oxi_states_with_timeout(self.structure, timeout_1=5, timeout_2=5)
-
-        if not (  # oxi states unable to be parsed, set to "Undetermined"
-            all(hasattr(site.specie, "oxi_state") for site in self.structure.sites)
-            and all(isinstance(site.specie.oxi_state, (int, float)) for site in self.structure.sites)
-        ):
-            self.oxi_state = "Undetermined"
-
-        else:
-            self.oxi_state = self._guess_oxi_state()
+        self.oxi_state = self._guess_oxi_state()
 
     @classmethod
     def _from_pmg_defect(cls, defect: core.Defect, bulk_oxi_states=False, **doped_kwargs) -> "Defect":
diff --git a/doped/generation.py b/doped/generation.py
index 49d9c505..b0aa7ab0 100644
--- a/doped/generation.py
+++ b/doped/generation.py
@@ -9,7 +9,7 @@
 import warnings
 from functools import partial, reduce
 from itertools import chain
-from multiprocessing import Pool, Queue, cpu_count
+from multiprocessing import Pool, cpu_count
 from typing import Optional, Union, cast
 from unittest.mock import MagicMock
 
@@ -39,8 +39,8 @@
     Interstitial,
     Substitution,
     Vacancy,
-    _guess_and_set_oxi_states_with_timeout,
     doped_defect_from_pmg_defect,
+    guess_and_set_oxi_states_with_timeout,
 )
 from doped.utils import parsing, supercells, symmetry
 
@@ -789,10 +789,9 @@ def guess_defect_charge_states(
         possible_oxi_states, orig_oxi, max_host_oxi_magnitude, return_log=True
     )
 
-    charge_state_list = [
+    if charge_state_list := [
         k for k, v in possible_charge_states.items() if v["probability"] > probability_threshold
-    ]
-    if charge_state_list:
+    ]:
         charge_state_range = (int(min(charge_state_list)), int(max(charge_state_list)))
     else:
         charge_state_range = (0, 0)
@@ -844,10 +843,9 @@ def guess_defect_charge_states(
         sorted(possible_charge_states.items(), key=lambda x: x[1]["probability"], reverse=True)
     )
 
-    charge_state_list = [
+    if charge_state_list := [
         k for k, v in sorted_charge_state_dict.items() if v["probability"] > probability_threshold
-    ]
-    if charge_state_list:
+    ]:
         charge_state_range = (int(min(charge_state_list)), int(max(charge_state_list)))
     else:
         # if no charge states are included, take most probable (if probability > 0.1*threshold)
@@ -1332,7 +1330,7 @@ class (such as ``clustering_tol``, ``stol``, ``min_dist`` etc), or to
                 )
 
             self._bulk_oxi_states: Union[bool, dict] = (
-                True  # to check if pymatgen can guess the bulk oxidation states
+                False  # to check if pymatgen can guess the bulk oxidation states
             )
             # if input structure was oxi-state-decorated, use these oxi states for defect generation:
             if all(hasattr(site.specie, "oxi_state") for site in self.structure.sites) and all(
@@ -1343,12 +1341,8 @@ class (such as ``clustering_tol``, ``stol``, ``min_dist`` etc), or to
                 }
 
             else:  # guess & set oxidation states now, to speed up oxi state handling in defect generation
-                queue: Queue = Queue()
-                self._bulk_oxi_states = _guess_and_set_oxi_states_with_timeout(
-                    self.primitive_structure, queue=queue
-                )
-                if self._bulk_oxi_states:
-                    self.primitive_structure = queue.get()
+                if prim_struct_w_oxi := guess_and_set_oxi_states_with_timeout(self.primitive_structure):
+                    self.primitive_structure = prim_struct_w_oxi
                     self._bulk_oxi_states = {
                         el.symbol: el.oxi_state for el in self.primitive_structure.composition.elements
                     }
diff --git a/tests/test_generation.py b/tests/test_generation.py
index a1b95ec0..8413c460 100644
--- a/tests/test_generation.py
+++ b/tests/test_generation.py
@@ -605,28 +605,28 @@ def setUp(self):
         self.sb2si2te6 = Structure.from_file(f"{self.data_dir}/Sb2Si2Te6_POSCAR")
 
         self.sb2si2te6_defect_gen_info = (
-            """Vacancies    Guessed Charges     Conv. Cell Coords    Wyckoff
------------  ------------------  -------------------  ---------
-v_Si         [+1,0,-1,-2,-3,-4]  [0.000,0.000,0.445]  6c
-v_Sb         [+1,0,-1,-2]        [0.000,0.000,0.166]  6c
-v_Te         [+2,+1,0,-1]        [0.335,0.003,0.073]  18f
+            """Vacancies    Guessed Charges    Conv. Cell Coords    Wyckoff
+-----------  -----------------  -------------------  ---------
+v_Si         [+1,0,-1,-2,-3]    [0.000,0.000,0.445]  6c
+v_Sb         [+1,0,-1,-2,-3]    [0.000,0.000,0.166]  6c
+v_Te         [+2,+1,0,-1]       [0.335,0.003,0.073]  18f
 
 Substitutions    Guessed Charges              Conv. Cell Coords    Wyckoff
 ---------------  ---------------------------  -------------------  ---------
-Si_Sb            [+2,+1,0]                    [0.000,0.000,0.166]  6c
+Si_Sb            [+1,0,-1]                    [0.000,0.000,0.166]  6c
 Si_Te            [+6,+5,+4,+3,+2,+1,0,-1,-2]  [0.335,0.003,0.073]  18f
-Sb_Si            [+1,0,-1,-2,-3,-4,-5,-6,-7]  [0.000,0.000,0.445]  6c
+Sb_Si            [+2,+1,0,-1,-2,-3,-4,-5,-6]  [0.000,0.000,0.445]  6c
 Sb_Te            [+7,+6,+5,+4,+3,+2,+1,0,-1]  [0.335,0.003,0.073]  18f
-Te_Si            [+2,+1,0,-1,-2,-3,-4,-5,-6]  [0.000,0.000,0.445]  6c
-Te_Sb            [+4,+3,+2,+1,0,-1,-2,-3,-4]  [0.000,0.000,0.166]  6c
+Te_Si            [+3,+2,+1,0,-1,-2,-3,-4,-5]  [0.000,0.000,0.445]  6c
+Te_Sb            [+3,+2,+1,0,-1,-2,-3,-4,-5]  [0.000,0.000,0.166]  6c
 
 Interstitials    Guessed Charges              Conv. Cell Coords    Wyckoff
 ---------------  ---------------------------  -------------------  ---------
-Si_i_C1_Si2.20   [+4,+3,+2,+1,0,-1,-2,-3,-4]  [0.179,0.359,0.167]  18f
-Si_i_C1_Si2.43   [+4,+3,+2,+1,0,-1,-2,-3,-4]  [0.341,0.341,0.458]  18f
-Si_i_C1_Te2.44   [+4,+3,+2,+1,0,-1,-2,-3,-4]  [0.001,0.336,0.289]  18f
-Si_i_C3_Si2.64   [+4,+3,+2,+1,0,-1,-2,-3,-4]  [0.000,0.000,0.318]  6c
-Si_i_C3i_Te2.81  [+4,+3,+2,+1,0,-1,-2,-3,-4]  [0.000,0.000,0.000]  3a
+Si_i_C1_Si2.20   [+4,+3,+2,+1,0]              [0.179,0.359,0.167]  18f
+Si_i_C1_Si2.43   [+4,+3,+2,+1,0]              [0.341,0.341,0.458]  18f
+Si_i_C1_Te2.44   [+4,+3,+2,+1,0]              [0.001,0.336,0.289]  18f
+Si_i_C3_Si2.64   [+4,+3,+2,+1,0]              [0.000,0.000,0.318]  6c
+Si_i_C3i_Te2.81  [+4,+3,+2,+1,0]              [0.000,0.000,0.000]  3a
 Sb_i_C1_Si2.20   [+5,+4,+3,+2,+1,0,-1,-2,-3]  [0.179,0.359,0.167]  18f
 Sb_i_C1_Si2.43   [+5,+4,+3,+2,+1,0,-1,-2,-3]  [0.341,0.341,0.458]  18f
 Sb_i_C1_Te2.44   [+5,+4,+3,+2,+1,0,-1,-2,-3]  [0.001,0.336,0.289]  18f
@@ -1001,25 +1001,25 @@ def _generate_and_test_no_warnings(self, structure, min_image_distance=None, **k
             with warnings.catch_warnings(record=True) as w:
                 warnings.resetwarnings()
                 defect_gen = DefectsGenerator(structure, **kwargs)
-            if w:
-                print([str(warning.message) for warning in w])  # for debugging
-            if min_image_distance is None:
-                assert not w
-            else:
-                assert len(w) == 1
-                assert issubclass(w[-1].category, UserWarning)
-                assert (
-                    f"Input structure is <10 Å in at least one direction (minimum image distance ="
-                    f" {min_image_distance:.2f} Å, which is usually too small for accurate defect "
-                    f"calculations, but generate_supercell = False, so using input structure as "
-                    f"defect & bulk supercells. Caution advised!" in str(w[-1].message)
-                )
             output = sys.stdout.getvalue()  # Return a str containing the printed output
         finally:
             sys.stdout = original_stdout  # Reset standard output to its original value.
 
         if w:
             print([str(warning.message) for warning in w])  # for debugging
+        print(output)  # for debugging
+
+        if min_image_distance is None:
+            assert not w
+        else:
+            assert len(w) == 1
+            assert issubclass(w[-1].category, UserWarning)
+            assert (
+                f"Input structure is <10 Å in at least one direction (minimum image distance ="
+                f" {min_image_distance:.2f} Å, which is usually too small for accurate defect "
+                f"calculations, but generate_supercell = False, so using input structure as "
+                f"defect & bulk supercells. Caution advised!" in str(w[-1].message)
+            )
 
         return defect_gen, output
 
@@ -3209,11 +3209,11 @@ def test_charge_state_gen_kwargs(self):
 
         assert (  # different charge states than when max_sites = -1 is used:
             (
-                """Vacancies    Guessed Charges        Conv. Cell Coords    Wyckoff
------------  ---------------------  -------------------  ---------
-v_Si         [+2,+1,0,-1,-2,-3,-4]  [0.000,0.000,0.445]  6c
-v_Sb         [+2,+1,0,-1,-2]        [0.000,0.000,0.166]  6c
-v_Te         [+2,+1,0,-1,-2]        [0.335,0.003,0.073]  18f
+                """Vacancies    Guessed Charges     Conv. Cell Coords    Wyckoff
+-----------  ------------------  -------------------  ---------
+v_Si         [+2,+1,0,-1,-2,-3]  [0.000,0.000,0.445]  6c
+v_Sb         [+2,+1,0,-1,-2,-3]  [0.000,0.000,0.166]  6c
+v_Te         [+2,+1,0,-1,-2]     [0.335,0.003,0.073]  18f
 \n"""
             )
             in output

From d98252e6de4c941c54f30d074a0395355fd60d3d Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Sun, 2 Jun 2024 21:20:09 -0400
Subject: [PATCH 08/10] Update tests

---
 doped/vasp.py             | 138 +++++++++++++++++++-------------------
 tests/data/VO2_POSCAR     |  14 ++++
 tests/test_analysis.py    |   5 ++
 tests/test_corrections.py |   9 ++-
 4 files changed, 92 insertions(+), 74 deletions(-)
 create mode 100644 tests/data/VO2_POSCAR

diff --git a/doped/vasp.py b/doped/vasp.py
index 80d2ea10..f50cb967 100644
--- a/doped/vasp.py
+++ b/doped/vasp.py
@@ -20,7 +20,7 @@
 from pymatgen.core import SETTINGS
 from pymatgen.core.structure import Structure
 from pymatgen.io.vasp.inputs import BadIncarWarning, Kpoints, Poscar, Potcar
-from pymatgen.io.vasp.sets import DictSet
+from pymatgen.io.vasp.sets import VaspInputSet
 from tqdm import tqdm
 
 from doped import _doped_obj_properties_methods, _ignore_pmg_warnings
@@ -123,9 +123,9 @@ def __repr__(self):
             return super().__repr__()
 
 
-class DopedDictSet(DictSet):
+class DopedDictSet(VaspInputSet):
     """
-    Modified version of ``pymatgen`` ``DictSet``, to have more robust
+    Modified version of ``pymatgen`` ``VaspInputSet``, to have more robust
     ``POTCAR`` handling, expedited I/O (particularly for ``POTCAR`` generation,
     which can be slow when generating many folders), ensure ``POSCAR`` atom
     sorting, avoid encoding issues with ``KPOINTS`` comments etc.
@@ -152,7 +152,7 @@ def __init__(
                 quotation marks (e.g. ``{"ALGO": "All"}``).
                 (default: None)
             user_kpoints_settings (dict or Kpoints):
-                Dictionary of user ``KPOINTS`` settings (in ``pymatgen`` ``DictSet``
+                Dictionary of user ``KPOINTS`` settings (in ``pymatgen`` ``VaspInputSet``
                 format) e.g., ``{"reciprocal_density": 123}``, or a ``Kpoints`` object.
                 Default is Gamma-only.
             user_potcar_functional (str):
@@ -170,7 +170,7 @@ def __init__(
                 Default is ``True``.
             poscar_comment (str):
                 Comment line to use for ``POSCAR`` file. Default is structure formula.
-            **kwargs: Additional kwargs to pass to ``DictSet``.
+            **kwargs: Additional kwargs to pass to ``VaspInputSet``.
         """
         _ignore_pmg_warnings()
         self.auto_kpar = auto_kpar
@@ -205,7 +205,7 @@ def __init__(
                 user_potcar_settings = user_potcar_settings["POTCAR"]
 
         potcar_settings.update(user_potcar_settings or {})
-        base_config_dict = {"POTCAR": potcar_settings}  # needs to be set with ``DictSet``
+        base_config_dict = {"POTCAR": potcar_settings}  # needs to be set with ``VaspInputSet``
         config_dict = deep_dict_update(base_config_dict, kwargs.pop("config_dict", {}))
         config_dict["INCAR"] = user_incar_settings or {}
 
@@ -221,8 +221,8 @@ def __init__(
     @property
     def incar(self):
         """
-        Returns the ``Incar`` object generated from the ``DictSet`` config,
-        with a warning if ``KPAR > 1`` and only one k-point.
+        Returns the ``Incar`` object generated from the ``VaspInputSet``
+        config, with a warning if ``KPAR > 1`` and only one k-point.
         """
         incar_obj = super().incar
 
@@ -307,7 +307,7 @@ def __repr__(self):
 
 class DefectDictSet(DopedDictSet):
     """
-    Extension to ``pymatgen`` ``DictSet`` object for ``VASP`` defect
+    Extension to ``pymatgen`` ``VaspInputSet`` object for ``VASP`` defect
     calculations.
     """
 
@@ -335,7 +335,7 @@ def __init__(
                 as strings with quotation marks (e.g. ``{"ALGO": "All"}``).
                 (default: None)
             user_kpoints_settings (dict or Kpoints):
-                Dictionary of user ``KPOINTS`` settings (in ``pymatgen`` ``DictSet`` format) e.g.,
+                Dictionary of user ``KPOINTS`` settings (in ``pymatgen`` ``VaspInputSet`` format) e.g.,
                 ``{"reciprocal_density": 123}``, or a ``Kpoints`` object. Default is Gamma-centred,
                 reciprocal_density = 100 [Å⁻³].
             user_potcar_functional (str):
@@ -347,7 +347,7 @@ def __init__(
             poscar_comment (str):
                 Comment line to use for ``POSCAR`` files. Default is defect name,
                 fractional coordinates of initial site and charge state.
-            **kwargs: Additional kwargs to pass to ``DictSet``.
+            **kwargs: Additional kwargs to pass to ``VaspInputSet``.
         """
         _ignore_pmg_warnings()
         self.charge_state = charge_state
@@ -491,7 +491,7 @@ def write_input(
     ):
         """
         Writes out all input to a directory. Refactored slightly from
-        ``pymatgen`` ``DictSet.write_input()`` to allow checking of user
+        ``pymatgen`` ``VaspInputSet.write_input()`` to allow checking of user
         ``POTCAR`` setup.
 
         Args:
@@ -520,7 +520,7 @@ def write_input(
         else:
             potcars = self._check_user_potcars(unperturbed_poscar=unperturbed_poscar, snb=snb)
 
-        if unperturbed_poscar and potcars:  # write everything, use DictSet.write_input()
+        if unperturbed_poscar and potcars:  # write everything, use VaspInputSet.write_input()
             try:
                 super().write_input(
                     output_path,
@@ -627,7 +627,7 @@ def __init__(
             is only generated for systems with a max atomic number (Z) >= 31 (i.e.
             further down the periodic table than Zn).
 
-        where ``DefectDictSet`` is an extension of ``pymatgen``'s ``DictSet`` class for
+        where ``DefectDictSet`` is an extension of ``pymatgen``'s ``VaspInputSet`` class for
         defect calculations, with ``incar``, ``poscar``, ``kpoints`` and ``potcar``
         attributes for the corresponding VASP defect calculations (see docstring).
         Also creates the corresponding ``bulk_vasp_...`` attributes for singlepoint
@@ -669,10 +669,10 @@ def __init__(
                 as strings with quotation marks (e.g. ``{"ALGO": "All"}``).
                 (default: None)
             user_kpoints_settings (dict or Kpoints):
-                Dictionary of user KPOINTS settings (in pymatgen DictSet format)
+                Dictionary of user KPOINTS settings (in ``pymatgen`` ``VaspInputSet`` format)
                 e.g. {"reciprocal_density": 123}, or a Kpoints object, to use for the
-                ``vasp_std``, ``vasp_nkred_std`` and ``vasp_ncl`` DefectDictSets (Γ-only for
-                ``vasp_gam``). Default is Gamma-centred, reciprocal_density = 100 [Å⁻³].
+                ``vasp_std``, ``vasp_nkred_std`` and ``vasp_ncl`` ``DefectDictSet``\s (Γ-only
+                for ``vasp_gam``). Default is Gamma-centred, reciprocal_density = 100 [Å⁻³].
             user_potcar_functional (str):
                 POTCAR functional to use. Default is "PBE" and if this fails,
                 tries "PBE_52", then "PBE_54".
@@ -683,22 +683,22 @@ def __init__(
 
         Attributes:
             vasp_gam (DefectDictSet):
-                DefectDictSet for Gamma-point only relaxation. Usually not needed
+                ``DefectDictSet`` for Gamma-point only relaxation. Usually not needed
                 if ShakeNBreak structure searching has been performed
                 (recommended), unless only Γ-point `k`-point sampling is required
                 (converged) for your system, and no vasp_std calculations with
                 multiple `k`-points are required (determined from kpoints settings).
             vasp_nkred_std (DefectDictSet):
-                DefectDictSet for relaxation with a non-Γ-only kpoint mesh, using
+                ``DefectDictSet`` for relaxation with a non-Γ-only kpoint mesh, using
                 ``NKRED(X,Y,Z)`` INCAR tag(s) to downsample kpoints for the HF exchange
                 part of the hybrid DFT calculation. Not generated for GGA calculations
                 (if ``LHFCALC`` is set to ``False`` in user_incar_settings) or if only Gamma
                 kpoint sampling is required.
             vasp_std (DefectDictSet):
-                DefectDictSet for relaxation with a non-Γ-only kpoint mesh, not using
+                ``DefectDictSet`` for relaxation with a non-Γ-only kpoint mesh, not using
                 ``NKRED``. Not generated if only Gamma kpoint sampling is required.
             vasp_ncl (DefectDictSet):
-                DefectDictSet for singlepoint (static) energy calculation with SOC
+                ``DefectDictSet`` for singlepoint (static) energy calculation with SOC
                 included. Generated if ``soc=True``. If ``soc`` is not set, then by default
                 is only generated for systems with a max atomic number (Z) >= 31
                 (i.e. further down the periodic table than Zn).
@@ -719,23 +719,23 @@ def __init__(
                 structure and charge state (if inputting a Structure object),
                 for defects. For the bulk supercell, it's "{formula} - Bulk".
             bulk_vasp_gam (DefectDictSet):
-                DefectDictSet for a `bulk` Γ-point-only singlepoint (static)
+                ``DefectDictSet`` for a `bulk` Γ-point-only singlepoint (static)
                 supercell calculation. Often not used, as the bulk supercell only
                 needs to be calculated once with the same settings as the final
                 defect calculations, which may be with ``vasp_std`` or ``vasp_ncl``.
             bulk_vasp_nkred_std (DefectDictSet):
-                DefectDictSet for a singlepoint (static) `bulk` ``vasp_std`` supercell
+                ``DefectDictSet`` for a singlepoint (static) `bulk` ``vasp_std`` supercell
                 calculation (i.e. with a non-Γ-only kpoint mesh) and ``NKRED(X,Y,Z)``
                 INCAR tag(s) to downsample kpoints for the HF exchange part of the
                 hybrid DFT calculation. Not generated for GGA calculations (if
                 ``LHFCALC`` is set to ``False`` in user_incar_settings) or if only Gamma
                 kpoint sampling is required.
             bulk_vasp_std (DefectDictSet):
-                DefectDictSet for a singlepoint (static) `bulk` ``vasp_std`` supercell
+                ``DefectDictSet`` for a singlepoint (static) `bulk` ``vasp_std`` supercell
                 calculation with a non-Γ-only kpoint mesh, not using ``NKRED``. Not
                 generated if only Gamma kpoint sampling is required.
             bulk_vasp_ncl (DefectDictSet):
-                DefectDictSet for singlepoint (static) energy calculation of the `bulk`
+                ``DefectDictSet`` for singlepoint (static) energy calculation of the `bulk`
                 supercell with SOC included. Generated if ``soc=True``. If ``soc`` is not
                 set, then by default is only generated for systems with a max atomic
                 number (Z) >= 31 (i.e. further down the periodic table than Zn).
@@ -801,12 +801,12 @@ def vasp_gam(
         self,
     ) -> DefectDictSet:
         """
-        Returns a DefectDictSet object for a VASP Γ-point-only (``vasp_gam``)
-        defect supercell relaxation. Typically not needed if ShakeNBreak
-        structure searching has been performed (recommended), unless only
-        Γ-point `k`-point sampling is required (converged) for your system, and
-        no vasp_std calculations with multiple `k`-points are required
-        (determined from kpoints settings).
+        Returns a ``DefectDictSet`` object for a VASP Γ-point-only
+        (``vasp_gam``) defect supercell relaxation. Typically not needed if
+        ShakeNBreak structure searching has been performed (recommended),
+        unless only Γ-point `k`-point sampling is required (converged) for your
+        system, and no vasp_std calculations with multiple `k`-points are
+        required (determined from kpoints settings).
 
         See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the
         ``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings,
@@ -859,10 +859,10 @@ def _check_vstd_kpoints(self, vasp_std_defect_set: DefectDictSet) -> Optional[De
     @property
     def vasp_std(self) -> Optional[DefectDictSet]:
         """
-        Returns a DefectDictSet object for a VASP defect supercell relaxation
-        using ``vasp_std`` (i.e. with a non-Γ-only kpoint mesh). Returns None
-        and a warning if the input kpoint settings correspond to a Γ-only
-        kpoint mesh (in which case ``vasp_gam`` should be used).
+        Returns a ``DefectDictSet`` object for a VASP defect supercell
+        relaxation using ``vasp_std`` (i.e. with a non-Γ-only kpoint mesh).
+        Returns None and a warning if the input kpoint settings correspond to a
+        Γ-only kpoint mesh (in which case ``vasp_gam`` should be used).
 
         See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the
         ``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings,
@@ -893,8 +893,8 @@ def vasp_std(self) -> Optional[DefectDictSet]:
     @property
     def vasp_nkred_std(self) -> Optional[DefectDictSet]:
         """
-        Returns a DefectDictSet object for a VASP defect supercell relaxation
-        using ``vasp_std`` (i.e. with a non-Γ-only kpoint mesh) and
+        Returns a ``DefectDictSet`` object for a VASP defect supercell
+        relaxation using ``vasp_std`` (i.e. with a non-Γ-only kpoint mesh) and
         ``NKRED(X,Y,Z)`` INCAR tag(s) to downsample kpoints for the HF exchange
         part of hybrid DFT calculations, following the doped recommended defect
         calculation workflow (see docs). By default, sets ``NKRED(X,Y,Z)`` to 2
@@ -974,13 +974,13 @@ def vasp_nkred_std(self) -> Optional[DefectDictSet]:
     @property
     def vasp_ncl(self) -> Optional[DefectDictSet]:
         """
-        Returns a DefectDictSet object for a VASP defect supercell singlepoint
-        calculation with spin-orbit coupling (SOC) included (LSORBIT = True),
-        using ``vasp_ncl``. If ``DefectRelaxSet.soc`` is False, then this
-        returns None and a warning. If the ``soc`` parameter is not set when
-        initializing ``DefectRelaxSet``, then this is set to True for systems
-        with a max atomic number (Z) >= 31 (i.e. further down the periodic
-        table than Zn), otherwise False.
+        Returns a ``DefectDictSet`` object for a VASP defect supercell
+        singlepoint calculation with spin-orbit coupling (SOC) included
+        (LSORBIT = True), using ``vasp_ncl``. If ``DefectRelaxSet.soc`` is
+        False, then this returns None and a warning. If the ``soc`` parameter
+        is not set when initializing ``DefectRelaxSet``, then this is set to
+        True for systems with a max atomic number (Z) >= 31 (i.e. further down
+        the periodic table than Zn), otherwise False.
 
         See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the
         ``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings,
@@ -1028,7 +1028,7 @@ def _check_bulk_supercell_and_warn(self):
     @property
     def bulk_vasp_gam(self) -> Optional[DefectDictSet]:
         """
-        Returns a DefectDictSet object for a VASP `bulk` Γ-point-only
+        Returns a ``DefectDictSet`` object for a VASP `bulk` Γ-point-only
         (``vasp_gam``) singlepoint (static) supercell calculation. Often not
         used, as the bulk supercell only needs to be calculated once with the
         same settings as the final defect calculations, which is ``vasp_std``
@@ -1081,7 +1081,7 @@ def bulk_vasp_gam(self) -> Optional[DefectDictSet]:
     @property
     def bulk_vasp_std(self) -> Optional[DefectDictSet]:
         """
-        Returns a DefectDictSet object for a singlepoint (static) `bulk`
+        Returns a ``DefectDictSet`` object for a singlepoint (static) `bulk`
         ``vasp_std`` supercell calculation. Returns None and a warning if the
         input kpoint settings correspond to a Γ-only kpoint mesh (in which case
         ``(bulk_)vasp_gam`` should be used).
@@ -1135,7 +1135,7 @@ def bulk_vasp_std(self) -> Optional[DefectDictSet]:
     @property
     def bulk_vasp_nkred_std(self) -> Optional[DefectDictSet]:
         """
-        Returns a DefectDictSet object for a singlepoint (static) `bulk`
+        Returns a ``DefectDictSet`` object for a singlepoint (static) `bulk`
         ``vasp_std`` supercell calculation (i.e. with a non-Γ-only kpoint mesh)
         and ``NKRED(X,Y,Z)`` INCAR tag(s) to downsample kpoints for the HF
         exchange part of the hybrid DFT calculation. By default, sets
@@ -1205,13 +1205,13 @@ def bulk_vasp_nkred_std(self) -> Optional[DefectDictSet]:
     @property
     def bulk_vasp_ncl(self) -> Optional[DefectDictSet]:
         """
-        Returns a DefectDictSet object for VASP `bulk` supercell singlepoint
-        calculations with spin-orbit coupling (SOC) included (LSORBIT = True),
-        using ``vasp_ncl``. If ``DefectRelaxSet.soc`` is False, then this
-        returns None and a warning. If the ``soc`` parameter is not set when
-        initializing ``DefectRelaxSet``, then this is set to True for systems
-        with a max atomic number (Z) >= 31 (i.e. further down the periodic
-        table than Zn), otherwise False.
+        Returns a ``DefectDictSet`` object for VASP `bulk` supercell
+        singlepoint calculations with spin-orbit coupling (SOC) included
+        (LSORBIT = True), using ``vasp_ncl``. If ``DefectRelaxSet.soc`` is
+        False, then this returns None and a warning. If the ``soc`` parameter
+        is not set when initializing ``DefectRelaxSet``, then this is set to
+        True for systems with a max atomic number (Z) >= 31 (i.e. further down
+        the periodic table than Zn), otherwise False.
 
         See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the
         ``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings,
@@ -1899,13 +1899,13 @@ def __init__(
             is only generated for systems with a max atomic number (Z) >= 31
             (i.e. further down the periodic table than Zn).
 
-        where ``DefectDictSet`` is an extension of ``pymatgen``'s ``DictSet`` class for
-        defect calculations, with ``incar``, ``poscar``, ``kpoints`` and ``potcar``
-        attributes for the corresponding VASP defect calculations (see docstring).
-        Also creates the corresponding ``bulk_vasp_...`` attributes for singlepoint
-        (static) energy calculations of the bulk (pristine, defect-free) supercell.
-        This needs to be calculated once with the same settings as the final defect
-        calculations, for the later calculation of defect formation energies.
+        where ``DefectDictSet`` is an extension of ``pymatgen``'s ``VaspInputSet``
+        class for defect calculations, with ``incar``, ``poscar``, ``kpoints`` and
+        ``potcar`` attributes for the corresponding VASP defect calculations (see
+        docstring). Also creates the corresponding ``bulk_vasp_...`` attributes for
+        singlepoint (static) energy calculations of the bulk (pristine, defect-free)
+        supercell. This needs to be calculated once with the same settings as the
+        final defect calculations, for the later calculation of defect formation energies.
 
         See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the
         ``doped/VASP_sets`` folder for the default ``INCAR`` settings, and
@@ -1925,10 +1925,10 @@ def __init__(
                 set equal to ``defect_species``. If a list or single ``DefectEntry``
                 object is provided, the defect folder names will be set equal to
                 ``DefectEntry.name`` if the ``name`` attribute is set, otherwise
-                generated according to the ``doped`` convention (see doped.generation).
+                generated according to the ``doped`` convention (see ``doped.generation``).
                 Defect charge states are taken from ``DefectEntry.charge_state``.
             soc (bool):
-                Whether to generate ``vasp_ncl`` DefectDictSet attribute for spin-orbit
+                Whether to generate ``vasp_ncl`` ``DefectDictSet`` attribute for spin-orbit
                 coupling singlepoint (static) energy calculations. If not set, then
                 by default is set to True if the max atomic number (Z) in the
                 structure is >= 31 (i.e. further down the periodic table than Zn).
@@ -1941,9 +1941,9 @@ def __init__(
                 with quotation marks (e.g. ``{"ALGO": "All"}``).
                 (default: None)
             user_kpoints_settings (dict or Kpoints):
-                Dictionary of user KPOINTS settings (in pymatgen DictSet format)
+                Dictionary of user KPOINTS settings (in ``pymatgen`` ``VaspInputSet`` format)
                 e.g. {"reciprocal_density": 123}, or a Kpoints object, to use for the
-                ``vasp_std``, ``vasp_nkred_std`` and ``vasp_ncl`` DefectDictSets (Γ-only for
+                ``vasp_std``, ``vasp_nkred_std`` and ``vasp_ncl`` ``DefectVaspInputSet``\s (Γ-only for
                 ``vasp_gam``). Default is Gamma-centred, reciprocal_density = 100 [Å⁻³].
             user_potcar_functional (str):
                 POTCAR functional to use. Default is "PBE" and if this fails, tries
@@ -1960,23 +1960,23 @@ def __init__(
                 Dictionary of {defect_species: DefectEntry} for the input defect
                 species, for which to generate VASP input files.
             bulk_vasp_gam (DefectDictSet):
-                DefectDictSet for a `bulk` Γ-point-only singlepoint (static)
+                ``DefectDictSet`` for a `bulk` Γ-point-only singlepoint (static)
                 supercell calculation. Often not used, as the bulk supercell only
                 needs to be calculated once with the same settings as the final
                 defect calculations, which may be with ``vasp_std`` or ``vasp_ncl``.
             bulk_vasp_nkred_std (DefectDictSet):
-                DefectDictSet for a singlepoint (static) `bulk` ``vasp_std`` supercell
+                ``DefectDictSet`` for a singlepoint (static) `bulk` ``vasp_std`` supercell
                 calculation (i.e. with a non-Γ-only kpoint mesh) and ``NKRED(X,Y,Z)``
                 INCAR tag(s) to downsample kpoints for the HF exchange part of the
                 hybrid DFT calculation. Not generated for GGA calculations (if
                 ``LHFCALC`` is set to ``False`` in user_incar_settings) or if only Gamma
                 kpoint sampling is required.
             bulk_vasp_std (DefectDictSet):
-                DefectDictSet for a singlepoint (static) `bulk` ``vasp_std`` supercell
+                ``DefectDictSet`` for a singlepoint (static) `bulk` ``vasp_std`` supercell
                 calculation with a non-Γ-only kpoint mesh, not using ``NKRED``. Not
                 generated if only Gamma kpoint sampling is required.
             bulk_vasp_ncl (DefectDictSet):
-                DefectDictSet for singlepoint (static) energy calculation of the `bulk`
+                ``DefectDictSet`` for singlepoint (static) energy calculation of the `bulk`
                 supercell with SOC included. Generated if ``soc=True``. If ``soc`` is not
                 set, then by default is only generated for systems with a max atomic
                 number (Z) >= 31 (i.e. further down the periodic table than Zn).
diff --git a/tests/data/VO2_POSCAR b/tests/data/VO2_POSCAR
new file mode 100644
index 00000000..4e50cd4c
--- /dev/null
+++ b/tests/data/VO2_POSCAR
@@ -0,0 +1,14 @@
+V2 O4
+1.0
+   3.0354292200000002    0.0000000000000000    0.0000000000000000
+   0.0000000000000000    4.5150226299999998    0.0000000000000000
+   0.0000000000000000    0.0000000000000000    4.5150226299999998
+O V
+4 2
+direct
+   0.0000000000000000    0.7006565100000000    0.7006565100000000 O
+   0.4999999300000000    0.7993435900000000    0.2006572000000000 O
+   0.0000000000000000    0.2993421100000000    0.2993421100000000 O
+   0.4999999300000000    0.2006572000000000    0.7993435900000000 O
+   0.4999999300000000    0.4999993100000000    0.4999993100000000 V
+   0.0000000000000000    0.0000000000000000    0.0000000000000000 V
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 1df04384..c8abd757 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -2691,6 +2691,7 @@ def test_bulk_defect_compatibility_checks(self):
                 skip_corrections=True,
                 parse_projected_eigen=False,
             )
+            print([str(warning.message) for warning in w])  # for debugging
             assert len(w) == 1
             assert all(
                 i in str(w[-1].message)
@@ -2731,6 +2732,7 @@ def test_bulk_defect_compatibility_checks(self):
                 skip_corrections=True,
                 parse_projected_eigen=False,
             )
+            print([str(warning.message) for warning in w])  # for debugging
             assert len(w) == 1
             assert all(
                 i in str(w[-1].message)
@@ -2758,6 +2760,7 @@ def test_bulk_defect_compatibility_checks(self):
                 skip_corrections=True,
                 parse_projected_eigen=False,
             )
+            print([str(warning.message) for warning in w])  # for debugging
             assert len(w) == 2  # now INCAR and KPOINTS warnings!
             assert any(
                 all(
@@ -2789,6 +2792,7 @@ def test_bulk_defect_compatibility_checks(self):
                 charge_state=+1,  # manually specify charge state here, as our edited POTCAR doesn't exist
                 parse_projected_eigen=False,
             )
+            print([str(warning.message) for warning in w])  # for debugging
             assert len(w) == 3  # now INCAR and KPOINTS and POTCAR warnings!
             assert any(
                 all(
@@ -2821,6 +2825,7 @@ def test_bulk_defect_compatibility_checks(self):
                 charge_state=+1,  # manually specify charge state here, as our edited POTCAR doesn't exist
                 parse_projected_eigen=False,
             )
+            print([str(warning.message) for warning in w])  # for debugging
             assert any(
                 "The defect and bulk supercells are not the same size, having volumes of 513790.5 and "
                 "2241.3 Å^3 respectively." in str(warning.message)
diff --git a/tests/test_corrections.py b/tests/test_corrections.py
index 61c02dfc..5e9a17f3 100644
--- a/tests/test_corrections.py
+++ b/tests/test_corrections.py
@@ -14,9 +14,8 @@
 import matplotlib as mpl
 import numpy as np
 import pytest
-from pymatgen.core.sites import PeriodicSite
+from pymatgen.core.structure import PeriodicSite, Structure
 from pymatgen.entries.computed_entries import ComputedStructureEntry
-from pymatgen.util.testing import PymatgenTest
 from test_analysis import if_present_rm
 
 from doped import analysis
@@ -30,15 +29,15 @@
 data_dir = os.path.join(module_path, "data")
 
 
-class FiniteSizeChargeCorrectionTest(PymatgenTest):
+class FiniteSizeChargeCorrectionTest(unittest.TestCase):
     """
-    Test functions for getting freysoldt and kumagai corrections.
+    Test functions for getting Freysoldt (FNV) and Kumagai (eFNV) corrections.
     """
 
     def setUp(self):
         self.dielectric = 15.0
 
-        struct = PymatgenTest.get_structure("VO2")
+        struct = Structure.from_file(os.path.join(data_dir, "VO2_POSCAR"))
         struct.make_supercell(3)
         vac = Vacancy(struct, struct.sites[0], charge=-3)
 

From 2b0ca974234d265f99dca6e11a66cc61cfca8f23 Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Tue, 4 Jun 2024 13:48:37 -0400
Subject: [PATCH 09/10] Remove `typing_extensions` requirement now new
 `pymatgen` released

---
 docs/Dev_ToDo.md | 1 -
 pyproject.toml   | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/docs/Dev_ToDo.md b/docs/Dev_ToDo.md
index 4375ce75..289a24f3 100644
--- a/docs/Dev_ToDo.md
+++ b/docs/Dev_ToDo.md
@@ -87,7 +87,6 @@
   there's any useful functionality we want to add!
 
 ## SK To-Do for next update:
-- Remove `typing-extensions` requirement from `pyproject.toml`, once new `pymatgen-analysis-defects` released.
 - `doped` repo/docs cleanup `TODO`s above
 - Quick-start tutorial suggested by Alex G
 - Add chempot grid plotting tool, shown in `JOSS_plots` using Alex's chemical potential grid, and test (and remove TODO from JOSS plots notebook).
diff --git a/pyproject.toml b/pyproject.toml
index 974d17ab..82ba059e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,9 +35,6 @@ dependencies = [
     "vise>=0.9.0",  # for eigenvalue analysis, >=0.9.0 required for SOC compatibility
     "easyunfold>=0.3.6",  # for efficient PROCAR parsing (and supporting SOC),
     # if any dependency issues arise can make easyunfold an optional dependency
-    "typing-extensions"  # to fix `pymatgen` typing issues
-    # (https://github.com/materialsproject/pymatgen/pull/3752)
-    # TODO: Remove once new pymatgen released
 ]
 
 [project.urls]

From ff209d39c844d823fbdff648bc363fcf6d5151f6 Mon Sep 17 00:00:00 2001
From: Sean Kavanagh <s.kavanagh19@imperial.ac.uk>
Date: Wed, 5 Jun 2024 12:59:09 -0400
Subject: [PATCH 10/10] Bump changelog and version numbers for release

---
 CHANGELOG.rst  | 6 ++++++
 pyproject.toml | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 25547693..b1474c55 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,12 @@
 Change Log
 ==========
 
+v.2.4.4
+----------
+- Make oxidation state guessing more efficient, semi-significant speed up in generation/parsing for tough cases.
+- Add `bulk_site_concentration` property to `DefectEntry`, giving the concentration of the corresponding lattice site of that defect in the pristine bulk.
+- Minor updates to ensure compatibility with recent ``pymatgen`` and ``ASE`` releases.
+
 v.2.4.3
 ----------
 - Remove ``spglib<=2.0.2`` dependency (set to avoid unnecessary warnings), and update installation instructions accordingly.
diff --git a/pyproject.toml b/pyproject.toml
index 82ba059e..59bdb5f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "doped"
-version = "2.4.3"
+version = "2.4.4"
 description = "Python package to setup, process and analyse solid-state defect calculations with VASP"
 authors = [{name = "Seán Kavanagh", email = "skavanagh@seas.harvard.edu"}]
 readme = "README.md"