From 17479052cf47b82df87ace50c79cd94342c4b3b9 Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Tue, 3 Sep 2024 12:41:58 -0400 Subject: [PATCH] Add `include_site_info` option to plotting and test --- doped/thermodynamics.py | 9 ++- doped/utils/plotting.py | 4 +- tests/test_plotting.py | 137 ++++++++++++++++++++++++++++++++--- tests/test_thermodynamics.py | 68 ++--------------- 4 files changed, 143 insertions(+), 75 deletions(-) diff --git a/doped/thermodynamics.py b/doped/thermodynamics.py index f4e5c644..706542cf 100644 --- a/doped/thermodynamics.py +++ b/doped/thermodynamics.py @@ -2435,7 +2435,6 @@ def get_doping_windows( ) # TODO: Don't show chempot table by default? At least is limit explicitly chosen? - # TODO: Add include_site_info as plotting option! And update docstrings with this behaviour # TODO: Add option to only plot defect states that are stable at some point in the bandgap # TODO: Add option to plot formation energies at the centroid of the chemical stability region? And # make this the default if no chempots are specified? Or better default to plot both the most ( @@ -2465,6 +2464,7 @@ def plot( xlim: Optional[tuple] = None, ylim: Optional[tuple] = None, fermi_level: Optional[float] = None, + include_site_info: bool = False, colormap: Optional[Union[str, colors.Colormap]] = None, auto_labels: bool = False, filename: Optional[PathLike] = None, @@ -2543,6 +2543,12 @@ def plot( If set, plots a dashed vertical line at this Fermi level value, typically used to indicate the equilibrium Fermi level position (e.g. calculated with py-sc-fermi). (Default: None) + include_site_info (bool): + Whether to include site info in defect names in the plot legend (e.g. + $Cd_{i_{C3v}}^{0}$ rather than $Cd_{i}^{0}$). Default is ``False``, where + site info is not included unless we have inequivalent sites for the same + defect type. If, even with site info added, there are duplicate defect + names, then "-a", "-b", "-c" etc are appended to the names to differentiate. colormap (str, matplotlib.colors.Colormap): Colormap to use for the formation energy lines, either as a string (which can be a colormap name from @@ -2622,6 +2628,7 @@ def plot( xlim=xlim, ylim=ylim, fermi_level=fermi_level, + include_site_info=include_site_info, title=plot_title, colormap=colormap, auto_labels=auto_labels, diff --git a/doped/utils/plotting.py b/doped/utils/plotting.py index b444e1e7..b6da6621 100644 --- a/doped/utils/plotting.py +++ b/doped/utils/plotting.py @@ -306,7 +306,7 @@ def format_defect_name( with contextlib.suppress(IndexError): point_group_symbol = defect_species.split("_")[2] if point_group_symbol in sch_symbols and all( # recognised point group symbol? - i not in defect_species.lower() for i in ["int", "vac", "sub", "as"] + i not in defect_species for i in ["int", "Int", "vac", "Vac", "sub", "Sub", "as"] # no As_... ): # from 2nd underscore to last underscore (before charge state) is site info # convert point group symbol to formatted version (e.g. C1 -> C_1): @@ -877,6 +877,7 @@ def _TLD_plot( xlim=None, ylim=None, fermi_level=None, + include_site_info=False, title=None, colormap: Optional[Union[str, Colormap]] = None, auto_labels=False, @@ -989,6 +990,7 @@ def _TLD_plot( else defect_names_for_legend ), all_entries=all_entries is True, + include_site_info=include_site_info, ), loc=2, bbox_to_anchor=(1, 1), diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 6411358b..b31806c9 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -10,6 +10,7 @@ import shutil import unittest import warnings +from copy import deepcopy import matplotlib as mpl import pytest @@ -81,7 +82,7 @@ def test_plot_YTOS(self): def test_format_defect_name(self): """ - Test format_defect_name() function. + Test ``format_defect_name()`` function. """ # test standard behaviour formatted_name = plotting.format_defect_name( @@ -287,6 +288,7 @@ def test_format_defect_name(self): "inter_14_O_+2": "O$_i^{+2}$", "inter_14_Th_+2": "Th$_i^{+2}$", "vac_14_Th_+2": "$\\it{V}\\!$ $_{Th}^{+2}$", + "As_i_C2_-3": "As$_i^{-3}$", } for defect_species, expected_name in defect_species_name_dict.items(): @@ -364,6 +366,7 @@ def test_format_defect_name(self): "inter_14_O_+2": "O$_{i_{14}}^{+2}$", "inter_14_Th_+2": "Th$_{i_{14}}^{+2}$", "vac_14_Th_+2": "$\\it{V}\\!$ $_{Th_{14}}^{+2}$", + "As_i_C2_-3": "As$_{i_{C_{2}}}^{-3}$", } for ( defect_species, @@ -393,6 +396,20 @@ def test_plot_neutral_v_O_V2O5(self): class DefectThermodynamicsPlotsTestCase(DefectThermodynamicsSetupMixin): + def setUp(self): + super().setUp() + self.CdTe_LZ_thermo_wout_meta = deepcopy(self.orig_CdTe_LZ_thermo_wout_meta) + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.CdTe_LZ_defect_dict = loadfn( + os.path.join(cls.module_path, "data/CdTe_LZ_defect_dict_v2.3_wout_meta.json.gz") + ) + cls.orig_CdTe_LZ_thermo_wout_meta = DefectThermodynamics( + cls.CdTe_LZ_defect_dict, chempots=cls.CdTe_chempots + ) + def test_plot_limit_no_chempots_error(self): with pytest.raises(ValueError) as exc: self.CdTe_defect_thermo.plot(limit="Te-rich") @@ -507,19 +524,12 @@ def test_V2O5_O_rich_faded_plot(self): @custom_mpl_image_compare(filename="CdTe_LZ_all_default_Te_rich.png") def test_CdTe_LZ_all_defects_plot(self): - lz_cdte_defect_dict = loadfn( - os.path.join(self.module_path, "data/CdTe_LZ_defect_dict_v2.3_wout_meta.json.gz") - ) - CdTe_LZ_thermo_wout_meta = DefectThermodynamics(lz_cdte_defect_dict, chempots=self.CdTe_chempots) - return CdTe_LZ_thermo_wout_meta.plot(limit="Te-rich") + return self.CdTe_LZ_thermo_wout_meta.plot(limit="Te-rich") @custom_mpl_image_compare(filename="CdTe_LZ_all_Te_rich_dist_tol_2.png") def test_CdTe_LZ_all_defects_plot_dist_tol_2(self): # Matches SK Thesis Fig 6.1b - lz_cdte_defect_dict = loadfn( - os.path.join(self.module_path, "data/CdTe_LZ_defect_dict_v2.3_wout_meta.json.gz") - ) - lz_cdte_defect_thermo = DefectThermodynamics(lz_cdte_defect_dict) + lz_cdte_defect_thermo = self.CdTe_LZ_thermo_wout_meta lz_cdte_defect_thermo.dist_tol = 2 # increase to 2 Å to merge Te_i defects return lz_cdte_defect_thermo.plot(chempots=self.CdTe_chempots, limit="Te-rich") @@ -561,3 +571,110 @@ def test_Sb2O5_merged_dist_tol(self): print([str(warn.message) for warn in w]) # for debugging assert not w return plot + + @custom_mpl_image_compare(filename="CdTe_duplicate_entry_names.png") + def test_handling_duplicate_entry_names_CdTe(self): + """ + Test renaming behaviour when defect entries with the same names are + provided. + """ + defect_dict = loadfn(os.path.join(self.module_path, "data/CdTe_defect_dict_v2.3.json.gz")) + num_entries = len(defect_dict) + for defect_entry in defect_dict.values(): + if "Cd_i" in defect_entry.name: + defect_entry.name = f"Cd_i_{defect_entry.charge_state}" + if "Te_i" in defect_entry.name: + defect_entry.name = f"Te_i_{defect_entry.charge_state}" + + defect_thermo = DefectThermodynamics(defect_dict) + assert len(defect_thermo.defect_entries) == num_entries + print([entry.name for entry in defect_thermo.defect_entries]) + + return defect_thermo.plot(self.CdTe_chempots, limit="Cd-rich") + + @custom_mpl_image_compare(filename="Se_duplicate_entry_names_old_plot.png") + def test_handling_duplicate_entry_names_ext_Se_old_names(self): + """ + Test renaming behaviour when defect entries with the same names are + provided. + + In this case, the defect folder/entry names match the old ``doped`` + format, with e.g. ``sub_1_Br_on_Se_-1`` and ``inter_2_O_0`` etc. + + We have some duplicates (``inter_1_O_0`` and ``inter_2_O_0``) which + are removed by including site info, but some (``inter_11_H_X``) which + aren't, so ``_a`` & ``_b`` are appended to those names. + """ + fig = self.Se_ext_no_pnict_thermo.plot() + legend_txt = [t.get_text() for t in fig.get_axes()[0].get_legend().get_texts()] + print(legend_txt) + for i in ["O$_{i_{1}}$", "O$_{i_{2}}$", "H$_i$$_{-a}$", "H$_i$$_{-b}$"]: + assert i in legend_txt + + return fig + + @custom_mpl_image_compare(filename="Se_pnictogen_plot.png") + def test_plotting_ext_Se_new_names(self): + """ + Test plotting behaviour with pnictogen impurities in Se. + + Previously this would append site info to the defect names by default, + but this is not necessary here as there are no inequivalent + substitution/interstitial sites, so check that the legend is as + expected with no site info. + """ + fig = self.Se_pnict_thermo.plot() + legend_txt = [t.get_text() for t in fig.get_axes()[0].get_legend().get_texts()] + print(legend_txt) + for i in ["i_{", "{Se_{", "}}$", "-a", "-b"]: + assert all(i not in text for text in legend_txt) + + return fig + + @custom_mpl_image_compare(filename="Se_site_info_old_names_plot.png") + def test_ext_Se_old_names_include_site_info(self): + """ + Test plotting extrinsic defects in Se with ``include_site_info=True``. + + In this case, the defect folder/entry names match the old ``doped`` + format, with e.g. ``sub_1_Br_on_Se_-1`` and ``inter_2_O_0`` etc. + + We have some duplicates (``inter_1_O_0`` and ``inter_2_O_0``) which + are removed by including site info, but some (``inter_11_H_X``) which + aren't, so ``_a`` & ``_b`` are appended to those names. + """ + fig = self.Se_ext_no_pnict_thermo.plot(include_site_info=True) + legend_txt = [t.get_text() for t in fig.get_axes()[0].get_legend().get_texts()] + print(legend_txt) + for i in [ + "O$_{i_{1}}$", + "O$_{i_{2}}$", + "Te$_{Se_{1}}$", + "H$_{i_{11}}$$_{-a}$", + "H$_{i_{11}}$$_{-b}$", + "F$_{i_{1}}$", + ]: + assert i in legend_txt + + return fig + + @custom_mpl_image_compare(filename="Se_pnictogen_site_info_plot.png") + def test_plotting_ext_Se_site_info(self): + """ + Test plotting behaviour with pnictogen impurities in Se, with + ``include_site_info=True``. + """ + fig = self.Se_pnict_thermo.plot(include_site_info=True) + legend_txt = [t.get_text() for t in fig.get_axes()[0].get_legend().get_texts()] + print(legend_txt) + # site info being added; C2 for interstitials, none for _Se (only one site) + assert all(any(i in text for i in ["{C_{2}}}", "_{Se"]) for text in legend_txt) + + return fig + + @custom_mpl_image_compare(filename="CdTe_LZ_all_Te_rich_site_info.png") + def test_CdTe_LZ_site_info_plot(self): + """ + Test CdTe plotting behaviour with ``include_site_info=True``. + """ + return self.CdTe_LZ_thermo_wout_meta.plot(limit="Te-rich", include_site_info=True) diff --git a/tests/test_thermodynamics.py b/tests/test_thermodynamics.py index e3af651b..76662532 100644 --- a/tests/test_thermodynamics.py +++ b/tests/test_thermodynamics.py @@ -103,6 +103,8 @@ def setUp(self): self.Sb2O5_defect_thermo = deepcopy(self.orig_Sb2O5_defect_thermo) self.ZnS_defect_thermo = deepcopy(self.orig_ZnS_defect_thermo) + self.Se_ext_no_pnict_thermo = deepcopy(self.orig_Se_ext_no_pnict_thermo) + self.Se_pnict_thermo = deepcopy(self.orig_Se_pnict_thermo) self.cdte_chempot_warning_message = ( "Note that the raw (DFT) energy of the bulk supercell calculation (-3.37 eV/atom) differs " "from that expected from the supplied chemical potentials (-3.50 eV/atom) by >0.025 eV. This " @@ -178,6 +180,9 @@ def setUpClass(cls): cls.orig_ZnS_defect_thermo = loadfn(os.path.join(data_dir, "ZnS/ZnS_thermo.json.gz")) + cls.orig_Se_ext_no_pnict_thermo = loadfn(os.path.join(data_dir, "Se_Ext_No_Pnict_Thermo.json.gz")) + cls.orig_Se_pnict_thermo = loadfn(os.path.join(data_dir, "Se_Pnict_Thermo.json.gz")) + class DefectThermodynamicsTestCase(DefectThermodynamicsSetupMixin): def _compare_defect_thermo_and_dict(self, defect_thermo, defect_dict): @@ -1902,69 +1907,6 @@ def test_formation_energy_mult_degen(self): ) assert np.isclose(orig_conc, new_conc * random_defect_entry.bulk_site_concentration) - @custom_mpl_image_compare(filename="CdTe_duplicate_entry_names.png") - def test_handling_duplicate_entry_names_CdTe(self): - """ - Test renaming behaviour when defect entries with the same names are - provided. - """ - defect_dict = loadfn(os.path.join(self.module_path, "data/CdTe_defect_dict_v2.3.json.gz")) - num_entries = len(defect_dict) - for defect_entry in defect_dict.values(): - if "Cd_i" in defect_entry.name: - defect_entry.name = f"Cd_i_{defect_entry.charge_state}" - if "Te_i" in defect_entry.name: - defect_entry.name = f"Te_i_{defect_entry.charge_state}" - - defect_thermo = DefectThermodynamics(defect_dict) - assert len(defect_thermo.defect_entries) == num_entries - print([entry.name for entry in defect_thermo.defect_entries]) - - return defect_thermo.plot(self.CdTe_chempots, limit="Cd-rich") - - @custom_mpl_image_compare(filename="Se_duplicate_entry_names_old.png") - def test_handling_duplicate_entry_names_ext_Se_old_names(self): - """ - Test renaming behaviour when defect entries with the same names are - provided. - - In this case, the defect folder/entry names match the old ``doped`` - format, with e.g. ``sub_1_Br_on_Se_-1`` and ``inter_2_O_0`` etc. - - We have some duplicates (``inter_1_O_0`` and ``inter_2_O_0``) which - are removed by including site info, but some (``inter_11_H_X``) which - aren't, so ``_a`` & ``_b`` are appended to those names. - """ - defect_thermo = loadfn(os.path.join(self.module_path, "data/Se_Ext_no_Pnict_Thermo.json.gz")) - print([entry.name for entry in defect_thermo.defect_entries]) - fig = defect_thermo.plot() - legend_txt = [t.get_text() for t in fig.get_axes()[0].get_legend().get_texts()] - print(legend_txt) - for i in ["O$_{i_{1}}$", "O$_{i_{2}}$", "H$_i$$_{-a}$", "H$_i$$_{-b}$"]: - assert i in legend_txt - - return fig - - @custom_mpl_image_compare(filename="Se_pnictogen_plot.png") - def test_plotting_ext_Se_new_names(self): - """ - Test plotting behaviour with pnictogen impurities in Se. - - Previously this would append site info to the defect names by default, - but this is not necessary here as there are no inequivalent - substitution/interstitial sites, so check that the legend is as - expected with no site info. - """ - defect_thermo = loadfn(os.path.join(self.module_path, "data/Se_Pnict_Thermo.json.gz")) - print([entry.name for entry in defect_thermo.defect_entries]) - fig = defect_thermo.plot() - legend_txt = [t.get_text() for t in fig.get_axes()[0].get_legend().get_texts()] - print(legend_txt) - for i in ["i_{", "{Se_{", "}}$", "-a", "-b"]: - assert all(i not in text for text in legend_txt) - - return fig - def test_Sb2O5_formation_energies(self): formation_energy_table_df = self.Sb2O5_defect_thermo.get_formation_energies( self.Sb2O5_chempots, limit="Sb2O5-SbO2", fermi_level=3