From b9d3fd4ccd4a4fed2bf58a2695be6c5ada6b7d77 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Tue, 30 Jul 2024 09:54:15 +0100 Subject: [PATCH 1/9] dials.symmetry: allow free selection of `significance_level` in the range [0,1] (#2696) * Change significance_level to be a float in [0,1] --- newsfragments/2696.feature | 1 + src/dials/algorithms/symmetry/absences/screw_axes.py | 5 ++--- src/dials/command_line/symmetry.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 newsfragments/2696.feature diff --git a/newsfragments/2696.feature b/newsfragments/2696.feature new file mode 100644 index 0000000000..7d85003dd8 --- /dev/null +++ b/newsfragments/2696.feature @@ -0,0 +1 @@ +``dials.symmetry``: allow free selection of ``significance_level`` in the range [0,1] diff --git a/src/dials/algorithms/symmetry/absences/screw_axes.py b/src/dials/algorithms/symmetry/absences/screw_axes.py index 58010d2c8e..77aed0cee6 100644 --- a/src/dials/algorithms/symmetry/absences/screw_axes.py +++ b/src/dials/algorithms/symmetry/absences/screw_axes.py @@ -209,7 +209,7 @@ def score_axis_fourier(self, reflection_table, significance_level=0.95): def score_axis_direct(self, reflection_table, significance_level=0.95): """Score the axis given a reflection table of data.""" - assert significance_level in [0.95, 0.975, 0.99] + self.get_all_suitable_reflections(reflection_table) expected_sel = self.miller_axis_vals.iround() % self.axis_repeat == 0 @@ -241,8 +241,7 @@ def score_axis_direct(self, reflection_table, significance_level=0.95): # sanity check - is most of intensity in 'expected' channel? intensity_test = self.mean_I_sigma > (20.0 * self.mean_I_sigma_abs) - cutoffs = {0.95: 1.645, 0.975: 1.960, 0.99: 2.326} - cutoff = cutoffs[significance_level] + cutoff = norm.ppf(significance_level) if z_score_absent > cutoff and not intensity_test: # z > 1.65 in only 5% of cases for normal dist diff --git a/src/dials/command_line/symmetry.py b/src/dials/command_line/symmetry.py index e593258f45..9485f27f10 100644 --- a/src/dials/command_line/symmetry.py +++ b/src/dials/command_line/symmetry.py @@ -105,8 +105,8 @@ .help = "Use fourier analysis or direct analysis of I/sigma to determine" "likelihood of systematic absences" - significance_level = *0.95 0.975 0.99 - .type = choice + significance_level = 0.95 + .type = float(value_min=0, value_max=1) .help = "Significance to use when testing whether axial reflections are " "different to zero (absences and reflections in reflecting condition)." From b6bd34850364e7f897c60d2e5be9039f0789eb99 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Wed, 31 Jul 2024 15:37:15 +0100 Subject: [PATCH 2/9] Updates for the small molecule tutorial (#2692) * Fix error in text - wrong viewer * Add note about dials.symmetry catering more for MX * Use dials.export format=shelx --- .../tutorials/small_molecule_tutorial.rst | 15 ++++++--------- newsfragments/2692.bugfix | 1 + 2 files changed, 7 insertions(+), 9 deletions(-) create mode 100644 newsfragments/2692.bugfix diff --git a/doc/sphinx/documentation/tutorials/small_molecule_tutorial.rst b/doc/sphinx/documentation/tutorials/small_molecule_tutorial.rst index a3d58298e3..76a4f8eaa1 100644 --- a/doc/sphinx/documentation/tutorials/small_molecule_tutorial.rst +++ b/doc/sphinx/documentation/tutorials/small_molecule_tutorial.rst @@ -117,7 +117,7 @@ which will integrate each sweep in sequence, again using all available cores. Af .. code-block:: bash - dials.reciprocal_lattice_viewer integrated.refl integrated.expt + dials.image_viewer integrated.refl integrated.expt @@ -132,6 +132,8 @@ So far the data were processed with a triclinic unit cell, which is usually OK b This will look at the shape of the unit cell and determine the maximum possible symmetry based on the cell parameters, with some tolerance. Each of the possible symmetry operations will be individually tested and scored, and those operations identified as being present will be composed into the point group to be assigned to the data. An attempt is then made to estimate the space group from the presence or absence of axial reflections: this is rather less reliable than the point group determination but also less important for the scaling. After the point group has been determined the reflections will be reindexed automatically to match the correct setting, ensuring that the data are correctly prepared for scaling. +.. note:: ``dials.symmetry`` will only suggest one of the 65 Sohncke space groups relevant for chiral molecules. It will not detect mirrors or glide planes. + Scaling ------- @@ -225,19 +227,14 @@ However these may be useful in later structure refinement. Exporting --------- -The output data are by default saved in the standard DIALS reflection format, which is not particularly useful. In MX, a standard format is MTZ which includes the unit cell and symmetry information with the reflection data. This is created with +The output data are by default saved in the standard DIALS reflection format, which is not particularly useful. DIALS is able to convert this to SHELX format though. This can be done by .. code-block:: bash - dials.export scaled.refl scaled.expt - -And there is a useful "jiffy" included with xia2 to convert this to SHELX format and generate .ins and .hkl files for structure solution and refinement viz: - -.. code-block:: bash + dials.export scaled.refl scaled.expt format=shelx shelx.ins=lcys.ins shelx.hklout=lcys.hkl composition=CHNOS - xia2.to_shelx scaled.mtz lcys CHNOS -Such that you can then run +So that you can then run .. code-block:: bash diff --git a/newsfragments/2692.bugfix b/newsfragments/2692.bugfix new file mode 100644 index 0000000000..d392a22846 --- /dev/null +++ b/newsfragments/2692.bugfix @@ -0,0 +1 @@ +Improvements to the small molecule tutorial. From 7cf56ea6cd8cb252e140b3b44e37f64c154b0ca9 Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Fri, 2 Aug 2024 09:46:33 +0100 Subject: [PATCH 3/9] Updates to mmcif export (#2709) For mmcif export, add a few extra items so that the output can be understood with gemmi (and sortmtz), and make sure it works for still data too. --- newsfragments/2709.feature | 1 + src/dials/command_line/export.py | 8 ++++ src/dials/util/export_mmcif.py | 52 +++++++++++++++++++++--- tests/command_line/test_export.py | 11 +++++ tests/command_line/test_ssx_reduction.py | 19 +++++++++ 5 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 newsfragments/2709.feature diff --git a/newsfragments/2709.feature b/newsfragments/2709.feature new file mode 100644 index 0000000000..6af2c983c7 --- /dev/null +++ b/newsfragments/2709.feature @@ -0,0 +1 @@ +``dials.export``: Add support for exporting still data in mmcif format, that can be understood with gemmi diff --git a/src/dials/command_line/export.py b/src/dials/command_line/export.py index 3eecb8ca2e..a7668cc28c 100644 --- a/src/dials/command_line/export.py +++ b/src/dials/command_line/export.py @@ -220,6 +220,14 @@ "mmcif file should comply with. v5_next adds support for" "recording unmerged data as well as additional scan metadata" "and statistics, however writing can be slow for large datasets." + scale = True + .type = bool + .help = "If True, apply a scale such that the minimum intensity is greater" + "than (less negative than) the mmcif.min_scale value below." + min_scale = -999999.0 + .type = float + .help = "If mmcif.scale is True, scale all negative intensities such that" + "they are less negative than this value." } mosflm { diff --git a/src/dials/util/export_mmcif.py b/src/dials/util/export_mmcif.py index bf846c1b6a..ea66e11a1a 100644 --- a/src/dials/util/export_mmcif.py +++ b/src/dials/util/export_mmcif.py @@ -17,6 +17,7 @@ from scitbx.array_family import flex import dials.util.version +from dials.algorithms.symmetry import median_unit_cell from dials.util.filter_reflections import filter_reflection_table logger = logging.getLogger(__name__) @@ -89,7 +90,12 @@ def write(self, experiments, reflections): def make_cif_block(self, experiments, reflections): """Write the data to a cif block""" # Select reflections - selection = reflections.get_flags(reflections.flags.integrated, all=True) + # if rotation, get reflections integrated by both integration methods + # else if stills, only summation integrated reflections are available. + if all(e.scan and e.scan.get_oscillation()[1] != 0.0 for e in experiments): + selection = reflections.get_flags(reflections.flags.integrated, all=True) + else: + selection = reflections.get_flags(reflections.flags.integrated, all=False) reflections = reflections.select(selection) # Filter out bad variances and other issues, but don't filter on ice rings @@ -210,7 +216,8 @@ def make_cif_block(self, experiments, reflections): epochs = [] for exp in experiments: wls.append(round(exp.beam.get_wavelength(), 5)) - epochs.append(exp.scan.get_epochs()[0]) + if exp.scan: + epochs.append(exp.scan.get_epochs()[0]) unique_wls = set(wls) cif_block["_exptl_crystal.id"] = 1 # links to crystal_id cif_block["_diffrn.id"] = 1 # links to diffrn_id @@ -225,10 +232,34 @@ def make_cif_block(self, experiments, reflections): # _diffrn_detector.pdbx_collection_date = (Date of collection yyyy-mm-dd) # _diffrn_detector.type = (full name of detector e.g. DECTRIS PILATUS3 2M) # One date is required, so if multiple just use the first date. - min_epoch = min(epochs) - date_str = time.strftime("%Y-%m-%d", time.gmtime(min_epoch)) cif_block["_diffrn_detector.diffrn_id"] = 1 - cif_block["_diffrn_detector.pdbx_collection_date"] = date_str + if epochs: # some still expts have scans, but some don't + min_epoch = min(epochs) + date_str = time.strftime("%Y-%m-%d", time.gmtime(min_epoch)) + cif_block["_diffrn_detector.pdbx_collection_date"] = date_str + + # add some symmetry information + sginfo = experiments[0].crystal.get_space_group().info() + symbol = sginfo.type().universal_hermann_mauguin_symbol() + number = sginfo.type().number() + symmetry_block = iotbx.cif.model.block() + symmetry_block["_symmetry.entry_id"] = "DIALS" + symmetry_block["_symmetry.space_group_name_H-M"] = symbol + symmetry_block["_symmetry.Int_Tables_number"] = number + cif_block.update(symmetry_block) + + # add a loop with cell values (median if multi-crystal) + median_cell = median_unit_cell(experiments) + a, b, c, al, be, ga = median_cell.parameters() + cell_block = iotbx.cif.model.block() + cell_block["_cell.entry_id"] = "DIALS" + cell_block["_cell.length_a"] = f"{a:.4f}" + cell_block["_cell.length_b"] = f"{b:.4f}" + cell_block["_cell.length_c"] = f"{c:.4f}" + cell_block["_cell.angle_alpha"] = f"{al:.4f}" + cell_block["_cell.angle_beta"] = f"{be:.4f}" + cell_block["_cell.angle_gamma"] = f"{ga:.4f}" + cif_block.update(cell_block) # Write reflection data # Required columns @@ -315,6 +346,17 @@ def make_cif_block(self, experiments, reflections): reflections["angle"] = reflections["xyzcal.mm"].parts()[2] * RAD2DEG variables_present.extend(["angle"]) + if self.params.mmcif.scale and "intensity.scale.value" in reflections: + min_val = min(reflections["intensity.scale.value"]) + if min_val <= self.params.mmcif.min_scale: + # reduce the range of data, for e.g. sortmtz analysis + divisor = abs(min_val) / abs(self.params.mmcif.min_scale) + n = len(str(divisor).split(".")[0]) + divisor = float("1" + int(n) * "0") + reflections["intensity.scale.value"] /= divisor + reflections["intensity.scale.sigma"] /= divisor + reflections["scales"] /= divisor + if self.params.mmcif.pdb_version == "v5_next": if "partiality" in reflections: variables_present.extend(["partiality"]) diff --git a/tests/command_line/test_export.py b/tests/command_line/test_export.py index 51f0c9a78a..2e18096a0d 100644 --- a/tests/command_line/test_export.py +++ b/tests/command_line/test_export.py @@ -319,6 +319,17 @@ def test_mmcif_on_scaled_data(dials_data, tmp_path, pdb_version): model = iotbx.cif.reader(file_path=str(tmp_path / "scaled.mmcif")).model() if pdb_version == "v5": assert "_pdbx_diffrn_data_section.id" not in model["dials"].keys() + # check that gemmi can understand the output + cmd = [ + shutil.which("gemmi"), + "cif2mtz", + tmp_path / "scaled.mmcif", + tmp_path / "test.mtz", + ] + result = subprocess.run(cmd, cwd=tmp_path, capture_output=True) + assert not result.returncode and not result.stderr + assert (tmp_path / "test.mtz").is_file() + elif pdb_version == "v5_next": assert "_pdbx_diffrn_data_section.id" in model["dials"].keys() diff --git a/tests/command_line/test_ssx_reduction.py b/tests/command_line/test_ssx_reduction.py index 09599afdea..3c9a83ad50 100644 --- a/tests/command_line/test_ssx_reduction.py +++ b/tests/command_line/test_ssx_reduction.py @@ -68,6 +68,25 @@ def test_ssx_reduction(dials_data, tmp_path): assert not result.returncode and not result.stderr assert (tmp_path / "scaled.mtz").is_file() + # now try running mmcif export + result = subprocess.run( + [shutil.which("dials.export"), scale_expts, scale_refls, "format=mmcif"], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr + assert (tmp_path / "scaled.cif").is_file() + # check that gemmi can understand the output cif + cmd = [ + shutil.which("gemmi"), + "cif2mtz", + tmp_path / "scaled.cif", + tmp_path / "test.mtz", + ] + result = subprocess.run(cmd, cwd=tmp_path, capture_output=True) + assert not result.returncode and not result.stderr + assert (tmp_path / "test.mtz").is_file() + result = subprocess.run( [shutil.which("dials.merge"), scale_expts, scale_refls], cwd=tmp_path, From 769dab72b7257bf8b945b3857674998d7ed86e64 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Tue, 6 Aug 2024 09:39:47 +0100 Subject: [PATCH 4/9] Convert merged MTZ export to use gemmi (#2700) Add a gemmi-fied merged MTZ creator, called MergedMTZCreator, and deprecate the old MTZWriterBase version. --------- Co-authored-by: Daniel Tchon --- newsfragments/2700.bugfix | 2 + src/dials/algorithms/merging/merge.py | 279 ++++++++++++++++++++++++- src/dials/command_line/merge.py | 9 +- src/dials/command_line/scale.py | 8 +- src/dials/util/export_mtz.py | 19 +- tests/algorithms/merging/test_merge.py | 2 +- 6 files changed, 294 insertions(+), 25 deletions(-) create mode 100644 newsfragments/2700.bugfix diff --git a/newsfragments/2700.bugfix b/newsfragments/2700.bugfix new file mode 100644 index 0000000000..ca2ba74688 --- /dev/null +++ b/newsfragments/2700.bugfix @@ -0,0 +1,2 @@ +``dials.merge``: Use gemmi to output merged MTZs for consistency with ``dials.export`` + diff --git a/src/dials/algorithms/merging/merge.py b/src/dials/algorithms/merging/merge.py index 1757dac567..4b905286e5 100644 --- a/src/dials/algorithms/merging/merge.py +++ b/src/dials/algorithms/merging/merge.py @@ -3,20 +3,31 @@ from __future__ import annotations import logging +import time from contextlib import contextmanager from io import StringIO from typing import List, Optional, Tuple import numpy as np +import pandas as pd import iotbx.mtz -from cctbx import miller, r_free_utils +from cctbx import miller, r_free_utils, sgtbx, uctbx +from dxtbx import flumpy from dxtbx.model import ExperimentList from iotbx import mtz, phil from iotbx.reflection_file_editor import is_rfree_array from iotbx.reflection_file_utils import get_r_free_flags_scores +from libtbx import env from mmtbx.scaling import data_statistics +from dials.util.version import dials_version + +try: + import gemmi +except ModuleNotFoundError: + gemmi = None + from dials.algorithms.merging.reporting import ( MergeJSONCollector, MergingStatisticsData, @@ -196,6 +207,251 @@ def __init__( self.merged_half_datasets = merged_half_datasets +class MergedMTZCreator: + """Creates a gemmi.Mtz object for merged data.""" + + def __init__( + self, space_group: sgtbx.space_group, unit_cell: uctbx.unit_cell + ) -> None: + """ + Initializes the MergedMTZCreator with the provided space group and unit cell. + + Args: + space_group: The sgtbx.space_group object. + unit_cell: The uctbx.unit_cell object with cell parameters. + + Returns: + None + """ + + self.space_group = space_group + self.unit_cell = unit_cell + + mtz = gemmi.Mtz(with_base=True) + mtz.title = f"From {env.dispatcher_name}" + date_str = time.strftime("%Y-%m-%d at %H:%M:%S %Z") + if time.strftime("%Z") != "GMT": + date_str += time.strftime(" (%Y-%m-%d at %H:%M:%S %Z)", time.gmtime()) + mtz.history += [ + f"From {dials_version()}, run on {date_str}", + ] + hall = space_group.type().hall_symbol() + ops = gemmi.symops_from_hall(hall) + mtz.spacegroup = gemmi.find_spacegroup_by_ops(ops) + + if unit_cell: + mtz.set_cell_for_all(gemmi.UnitCell(*unit_cell.parameters())) + + self.type_to_sig_type = {"J": "Q", "F": "Q", "G": "L", "K": "M", "D": "Q"} + + self.mtz = mtz + + def add_data( + self, mtz_datasets: List[MTZDataClass], r_free_array: miller.array = None + ) -> None: + """ + Adds data to the MTZ object based on the provided datasets and optional R-free array. + + Args: + mtz_datasets (List[MTZDataClass]): The list of MTZDataClass objects containing the datasets. + r_free_array (miller.array, optional): The optional R-free array to add. + + Returns: + None + """ + + self._initialise_data_array(mtz_datasets) + + if r_free_array: + self._add_column(r_free_array, label="FreeR_flag", type_char="I") + + if len(mtz_datasets) > 1: + suffixes = [f"_WAVE{i+1}" for i in range(len(mtz_datasets))] + else: + suffixes = [""] + + for i, dataset in enumerate(mtz_datasets): + if dataset.dataset_name is None: + dataset.dataset_name = "FROMDIALS" + if dataset.crystal_name is None: + dataset.crystal_name = f"crystal_{i+2}" + if dataset.project_name is None: + dataset.project_name = "DIALS" + d = self.mtz.add_dataset(dataset.dataset_name) + d.crystal_name = dataset.crystal_name + d.project_name = dataset.project_name + d.wavelength = dataset.wavelength + + suffix = suffixes[i] + if dataset.merged_array: + self._add_column(dataset.merged_array, "IMEAN" + suffix, "J") + if dataset.multiplicities: + self._add_column(dataset.multiplicities, "N" + suffix, "I") + if dataset.amplitudes: + self._add_column(dataset.amplitudes, "F" + suffix, "F") + if dataset.merged_anomalous_array: + i_plus, i_minus = self._separate_anomalous( + dataset.merged_anomalous_array + ) + self._add_column(i_plus, "I" + suffix + "(+)", "K") + self._add_column(i_minus, "I" + suffix + "(-)", "K") + if dataset.anomalous_multiplicities: + n_plus, n_minus = self._separate_anomalous( + dataset.anomalous_multiplicities + ) + self._add_column(n_plus, "N" + suffix + "(+)", "I") + self._add_column(n_minus, "N" + suffix + "(-)", "I") + if dataset.anomalous_amplitudes: + f_plus, f_minus = self._separate_anomalous(dataset.anomalous_amplitudes) + self._add_column(f_plus, "F" + suffix + "(+)", "G") + self._add_column(f_minus, "F" + suffix + "(-)", "G") + if dataset.dano: + self._add_column(dataset.dano, "DANO" + suffix, "D") + if dataset.merged_half_datasets: + self._add_column( + dataset.merged_half_datasets.data1, "IHALF1" + suffix, "J" + ) + self._add_column( + dataset.merged_half_datasets.data2, "IHALF2" + suffix, "J" + ) + self._add_column( + dataset.merged_half_datasets.multiplicity1, "NHALF1" + suffix, "R" + ) + self._add_column( + dataset.merged_half_datasets.multiplicity2, "NHALF2" + suffix, "R" + ) + self.mtz.set_data(self.mtz_data) + + def _initialise_data_array(self, mtz_datasets): + """ + Sets the Miller indices for the data array based on the given MTZ datasets. + + Args: + mtz_datasets (List[MTZDataClass]): A list of MTZDataClass objects + + Returns: + None + + This function creates a merged miller set by combining the indices of all the datasets. + The indices are kept for use by the _add_column() function. A pandas DataFrame `mtz_data` + is created with the indices of the merged set as rows and the columns "H", "K", and "L". + Other columns will be added to the DataFrame by the _add_column() function. + """ + + miller_set = miller.set( + crystal_symmetry=mtz_datasets[0].merged_array.crystal_symmetry(), + indices=mtz_datasets[0].merged_array.indices().deep_copy(), + anomalous_flag=False, + ) + for dataset in mtz_datasets[1:]: + indices = dataset.merged_array.indices() + missing_isel = miller.match_indices(miller_set.indices(), indices).singles( + 1 + ) + miller_set.indices().extend(indices.select(missing_isel)) + + self.indices = miller_set.indices() + + self.mtz_data = pd.DataFrame( + flumpy.to_numpy(self.indices).astype("float32"), + columns=["H", "K", "L"], + ) + + def _add_column(self, column: miller.array, label: str, type_char: str): + """ + Add a data column (and associated sigmas if available) to the MTZ object. + + Args: + column (miller.array): The column to add to the MTZ object. + label (str): The label for the column. + type_char (str): The type character for the column. + + This function adds column data to the MTZ object. It first adds the column + to the most recently added dataset of the MTZ object. Then, it matches the + Miller indices of the column data to those in MTZ object to find the pairs + of matching indices. It selects the data from the column based on the pairs + of matching indices and inserts them into the `mtz_data` DataFrame. If the + column has sigmas the steps are repeated for them too. + + Returns: + None + """ + self.mtz.add_column(label, type_char) + + indices = column.indices() + + matches = miller.match_indices(self.indices, indices) + pairs = matches.pairs() + isel_i = pairs.column(0) + isel_j = pairs.column(1) + + data = flex.double(len(self.indices), float("nan")) + data.set_selected(isel_i, column.data().as_double().select(isel_j)) + + self.mtz_data.insert( + len(self.mtz_data.columns), + label, + flumpy.to_numpy(data).astype("float32"), + ) + + if column.sigmas() is None: + return + + type_char = self.type_to_sig_type[type_char] + self.mtz.add_column("SIG" + label, type_char) + + sigmas = flex.double(len(self.indices), float("nan")) + sigmas.set_selected(isel_i, column.sigmas().as_double().select(isel_j)) + + self.mtz_data.insert( + len(self.mtz_data.columns), + "SIG" + label, + flumpy.to_numpy(sigmas).astype("float32"), + ) + + def _separate_anomalous( + self, miller_array: miller.array + ) -> Tuple[miller.array, miller.array]: + """ + Separates the anomalous pairs from a given Miller array to produce + two arrays: one for the positive and one for the negative hemisphere + + Args: + miller_array (miller.array): The input Miller array. + + Returns: + Tuple[miller.array, miller.array]: A tuple containing the positive + and negative anomalous arrays. + """ + asu, matches = miller_array.match_bijvoet_mates() + + sel = matches.pairs_hemisphere_selection("+") + sel.extend(matches.singles_hemisphere_selection("+")) + + indices = asu.indices().select(sel) + data = asu.data().select(sel) + sigmas = None + if asu.sigmas() is not None: + sigmas = asu.sigmas().select(sel) + + miller_set = miller.set(miller_array.crystal_symmetry(), indices) + plus_array = miller.array(miller_set, data, sigmas) + + sel = matches.pairs_hemisphere_selection("-") + sel.extend(matches.singles_hemisphere_selection("-")) + + indices = -asu.indices().select(sel) + data = asu.data().select(sel) + sigmas = None + if asu.sigmas() is not None: + sigmas = asu.sigmas().select(sel) + + miller_set = miller.set(miller_array.crystal_symmetry(), indices) + minus_array = miller.array(miller_set, data, sigmas) + + return plus_array, minus_array + + def make_merged_mtz_file(mtz_datasets, r_free_array: miller.array = None): """ Make an mtz file object for the data, adding the date, time and program. @@ -207,9 +463,12 @@ def make_merged_mtz_file(mtz_datasets, r_free_array: miller.array = None): experiment. Returns: - An iotbx mtz file object. + A gemmi.Mtz object or an iotbx mtz object, if gemmi is not available. """ + if gemmi: + return make_merged_mtz_file_with_gemmi(mtz_datasets, r_free_array) + if len(mtz_datasets) > 1: writer = MADMergedMTZWriter else: @@ -248,6 +507,22 @@ def make_merged_mtz_file(mtz_datasets, r_free_array: miller.array = None): return mtz_writer.mtz_file +def make_merged_mtz_file_with_gemmi(mtz_datasets, r_free_array=None): + + # XXX This should replace the code in make_merged_mtz_file when + # MergedMTZWriter and MADMergedMTZWriter are removed + writer = MergedMTZCreator + + mtz_writer = writer( + mtz_datasets[0].merged_array.space_group(), + mtz_datasets[0].merged_array.unit_cell(), + ) + + mtz_writer.add_data(mtz_datasets, r_free_array) + + return mtz_writer.mtz + + def merge_scaled_array( experiments, scaled_array, diff --git a/src/dials/command_line/merge.py b/src/dials/command_line/merge.py index 11e15a7ff5..296cf59c74 100644 --- a/src/dials/command_line/merge.py +++ b/src/dials/command_line/merge.py @@ -7,7 +7,6 @@ import json import logging import sys -from io import StringIO from typing import List, Tuple from dxtbx.model import ExperimentList @@ -30,7 +29,7 @@ exclude_image_ranges_from_scans, get_selection_for_valid_image_ranges, ) -from dials.util.export_mtz import match_wavelengths +from dials.util.export_mtz import log_summary, match_wavelengths from dials.util.options import ArgumentParser, reflections_and_experiments_from_files from dials.util.version import dials_version @@ -359,10 +358,8 @@ def run(args=None): raise Sorry(e) logger.info("\nWriting reflections to %s", (params.output.mtz)) - out = StringIO() - mtz_file.show_summary(out=out) - logger.info(out.getvalue()) - mtz_file.write(params.output.mtz) + log_summary(mtz_file) + mtz_file.write_to_file(params.output.mtz) if params.output.json: with open(params.output.json, "w", encoding="utf-8") as f: diff --git a/src/dials/command_line/scale.py b/src/dials/command_line/scale.py index a3284a908a..b53cbef7d8 100644 --- a/src/dials/command_line/scale.py +++ b/src/dials/command_line/scale.py @@ -40,12 +40,12 @@ import logging import sys -from io import StringIO from libtbx import phil from dials.algorithms.scaling.algorithm import ScaleAndFilterAlgorithm, ScalingAlgorithm from dials.util import Sorry, log, show_mail_handle_errors +from dials.util.export_mtz import log_summary from dials.util.options import ArgumentParser, reflections_and_experiments_from_files from dials.util.version import dials_version @@ -135,10 +135,8 @@ def _export_merged_mtz(params, experiments, joint_table): mtz_file = merge_data_to_mtz(merge_params, experiments, [joint_table]) logger.disabled = False logger.info("\nWriting merged data to %s", (params.output.merged_mtz)) - out = StringIO() - mtz_file.show_summary(out=out) - logger.info(out.getvalue()) - mtz_file.write(params.output.merged_mtz) + log_summary(mtz_file) + mtz_file.write_to_file(params.output.merged_mtz) def _export_unmerged_mtz(params, experiments, reflection_table): diff --git a/src/dials/util/export_mtz.py b/src/dials/util/export_mtz.py index 28514ed4bd..f94f96f61c 100644 --- a/src/dials/util/export_mtz.py +++ b/src/dials/util/export_mtz.py @@ -2,21 +2,17 @@ import logging import time +import warnings from collections import Counter from copy import deepcopy from dataclasses import dataclass, field from math import isclose from typing import List, Optional +import gemmi import numpy as np import pandas as pd -try: - import gemmi -except ModuleNotFoundError as e: - gemmi = None - gemmi_import_error = e - from cctbx import uctbx from dxtbx import flumpy from iotbx import mtz @@ -53,6 +49,11 @@ class MTZWriterBase: def __init__(self, space_group, unit_cell=None): """If a unit cell is provided, will be used as default unless specified for each crystal.""" + warnings.warn( + "MTZWriterBase classes (MergedMTZWriter and MADMergedMTZWriter) are deprecated. Use MergedMTZCreator instead.\n", + DeprecationWarning, + stacklevel=2, + ) mtz_file = mtz.object() mtz_file.set_title(f"From {env.dispatcher_name}") date_str = time.strftime("%Y-%m-%d at %H:%M:%S %Z") @@ -315,8 +316,6 @@ def add_batch_list( if max_batch_number > batch_offset: batch_offset = max_batch_number - if gemmi is None: - raise gemmi_import_error batch = gemmi.Mtz.Batch() # Setting fields that are the same for all batches @@ -677,8 +676,6 @@ def export_mtz( ) # Create the mtz file - if gemmi is None: - raise gemmi_import_error mtz = gemmi.Mtz(with_base=True) mtz.title = f"From {env.dispatcher_name}" date_str = time.strftime("%Y-%m-%d at %H:%M:%S %Z") @@ -828,7 +825,7 @@ def log_summary(mtz): for col in mtz.columns: # col.min_value and col.max_value are not set, so we have to calculate them here logger.info( - f"{col.label:<12s} {col.type} {col.dataset_id:2d} {col.array.min():12.6g} {col.array.max():10.6g}" + f"{col.label:<12s} {col.type} {col.dataset_id:2d} {np.nanmin(col.array):12.6g} {np.nanmax(col.array):10.6g}" ) logger.info(f"History ({len(mtz.history)} lines):") for line in mtz.history: diff --git a/tests/algorithms/merging/test_merge.py b/tests/algorithms/merging/test_merge.py index 40739de3fa..f5de41d830 100644 --- a/tests/algorithms/merging/test_merge.py +++ b/tests/algorithms/merging/test_merge.py @@ -85,7 +85,7 @@ def test_r_free_flags_from_reference(tmp_path): params = phil_scope.extract() r_free_flags = generate_r_free_flags(params, mtz_datasets) mtz = make_merged_mtz_file(mtz_datasets, r_free_array=r_free_flags) - mtz.write(str(mtz_file)) + mtz.write_to_file(str(mtz_file)) # Now actually test r_free_flags_from_reference params.r_free_flags.reference = str(mtz_file) From 32ad28bd2248814f09856d0399856e6c2d9ca13f Mon Sep 17 00:00:00 2001 From: David Waterman Date: Wed, 7 Aug 2024 14:10:13 +0100 Subject: [PATCH 5/9] Fix matplotlib v3.9 colormaps (#2688) Fix for "matplotlib.cm has no attribute get_cmap" --- newsfragments/2688.bugfix | 1 + src/dials/algorithms/refinement/corrgram.py | 3 +-- src/dials/command_line/reference_profile_viewer.py | 4 ++-- src/dials/command_line/stereographic_projection.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 newsfragments/2688.bugfix diff --git a/newsfragments/2688.bugfix b/newsfragments/2688.bugfix new file mode 100644 index 0000000000..d8eaa47e9b --- /dev/null +++ b/newsfragments/2688.bugfix @@ -0,0 +1 @@ +Avoid deprecated ``matplotlib.cm.get_cmap`` calls diff --git a/src/dials/algorithms/refinement/corrgram.py b/src/dials/algorithms/refinement/corrgram.py index 56a4116f05..350476d34f 100644 --- a/src/dials/algorithms/refinement/corrgram.py +++ b/src/dials/algorithms/refinement/corrgram.py @@ -63,7 +63,6 @@ def corrgram(corrmat, labels): import matplotlib matplotlib.use("Agg") - import matplotlib.cm as cm import matplotlib.pyplot as plt except ImportError as e: logger.info("matplotlib modules not available " + str(e), exc_info=True) @@ -71,7 +70,7 @@ def corrgram(corrmat, labels): plt.figure(1) ax = plt.subplot(1, 1, 1, aspect="equal") - clrmap = cm.get_cmap("bwr") + clrmap = matplotlib.colormaps["bwr"] for x in range(nr): for y in range(nr): diff --git a/src/dials/command_line/reference_profile_viewer.py b/src/dials/command_line/reference_profile_viewer.py index 5833989c30..f058c1f0af 100644 --- a/src/dials/command_line/reference_profile_viewer.py +++ b/src/dials/command_line/reference_profile_viewer.py @@ -154,9 +154,9 @@ def draw_figure(self): # the direction through the Ewald sphere vals2D = profile["data"].sum(axis=0) cmap = copy.copy( - matplotlib.cm.get_cmap( + matplotlib.colormaps[ self.cmap_choice.GetString(self.cmap_choice.GetSelection()) - ) + ] ) # If any X, Y position is masked down the summed Z stack then mask diff --git a/src/dials/command_line/stereographic_projection.py b/src/dials/command_line/stereographic_projection.py index 8a3c72f208..0457739cad 100644 --- a/src/dials/command_line/stereographic_projection.py +++ b/src/dials/command_line/stereographic_projection.py @@ -357,7 +357,7 @@ def plot_projections( epochs = flex.double(epochs) epochs -= flex.min(epochs) epochs /= flex.max(epochs) - cmap = matplotlib.cm.get_cmap(colour_map) + cmap = matplotlib.colormaps[colour_map] colours = [cmap(e) for e in epochs] elif colours is None or len(colours) == 0: colours = ["b"] * len(projections_all) From 5d8c9bfb29be9dfc89d57e35c4e6bcf926aa38b2 Mon Sep 17 00:00:00 2001 From: David McDonagh <60879630+toastisme@users.noreply.github.com> Date: Wed, 7 Aug 2024 22:59:06 +0100 Subject: [PATCH 6/9] Modified branching to Laue refinement methods (#2715) * Modified branching to Laue refinement methods to depend on ExperimentType. --- newsfragments/2715.bugfix | 1 + .../algorithms/refinement/reflection_manager.py | 2 +- src/dials/array_family/flex_ext.py | 17 +++++++++++------ 3 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 newsfragments/2715.bugfix diff --git a/newsfragments/2715.bugfix b/newsfragments/2715.bugfix new file mode 100644 index 0000000000..deda002998 --- /dev/null +++ b/newsfragments/2715.bugfix @@ -0,0 +1 @@ +Modified branching to Laue refinement methods to check for ExperimentType first. diff --git a/src/dials/algorithms/refinement/reflection_manager.py b/src/dials/algorithms/refinement/reflection_manager.py index 2f3c01790d..a90ecd36e8 100644 --- a/src/dials/algorithms/refinement/reflection_manager.py +++ b/src/dials/algorithms/refinement/reflection_manager.py @@ -234,7 +234,7 @@ def from_parameters_reflections_experiments( flex.set_random_seed(params.random_seed) logger.debug("Random seed set to %d", params.random_seed) - if "wavelength" in reflections: + if experiments.all_laue() or experiments.all_tof(): return ReflectionManagerFactory.laue_manager( experiments, reflections, params ) diff --git a/src/dials/array_family/flex_ext.py b/src/dials/array_family/flex_ext.py index 72a02daecc..65d978922f 100644 --- a/src/dials/array_family/flex_ext.py +++ b/src/dials/array_family/flex_ext.py @@ -23,6 +23,7 @@ import cctbx.array_family.flex import cctbx.miller import libtbx.smart_open +from dxtbx.model import ExperimentType from scitbx import matrix import dials.extensions.glm_background_ext @@ -1331,12 +1332,16 @@ def map_centroids_to_reciprocal_space( cctbx.array_family.flex.vec2_double(x, y) ) - if calculated and "wavelength_cal" in self and "s0_cal" in self: - wavelength = self["wavelength_cal"].select(sel) - s0 = self["s0_cal"].select(sel) - elif "wavelength" in self and "s0" in self: - wavelength = self["wavelength"].select(sel) - s0 = self["s0"].select(sel) + if ( + expt.get_type() == ExperimentType.LAUE + or expt.get_type() == ExperimentType.TOF + ): + if calculated and "wavelength_cal" in self and "s0_cal" in self: + wavelength = self["wavelength_cal"].select(sel) + s0 = self["s0_cal"].select(sel) + elif "wavelength" in self and "s0" in self: + wavelength = self["wavelength"].select(sel) + s0 = self["s0"].select(sel) else: wavelength = expt.beam.get_wavelength() s0 = expt.beam.get_s0() From b9aae978b6d818a7dc17e7b89b66890c2e040a4e Mon Sep 17 00:00:00 2001 From: David Waterman Date: Thu, 8 Aug 2024 16:13:23 +0100 Subject: [PATCH 7/9] Fix `dials.reciprocal_lattice_viewer` middle mouse drag to translate (#2707) Adds pyopengl dependency as part of this --- .conda-envs/linux.txt | 1 + .conda-envs/macos.txt | 1 + .conda-envs/windows.txt | 1 + newsfragments/2707.bugfix | 1 + src/dials/util/wx_viewer.py | 21 ++++++--------------- 5 files changed, 10 insertions(+), 15 deletions(-) create mode 100644 newsfragments/2707.bugfix diff --git a/.conda-envs/linux.txt b/.conda-envs/linux.txt index c4b5eeabbc..23de941a8d 100644 --- a/.conda-envs/linux.txt +++ b/.conda-envs/linux.txt @@ -33,6 +33,7 @@ conda-forge::pint conda-forge::pip conda-forge::psutil conda-forge::pybind11 +conda-forge::pyopengl conda-forge::pyrtf conda-forge::pytest conda-forge::pytest-forked diff --git a/.conda-envs/macos.txt b/.conda-envs/macos.txt index 108cf8d281..20e9269339 100644 --- a/.conda-envs/macos.txt +++ b/.conda-envs/macos.txt @@ -33,6 +33,7 @@ conda-forge::pip conda-forge::psutil conda-forge::pthread-stubs conda-forge::pybind11 +conda-forge::pyopengl conda-forge::pyrtf conda-forge::pytest conda-forge::pytest-forked diff --git a/.conda-envs/windows.txt b/.conda-envs/windows.txt index 4b8d404841..2ec6d88e13 100644 --- a/.conda-envs/windows.txt +++ b/.conda-envs/windows.txt @@ -31,6 +31,7 @@ conda-forge::pint conda-forge::pip conda-forge::psutil conda-forge::pybind11 +conda-forge::pyopengl conda-forge::pyrtf conda-forge::pytest conda-forge::pytest-forked diff --git a/newsfragments/2707.bugfix b/newsfragments/2707.bugfix new file mode 100644 index 0000000000..03028e0ab6 --- /dev/null +++ b/newsfragments/2707.bugfix @@ -0,0 +1 @@ +``dials.reciprocal_lattice_viewer``: fix middle mouse drag to translate function. diff --git a/src/dials/util/wx_viewer.py b/src/dials/util/wx_viewer.py index fa282abdc3..b335f79fb6 100644 --- a/src/dials/util/wx_viewer.py +++ b/src/dials/util/wx_viewer.py @@ -600,28 +600,19 @@ def OnTranslate(self, event): model = gltbx.util.get_gl_modelview_matrix() proj = gltbx.util.get_gl_projection_matrix() view = gltbx.util.get_gl_viewport() - winx = [] - winy = [] - winz = [] rc = self.rotation_center rc_eye = gltbx.util.object_as_eye_coordinates(rc) - assert glu.gluProject(rc[0], rc[1], rc[2], model, proj, view, winx, winy, winz) - objx = [] - objy = [] - objz = [] + winx, winy, winz = glu.gluProject(rc[0], rc[1], rc[2], model, proj, view) win_height = max(1, self.w) - assert glu.gluUnProject( - winx[0], - winy[0] + 0.5 * win_height, - winz[0], + objx, objy, objz = glu.gluUnProject( + winx, + winy + 0.5 * win_height, + winz, model, proj, view, - objx, - objy, - objz, ) - dist = v3distsq((objx[0], objy[0], objz[0]), rc) ** 0.5 + dist = v3distsq((objx, objy, objz), rc) ** 0.5 scale = abs(dist / (0.5 * win_height)) x, y = event.GetX(), event.GetY() gltbx.util.translate_object(scale, x, y, self.xmouse, self.ymouse) From fd7985296fb2dc78852af21f46f3db5be5bd9046 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Fri, 9 Aug 2024 14:59:42 +0100 Subject: [PATCH 8/9] Tidy warning around joint_indexing (#2714) Tidy warning around joint_indexing Fixes #2713 --- newsfragments/2714.misc | 2 ++ src/dials/command_line/index.py | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 newsfragments/2714.misc diff --git a/newsfragments/2714.misc b/newsfragments/2714.misc new file mode 100644 index 0000000000..eb586aaa39 --- /dev/null +++ b/newsfragments/2714.misc @@ -0,0 +1,2 @@ +Tidy output of ``dials.index`` around the defaults for ``joint_index`` + diff --git a/src/dials/command_line/index.py b/src/dials/command_line/index.py index 02179f8a35..08d11cb7f5 100644 --- a/src/dials/command_line/index.py +++ b/src/dials/command_line/index.py @@ -171,13 +171,14 @@ def index(experiments, reflections, params): if params.indexing.joint_indexing is Auto: if all(e.is_still() for e in experiments): params.indexing.joint_indexing = False - logger.info("joint_indexing=False has been set for stills experiments") + logger.info("Disabling joint_indexing for still data") elif all(not e.is_still() for e in experiments): params.indexing.joint_indexing = True - logger.info("joint_indexing=True has been set for scans experiments") + if len(experiments) > 1: + logger.info("Enabling joint_indexing for rotation data") else: raise ValueError( - "Unable to set joint_indexing automatically for a mixture of stills and scans experiments" + "Unable to set joint_indexing automatically for a mixture of still and rotation data" ) if len(experiments) == 1 or params.indexing.joint_indexing: From a2f8f714a3a2d7a257119ce00baacb5bd51cf4ed Mon Sep 17 00:00:00 2001 From: Daniel Paley Date: Fri, 9 Aug 2024 14:41:26 -0400 Subject: [PATCH 9/9] Reflection table performance improvement (#2718) Eliminate a double loop in selections from reflection tables. Speedup of 12 min per call for a table containing 165k experiment identifiers. Co-authored-by: David Mittan-Moreau Co-authored-by: Aaron Brewster --- newsfragments/2718.bugfix | 3 +++ .../boost_python/reflection_table_suite.h | 11 ++++------- 2 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 newsfragments/2718.bugfix diff --git a/newsfragments/2718.bugfix b/newsfragments/2718.bugfix new file mode 100644 index 0000000000..06ae9674d6 --- /dev/null +++ b/newsfragments/2718.bugfix @@ -0,0 +1,3 @@ +Performance improvement for selections from large reflection tables. For a +table containing 165k experiment identifiers the speedup is 1000x (12 minutes +per call). diff --git a/src/dials/array_family/boost_python/reflection_table_suite.h b/src/dials/array_family/boost_python/reflection_table_suite.h index 5af6bcbb9d..c5b4a44930 100644 --- a/src/dials/array_family/boost_python/reflection_table_suite.h +++ b/src/dials/array_family/boost_python/reflection_table_suite.h @@ -32,12 +32,9 @@ namespace dials { namespace af { namespace boost_python { // Copy across identifiers for ids in new table typedef typename T::experiment_map_type::const_iterator const_iterator; for (std::set::iterator i = new_ids.begin(); i != new_ids.end(); ++i) { - for (const_iterator it = self.experiment_identifiers()->begin(); - it != self.experiment_identifiers()->end(); - ++it) { - if (it->first == *i) { - (*new_table.experiment_identifiers())[it->first] = it->second; - } + const_iterator found = self.experiment_identifiers()->find(*i); + if (found != self.experiment_identifiers()->end()) { + (*new_table.experiment_identifiers())[found->first] = found->second; } } } @@ -262,4 +259,4 @@ namespace dials { namespace af { namespace boost_python { }}}} // namespace dials::af::boost_python::reflection_table_suite -#endif // DIALS_ARRAY_FAMILY_BOOST_PYTHON_REFLECTION_TABLE_SUITE_H \ No newline at end of file +#endif // DIALS_ARRAY_FAMILY_BOOST_PYTHON_REFLECTION_TABLE_SUITE_H