diff --git a/.azure-pipelines/azure-pipelines.yml b/.azure-pipelines/azure-pipelines.yml index 7d1cd42a0e..c205a96375 100644 --- a/.azure-pipelines/azure-pipelines.yml +++ b/.azure-pipelines/azure-pipelines.yml @@ -2,7 +2,7 @@ variables: CACHE_VERSION: 20201102 isPullRequest: $[startsWith(variables['Build.SourceBranch'], 'refs/pull/')] PYTHON_MINIMUM_VERSION: "3.9" - PYTHON_TESTING_VERSION: "3.11" + PYTHON_TESTING_VERSION: "3.12" stages: - stage: prepare diff --git a/.conda-envs/linux.txt b/.conda-envs/linux.txt index 06c1e015aa..c4b5eeabbc 100644 --- a/.conda-envs/linux.txt +++ b/.conda-envs/linux.txt @@ -11,14 +11,15 @@ conda-forge::eigen conda-forge::future conda-forge::gemmi>=0.6.5 conda-forge::h5py>=3.1.0 -conda-forge::hdf5<1.13 +conda-forge::hdf5 conda-forge::hdf5plugin conda-forge::iota conda-forge::jinja2 -conda-forge::jpeg conda-forge::libboost-devel conda-forge::libboost-python-devel +conda-forge::libglu conda-forge::matplotlib-base>=3.0.2 +conda-forge::mesa-libgl-devel-cos7-x86_64 conda-forge::mrcfile conda-forge::msgpack-cxx conda-forge::msgpack-python diff --git a/.conda-envs/macos.txt b/.conda-envs/macos.txt index f095f192f7..108cf8d281 100644 --- a/.conda-envs/macos.txt +++ b/.conda-envs/macos.txt @@ -11,11 +11,10 @@ conda-forge::eigen conda-forge::future conda-forge::gemmi>=0.6.5 conda-forge::h5py>=3.1.0 -conda-forge::hdf5<1.13 +conda-forge::hdf5 conda-forge::hdf5plugin conda-forge::iota conda-forge::jinja2 -conda-forge::jpeg conda-forge::libboost-devel conda-forge::libboost-python-devel conda-forge::libcxx @@ -53,6 +52,6 @@ conda-forge::sqlite conda-forge::tabulate conda-forge::tqdm conda-forge::urllib3 -conda-forge::wxpython>=4.2.0 +conda-forge::wxpython>=4.2.0=*_5 conda-forge::xz conda-forge::zlib diff --git a/.conda-envs/windows.txt b/.conda-envs/windows.txt index dc297f27c6..4b8d404841 100644 --- a/.conda-envs/windows.txt +++ b/.conda-envs/windows.txt @@ -11,11 +11,10 @@ conda-forge::eigen conda-forge::future conda-forge::gemmi>=0.6.5 conda-forge::h5py>=3.1.0 -conda-forge::hdf5<1.13 +conda-forge::hdf5 conda-forge::hdf5plugin conda-forge::iota conda-forge::jinja2 -conda-forge::jpeg conda-forge::libboost-devel conda-forge::libboost-python-devel conda-forge::matplotlib-base>=3.0.2 diff --git a/AUTHORS b/AUTHORS index 9d94f7e9e5..7b4453a79d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -2,6 +2,7 @@ Contributors include: Aaron Brewster Ammaar Saeed +Amy Thompson Asmit Bhowmick Benjamin Williams Billy Poon diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 91b49f400d..19d271eed8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,44 @@ +DIALS 3.19.1 (2024-05-23) +========================= + +No significant changes. + + +DIALS 3.19.0 (2024-04-17) +========================= + +Features +-------- + +- ``dials.predict``: Allow usage when image data are not available. (`#2553 `_) +- Add ``TOFSpotFinder`` to tailor default params to time of flight experiments and add additional reflection table data. (`#2602 `_) +- ``dials.ssx_index``: Allow use of sequences indexer, pink_indexer and low_res_spot_match indexing algorithms. (`#2614 `_) +- ``dials.export``: Add option ``composition=`` to specify of asymmetric unit composition for SHELX ``.ins`` file output. (`#2623 `_) + + +Bugfixes +-------- + +- Fix building on RHEL8 and other more recent distributions. (`#1465 `_) +- ``dials.index``: Joint indexing is automatically set on for rotation data, off for still data. This can be overridden by explicit use of ``joint_indexing=``. 
(`#2605 `_) +- ``dials.export`` ``format=shelx``: Increased precision of unit cell parameters and their estimated standard deviations written to ``.ins`` file. (`#2624 `_) +- ``dials.ssx_index``: Don't combine detector models if individually refined. (`#2634 `_) +- ``dials.scale``: Fix error in propagation of partiality variance, when scaling still data. (`#2642 `_) +- ``dials.export``: Fix crash when exporting scaled still-shot data. (`#2646 `_) + + +Deprecations and Removals +------------------------- + +- API: ``array_family/flex_ext.py``: remove ``nthread``s parameter from ``extract_shoeboxes``, as it was never implemented. (`#2638 `_) + + +Misc +---- + +- `#2617 `_, `#2618 `_, `#2619 `_, `#2626 `_, `#2633 `_, `#2648 `_ + + DIALS 3.18.1 (2024-03-26) ========================= diff --git a/installer/bootstrap.py b/installer/bootstrap.py index 33ed202ccf..ac1aa0a992 100755 --- a/installer/bootstrap.py +++ b/installer/bootstrap.py @@ -50,7 +50,7 @@ allowed_ssh_connections = {} concurrent_git_connection_limit = threading.Semaphore(5) -_prebuilt_cctbx_base = "2023" # Latest Nightly release +_prebuilt_cctbx_base = "2024" def make_executable(filepath): @@ -1432,8 +1432,8 @@ def run(): parser.add_argument( "--python", help="Install this minor version of Python (default: %(default)s)", - default="3.10", - choices=("3.9", "3.10", "3.11"), + default="3.11", + choices=("3.9", "3.10", "3.11", "3.12"), ) parser.add_argument( "--branch", diff --git a/newsfragments/2553.feature b/newsfragments/2553.feature deleted file mode 100644 index 2505e12c61..0000000000 --- a/newsfragments/2553.feature +++ /dev/null @@ -1 +0,0 @@ -``dials.predict``: allow this command to work in the case that image data are not available. diff --git a/newsfragments/2602.feature b/newsfragments/2602.feature deleted file mode 100644 index d9d07deaa2..0000000000 --- a/newsfragments/2602.feature +++ /dev/null @@ -1 +0,0 @@ -Add `TOFSpotFinder` to tailor default params to time of flight experiments and add additional reflection table data. diff --git a/newsfragments/2605.bugfix b/newsfragments/2605.bugfix deleted file mode 100644 index 9df920cbec..0000000000 --- a/newsfragments/2605.bugfix +++ /dev/null @@ -1,3 +0,0 @@ -``dials.index``: Set joint indexing to be automatically defined by default: -``True`` for rotation data, ``False`` for still shots. This default can be -overridden by the user by explicit use of ``joint_indexing=``. diff --git a/newsfragments/2613.misc b/newsfragments/2613.misc new file mode 100644 index 0000000000..45974be105 --- /dev/null +++ b/newsfragments/2613.misc @@ -0,0 +1 @@ +Update cctbx-base dependency to 2024 versions. diff --git a/newsfragments/2614.feature b/newsfragments/2614.feature deleted file mode 100644 index 2cc2db1586..0000000000 --- a/newsfragments/2614.feature +++ /dev/null @@ -1 +0,0 @@ -``dials.ssx_index``: Allow use of sequences indexer, and pink_indexer and low_res_spot_match indexing algorithms. diff --git a/newsfragments/2616.misc b/newsfragments/2616.misc new file mode 100644 index 0000000000..8cc6b8c058 --- /dev/null +++ b/newsfragments/2616.misc @@ -0,0 +1 @@ +``dials.import``: add a test for distance overrides for a multi-panel detector. 
diff --git a/newsfragments/2617.misc b/newsfragments/2617.misc deleted file mode 100644 index b7adf88987..0000000000 --- a/newsfragments/2617.misc +++ /dev/null @@ -1 +0,0 @@ -Fix Deprecation warning and doc build failure for pinkindexer file diff --git a/newsfragments/2618.misc b/newsfragments/2618.misc deleted file mode 100644 index 622dc0c48b..0000000000 --- a/newsfragments/2618.misc +++ /dev/null @@ -1 +0,0 @@ -Unpin gemmi version diff --git a/newsfragments/2619.misc b/newsfragments/2619.misc deleted file mode 100644 index 4d252b79e1..0000000000 --- a/newsfragments/2619.misc +++ /dev/null @@ -1 +0,0 @@ -Do more accurate and reliable discovery of available CPUs and virtual memory across a range of different systems. This should make various DIALS tools, in particular ``dials.integrate``, function better in situations where system resources are fairly limited or DIALS requires a lot of memory. \ No newline at end of file diff --git a/newsfragments/2626.misc b/newsfragments/2626.misc deleted file mode 100644 index e2a302055d..0000000000 --- a/newsfragments/2626.misc +++ /dev/null @@ -1 +0,0 @@ -Add filter in `TOFSpotFinder._post_process` to remove any reflections outside of the time of flight range diff --git a/newsfragments/2632.feature b/newsfragments/2632.feature new file mode 100644 index 0000000000..1d4515a7b8 --- /dev/null +++ b/newsfragments/2632.feature @@ -0,0 +1 @@ +``dials.correlation_matrix``: A new command-line tool for correlation and cosine similarity clustering of multi-crystal datasets, independent of ``xia2.multiplex``. It provides HTML output, including clustering heatmaps, dendrograms and corresponding ``dials.cosym`` graphs. diff --git a/newsfragments/2633.misc b/newsfragments/2633.misc deleted file mode 100644 index 91386d5ab1..0000000000 --- a/newsfragments/2633.misc +++ /dev/null @@ -1 +0,0 @@ -Constrain wxPython version to minimum 4.2.0. This ensures releases are not downgraded. diff --git a/newsfragments/2634.bugfix b/newsfragments/2634.bugfix deleted file mode 100644 index 161632c309..0000000000 --- a/newsfragments/2634.bugfix +++ /dev/null @@ -1 +0,0 @@ -``dials.ssx_index``: Don't combine detector models if individually refined diff --git a/newsfragments/2641.misc b/newsfragments/2641.misc new file mode 100644 index 0000000000..e7fce4ab53 --- /dev/null +++ b/newsfragments/2641.misc @@ -0,0 +1 @@ +Remove find_spots_server_client tests. diff --git a/newsfragments/2651.feature b/newsfragments/2651.feature new file mode 100644 index 0000000000..312d214171 --- /dev/null +++ b/newsfragments/2651.feature @@ -0,0 +1 @@ +DIALS is now compatible with Python 3.12. diff --git a/newsfragments/2654.feature b/newsfragments/2654.feature new file mode 100644 index 0000000000..a46d4d9535 --- /dev/null +++ b/newsfragments/2654.feature @@ -0,0 +1 @@ +``dials.scale``: Add filtering options to default basic error model to allow error modelling of stills data diff --git a/newsfragments/2658.bugfix b/newsfragments/2658.bugfix new file mode 100644 index 0000000000..6e9e6f86a0 --- /dev/null +++ b/newsfragments/2658.bugfix @@ -0,0 +1 @@ +``dials.find_rotation_axis``: Correctly set the orientation of the rotation axis for a multi-axis goniometer. diff --git a/newsfragments/2660.bugfix b/newsfragments/2660.bugfix new file mode 100644 index 0000000000..5e29f576d1 --- /dev/null +++ b/newsfragments/2660.bugfix @@ -0,0 +1 @@ +Fix `dials.show` beam checks for time of flight experiments. 
diff --git a/newsfragments/2664.bugfix b/newsfragments/2664.bugfix new file mode 100644 index 0000000000..99af372143 --- /dev/null +++ b/newsfragments/2664.bugfix @@ -0,0 +1 @@ +d_min, d_max in masking: raise error if d_min > d_max so no spots will be found diff --git a/newsfragments/2666.feature b/newsfragments/2666.feature new file mode 100644 index 0000000000..01b33456da --- /dev/null +++ b/newsfragments/2666.feature @@ -0,0 +1 @@ +``dials.cosym``: Add alternative weighting during cosym CC calculation diff --git a/newsfragments/2667.misc b/newsfragments/2667.misc new file mode 100644 index 0000000000..6ea1b19665 --- /dev/null +++ b/newsfragments/2667.misc @@ -0,0 +1 @@ +Reset ids in image grouping code to enable use in scripts diff --git a/newsfragments/2668.bugfix b/newsfragments/2668.bugfix new file mode 100644 index 0000000000..00980f672f --- /dev/null +++ b/newsfragments/2668.bugfix @@ -0,0 +1 @@ +``dials.cosym``: Make function return structure correct in recently added cc_weights option diff --git a/newsfragments/2670.bugfix b/newsfragments/2670.bugfix new file mode 100644 index 0000000000..2f76fa6ed4 --- /dev/null +++ b/newsfragments/2670.bugfix @@ -0,0 +1 @@ +``dials.find_rotation_axis``: Add reflection selection criteria to avoid runs that use a very large amount of memory. diff --git a/newsfragments/2671.feature b/newsfragments/2671.feature new file mode 100644 index 0000000000..8daf56a644 --- /dev/null +++ b/newsfragments/2671.feature @@ -0,0 +1 @@ +``dials.correlation_matrix``: Add tables with cluster information to html output diff --git a/newsfragments/2672.feature b/newsfragments/2672.feature new file mode 100644 index 0000000000..e410954029 --- /dev/null +++ b/newsfragments/2672.feature @@ -0,0 +1 @@ +XXX.feature: Add dials.split_still_data for splitting dials-processed still data based on image number (e.g. dose series) diff --git a/newsfragments/2673.bugfix b/newsfragments/2673.bugfix new file mode 100644 index 0000000000..22cb041c81 --- /dev/null +++ b/newsfragments/2673.bugfix @@ -0,0 +1 @@ +``dials.cosym``: For cc_weights=sigma, ensure correct filtering based on min_pairs parameters diff --git a/newsfragments/2674.bugfix b/newsfragments/2674.bugfix new file mode 100644 index 0000000000..28100bbc86 --- /dev/null +++ b/newsfragments/2674.bugfix @@ -0,0 +1 @@ +``dials.cosym``: Fix to give more accurate cc calculation when running with a space_group set diff --git a/newsfragments/2679.bugfix b/newsfragments/2679.bugfix new file mode 100644 index 0000000000..2d9a35c7b1 --- /dev/null +++ b/newsfragments/2679.bugfix @@ -0,0 +1 @@ +Fix flex.reflection_table.concat to not modify in-place. 
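A minimal usage sketch of the behaviour described in the #2679 newsfragment above, assuming the ``flex.reflection_table.concat`` static method shown in the ``flex_ext.py`` hunk later in this diff; the ``.refl`` file names are hypothetical placeholders:

    from dials.array_family import flex

    # Illustrative only: real tables would come from DIALS processing output,
    # loaded with flex.reflection_table.from_file(...).
    tables = [flex.reflection_table.from_file(f) for f in ("a.refl", "b.refl")]

    combined = flex.reflection_table.concat(tables)

    # After the fix, concat builds and returns a new table; the first input
    # table is no longer extended in place as a side effect.
    assert combined is not tables[0]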
diff --git a/setup.py b/setup.py index cb759536f0..db32d461de 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from build import build -__version_tag__ = "3.18.dev" +__version_tag__ = "3.20.dev" setup_kwargs = { "name": "dials", diff --git a/src/dials/algorithms/correlation/__init__.py b/src/dials/algorithms/correlation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/dials/algorithms/correlation/analysis.py b/src/dials/algorithms/correlation/analysis.py new file mode 100644 index 0000000000..cf41589e0e --- /dev/null +++ b/src/dials/algorithms/correlation/analysis.py @@ -0,0 +1,402 @@ +from __future__ import annotations + +import copy +import json +import logging +import sys +from collections import OrderedDict + +import numpy as np +import scipy.spatial.distance as ssd +from scipy.cluster import hierarchy + +import iotbx.phil +from dxtbx.model import ExperimentList +from libtbx.phil import scope_extract +from scitbx.array_family import flex + +from dials.algorithms.correlation.cluster import ClusterInfo +from dials.algorithms.correlation.plots import linkage_matrix_to_dict, to_plotly_json +from dials.algorithms.symmetry.cosym import CosymAnalysis +from dials.algorithms.symmetry.cosym.plots import plot_coords, plot_rij_histogram +from dials.array_family.flex import reflection_table +from dials.util import tabulate +from dials.util.exclude_images import get_selection_for_valid_image_ranges +from dials.util.filter_reflections import filtered_arrays_from_experiments_reflections +from dials.util.multi_dataset_handling import select_datasets_on_identifiers + +logger = logging.getLogger("dials.algorithms.correlation.analysis") + +phil_scope = iotbx.phil.parse( + """\ +partiality_threshold = 0.4 + .type = float(value_min=0, value_max=1) + .help = "Use reflections with a partiality greater than the threshold." + +include scope dials.algorithms.symmetry.cosym.phil_scope + +relative_length_tolerance = 0.05 + .type = float(value_min=0) + .help = "Datasets are only accepted if unit cell lengths fall within this relative tolerance of the median cell lengths." + +absolute_angle_tolerance = 2 + .type = float(value_min=0) + .help = "Datasets are only accepted if unit cell angles fall within this absolute tolerance of the median cell angles." + +min_reflections = 10 + .type = int(value_min=1) + .help = "The minimum number of reflections per experiment." +""", + process_includes=True, +) + + +class CorrelationMatrix: + def __init__( + self, + experiments: ExperimentList, + reflections: list[reflection_table], + params: scope_extract = None, + ids_to_identifiers_map: dict = None, + ): + """ + Set up the required cosym preparations for determining the correlation matricies + of a series of input experiments and reflections. + + Args: + experiments (dxtbx_model_ext.ExperimentList): list of experiments in dials format + reflections (list): list of dials_array_family_flex_ext.reflection_table objects associated with the experiments + params (libtbx.phil.scope_extract): + """ + + # Set up experiments, reflections and params + + if params is None: + params = phil_scope.extract() + self.params = params + self._reflections = [] + self.ids_to_identifiers_map = ids_to_identifiers_map + + if len(reflections) == len(experiments): + for refl, expt in zip(reflections, experiments): + sel = get_selection_for_valid_image_ranges(refl, expt) + self._reflections.append(refl.select(sel)) + else: + sys.exit( + "Number of reflection tables does not match number of experiments." 
+ ) + + # Initial filtering to remove experiments and reflections that do not meet the minimum number of reflections required (params.min_reflections) + + self._experiments, self._reflections = self._filter_min_reflections( + experiments, self._reflections + ) + + # Used for optional json creation that is in a format friendly for import and analysis (for future development) + # Also to retain dataset ids when used in multiplex + if self.ids_to_identifiers_map is None: + self.ids_to_identifiers_map = {} + for table in self._reflections: + self.ids_to_identifiers_map.update(table.experiment_identifiers()) + + self.labels = list(dict.fromkeys(self.ids_to_identifiers_map)) + self._labels_all = flex.size_t(self.labels) + + # Filter reflections that do not meet partiality threshold or default I/Sig(I) criteria + + datasets = filtered_arrays_from_experiments_reflections( + self._experiments, + self._reflections, + outlier_rejection_after_filter=False, + partiality_threshold=params.partiality_threshold, + ) + + self.unmerged_datasets = datasets + + # Merge intensities to prepare for cosym analysis + + self.datasets = self._merge_intensities(datasets) + + # Set required params for cosym to skip symmetry determination and reduce dimensions + + self.params.lattice_group = self.datasets[0].space_group_info() + self.params.space_group = self.datasets[0].space_group_info() + + self.cosym_analysis = CosymAnalysis(self.datasets, self.params) + + def _merge_intensities(self, datasets: list) -> list: + """ + Merge intensities and elimate systematically absent reflections. + + Args: + datasets(list): list of cctbx.miller.array objects + Returns: + datasets_sys_absent_eliminated(list): list of merged cctbx.miller.array objects + """ + individual_merged_intensities = [] + for unmerged in datasets: + individual_merged_intensities.append( + unmerged.merge_equivalents().array().set_info(unmerged.info()) + ) + datasets_sys_absent_eliminated = [ + d.eliminate_sys_absent(integral_only=True).primitive_setting() + for d in individual_merged_intensities + ] + + return datasets_sys_absent_eliminated + + def _filter_min_reflections( + self, experiments: ExperimentList, reflections: list[reflection_table] + ) -> tuple[ExperimentList, list[reflection_table]]: + """ + Filter all datasets that have less than the specified number of reflections. + + Args: + experiments (dxtbx_model_ext.ExperimentList): list of experiments in dials format + reflections (list): list of dials_array_family_flex_ext.reflection_table objects associated with the experiments + + Returns: + filtered_datasets (tuple): tuple of filtered datasets + + """ + identifiers = [] + + for expt, refl in zip(experiments, reflections): + if len(refl) >= self.params.min_reflections: + identifiers.append(expt.identifier) + + filtered_datasets = select_datasets_on_identifiers( + experiments, reflections, use_datasets=identifiers + ) + + return filtered_datasets + + def calculate_matrices(self): + """ + Runs the required algorithms within dials.cosym to calculate the rij matrix and optimise the coordinates. + These results are passed into matrix computation functions to calculate the cosine similarity (cos-angle) and correlation matrices + as well as the corresponding clustering. 
+ """ + + # Cosym algorithm to calculate the rij matrix (CC matrix when symmetry known) + self.cosym_analysis._intialise_target() + + # Cosym proceedures to calculate the cos-angle matrix + self.cosym_analysis._determine_dimensions() + self.cosym_analysis._optimise( + self.cosym_analysis.params.minimization.engine, + max_iterations=self.cosym_analysis.params.minimization.max_iterations, + max_calls=self.cosym_analysis.params.minimization.max_calls, + ) + + # Convert cosym output into cc/cos matrices in correct form and compute linkage matrices as clustering method + ( + self.correlation_matrix, + self.cc_linkage_matrix, + ) = self.compute_correlation_coefficient_matrix( + self.cosym_analysis.target.rij_matrix + ) + + self.correlation_clusters = self.cluster_info( + linkage_matrix_to_dict(self.cc_linkage_matrix) + ) + + logger.info("\nIntensity correlation clustering summary:") + self.cc_table = ClusterInfo.as_table(self.correlation_clusters) + logger.info(tabulate(self.cc_table, headers="firstrow", tablefmt="rst")) + self.cos_angle, self.cos_linkage_matrix = self.compute_cos_angle_matrix( + self.cosym_analysis.coords + ) + + self.cos_angle_clusters = self.cluster_info( + linkage_matrix_to_dict(self.cos_linkage_matrix) + ) + + logger.info("\nCos(angle) clustering summary:") + self.cos_table = ClusterInfo.as_table(self.cos_angle_clusters) + logger.info(tabulate(self.cos_table, headers="firstrow", tablefmt="rst")) + + @staticmethod + def compute_correlation_coefficient_matrix( + correlation_matrix: np.ndarray, + ) -> tuple(np.ndarray, np.ndarray): + """ + Computes the correlation matrix and clustering linkage matrix from the rij cosym matrix. + + Args: + correlation_matrix(numpy.ndarray): pair-wise matrix of correlation coefficients + + Returns: + correlation_matrix(numpy.ndarray): correlation matrix with corrections to diagonals and accounting for floating point errors + cc_linkage_matrix(numpy.ndarray): linkage matrix describing clustering of correlation matrix in dendrogram-style + + """ + + logger.info("\nCalculating Correlation Matrix (rij matrix - see dials.cosym)") + + # Make diagonals equal to 1 (each dataset correlated with itself) + np.fill_diagonal(correlation_matrix, 1) + + # clip values of correlation matrix to account for floating point errors + correlation_matrix.clip(-1, 1, out=correlation_matrix) + + # Convert to distance matrix rather than correlation + diffraction_dissimilarity = 1 - correlation_matrix + try: + assert ssd.is_valid_dm(diffraction_dissimilarity, tol=1e-12) + except AssertionError: + sys.exit( + "Correlation matrix does not give a valid distance matrix. Distance matrix is either non-symmetric or does not have a zero-diagonal." + ) + + # convert the redundant n*n square matrix form into a condensed nC2 array + cc_dist_mat = ssd.squareform(diffraction_dissimilarity, checks=False) + + # Clustering method + cc_linkage_matrix = hierarchy.linkage(cc_dist_mat, method="average") + + return correlation_matrix, cc_linkage_matrix + + @staticmethod + def compute_cos_angle_matrix( + coords: np.ndarray, + ) -> tuple(np.ndarray, np.ndarray): + """ + Computes the cos_angle matrix and clustering linkage matrix from the optimized cosym coordinates. 
+ Args: + coords(numpy.ndarray): matrix of coordinates output from cosym optimisation + + Returns: + cos_angle(numpy.ndarray): pair-wise cos angle matrix + cos_linkage_matrix(numpy.ndarray): linkage matrix describing clustering of cos angle matrix in dendrogram-style + + """ + logger.info( + "\nCalculating Cos Angle Matrix from optimised cosym coordinates (see dials.cosym)" + ) + + # Convert coordinates to cosine distances and then reversed so closer cosine distances have higher values to match CC matrix + cos_dist_mat = ssd.pdist(coords, metric="cosine") + cos_angle = 1 - ssd.squareform(cos_dist_mat) + + # Clustering method + cos_linkage_matrix = hierarchy.linkage(cos_dist_mat, method="average") + + return cos_angle, cos_linkage_matrix + + def cluster_info(self, cluster_dict: dict) -> list: + """ + Generate list of cluster objects with associated statistics. + Args: + cluster_dict(dict): dictionary of clusters (generated from linkage_matrix_to_dict) + + Returns: + info(list): list of ClusterInfo objects to describe all clusters of a certain type (ie correlation or cos angle) + """ + + info = [] + for cluster_id, cluster in cluster_dict.items(): + uc_params = [flex.double() for i in range(6)] + for j in cluster["datasets"]: + uc_j = self.datasets[j - 1].unit_cell().parameters() + for i in range(6): + uc_params[i].append(uc_j[i]) + average_uc = [flex.mean(uc_params[i]) for i in range(6)] + intensities_cluster = [] + labels_cluster = [] + ids = [self._labels_all[id - 1] for id in cluster["datasets"]] + for idx, k in zip(self._labels_all, self.unmerged_datasets): + if idx in ids: + intensities_cluster.append(k) + labels_cluster.append(idx) + merged = None + for d in intensities_cluster: + if merged is None: + merged = copy.deepcopy(d) + else: + merged = merged.concatenate(d, assert_is_similar_symmetry=False) + merging = merged.merge_equivalents() + merged_intensities = merging.array() + multiplicities = merging.redundancies() + info.append( + ClusterInfo( + cluster_id, + labels_cluster, + flex.mean(multiplicities.data().as_double()), + merged_intensities.completeness(), + unit_cell=average_uc, + height=cluster.get("height"), + ) + ) + return info + + def convert_to_html_json(self): + """ + Prepares the required dataset tables and converts analysis into the required format for HTML output. + """ + + # Convert the cosine and cc matrices into a plotly json format for output graphs + + self.cc_json = to_plotly_json( + self.correlation_matrix, + self.cc_linkage_matrix, + labels=self.labels, + matrix_type="correlation", + ) + self.cos_json = to_plotly_json( + self.cos_angle, + self.cos_linkage_matrix, + labels=self.labels, + matrix_type="cos_angle", + ) + + self.rij_graphs = OrderedDict() + + self.rij_graphs.update( + plot_rij_histogram(self.correlation_matrix, key="cosym_rij_histogram_sg") + ) + + self.rij_graphs.update( + plot_coords(self.cosym_analysis.coords, key="cosym_coordinates_sg") + ) + + # Generate the table for the html that lists all datasets and image paths present in the analysis + + paths = enumerate(e.imageset.paths()[0] for e in self._experiments) + self.table_list = [["Experiment/Image Number", "Image Path"], *map(list, paths)] + + def convert_to_importable_json(self, linkage_matrix: np.ndarray) -> OrderedDict: + """ + Generate a json file of the linkage matrices with unique identifiers rather than dataset numbers + May be useful for future developments to import this for further clustering analysis. 
+ Args: + linkage_matrix(numpy.ndarray): linkage matrix from hierarchy.linkage methods + Returns: + linkage_mat_as_dict(collections.OrderedDict): linkage matrix converted to dictionary with datasets replaced with dials unique identifiers + """ + linkage_mat_as_dict = linkage_matrix_to_dict(linkage_matrix) + for d in linkage_mat_as_dict.values(): + # Difference in indexing between linkage_mat_as_dict and datasets, so have i-1 + d["datasets"] = [self.ids_to_identifiers_map[i - 1] for i in d["datasets"]] + + return linkage_mat_as_dict + + def output_json(self): + """ + Outputs the cos and cc json files containing correlation/cos matrices and linkage details. + """ + + linkage_mat_as_dict_cc = self.convert_to_importable_json(self.cc_linkage_matrix) + linkage_mat_as_dict_cos = self.convert_to_importable_json( + self.cos_linkage_matrix + ) + + combined_json_dict = { + "correlation_matrix_clustering": linkage_mat_as_dict_cc, + "correlation_matrix": self.correlation_matrix.tolist(), + "cos_matrix_clustering": linkage_mat_as_dict_cos, + "cos_angle_matrix": self.cos_angle.tolist(), + } + + with open(self.params.output.json, "w") as f: + json.dump(combined_json_dict, f) diff --git a/src/dials/algorithms/correlation/cluster.py b/src/dials/algorithms/correlation/cluster.py new file mode 100644 index 0000000000..ca271481ba --- /dev/null +++ b/src/dials/algorithms/correlation/cluster.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from libtbx.str_utils import wordwrap + + +class ClusterInfo: + def __init__( + self, cluster_id, labels, multiplicity, completeness, unit_cell, height=None + ): + self.cluster_id = cluster_id + self.labels = labels + self.multiplicity = multiplicity + self.completeness = completeness + self.unit_cell = unit_cell + self.height = height + + def __str__(self): + lines = [ + "Cluster %i" % self.cluster_id, + " Number of datasets: %i" % len(self.labels), + " Completeness: %.1f %%" % (self.completeness * 100), + " Multiplicity: %.2f" % self.multiplicity, + " Datasets:" + ",".join("%s" % s for s in self.labels), + ] + if self.height is not None: + lines.append(" height: %f" % self.height) + return "\n".join(lines) + + @staticmethod + def as_table(cluster_info): + headers = [ + "Cluster", + "No. datasets", + "Datasets", + "Height", + "Multiplicity", + "Completeness", + ] + rows = [] + for info in cluster_info: + rows.append( + [ + "%i" % info.cluster_id, + "%i" % len(info.labels), + wordwrap(" ".join("%s" % l for l in info.labels)), + "%.2g" % info.height, + "%.1f" % info.multiplicity, + "%.2f" % info.completeness, + ] + ) + + rows.insert(0, headers) + return rows diff --git a/src/dials/algorithms/correlation/plots.py b/src/dials/algorithms/correlation/plots.py new file mode 100644 index 0000000000..ea3a9330e4 --- /dev/null +++ b/src/dials/algorithms/correlation/plots.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +import copy +import sys +from collections import OrderedDict + +import numpy as np +from scipy.cluster import hierarchy + +from dials.algorithms.clustering.plots import scipy_dendrogram_to_plotly_json + + +def linkage_matrix_to_dict(linkage_matrix: np.ndarray) -> OrderedDict: + """ + Convert a linkage matrix to a dictionary. 
+ Args: + linkage_matrix(numpy.ndarray): linkage matrix from hierarchy.linkage methods + Returns: + link_dict(collections.OrderedDict): linkage matrix converted to dictionary format + """ + tree = hierarchy.to_tree(linkage_matrix, rd=False) + + d = {} + + # https://gist.github.com/mdml/7537455 + + def add_node(node): + if node.is_leaf(): + return + cluster_id = node.get_id() - len(linkage_matrix) - 1 + row = linkage_matrix[cluster_id] + d[cluster_id + 1] = { + "datasets": [i + 1 for i in sorted(node.pre_order())], + "height": row[2], + } + + # Recursively add the current node's children + if node.left: + add_node(node.left) + if node.right: + add_node(node.right) + + add_node(tree) + + link_dict = OrderedDict(sorted(d.items())) + + return link_dict + + +def to_plotly_json( + correlation_matrix: np.ndarray, + linkage_matrix: np.ndarray, + labels: list = None, + matrix_type: str = "correlation", +) -> dict: + """ + Prepares a plotly-style plot of the heatmap corresponding to the input matrix + with dendrograms on the top and left sides. + Args: + correlation_matrix(numpy.ndarray): pair-wise correlation matrix + linkage_matrix(numpy.ndarray): linkage matrix describing dendrogram-style clustering of correlation matrix + labels(list): desired labels for datasets + matrix_type(str): must be "correlation" or "cos_angle" + Returns: + d(dict): heatmap and dendrogram plot expressed as a dictionary for future graphical display + """ + + if matrix_type not in ("correlation", "cos_angle"): + sys.exit( + f"Matrix type input as {matrix_type}, but needs to be 'correlation' or 'cos_angle'" + ) + + ddict = hierarchy.dendrogram( + linkage_matrix, + color_threshold=0.05, + labels=labels, + show_leaf_counts=False, + no_plot=True, + ) + + # Converts the dendrogram to plotly json format - y2_dict is the one above the heatmap + y2_dict = scipy_dendrogram_to_plotly_json( + ddict, "Dendrogram", xtitle="Individual datasets", ytitle="Ward distance" + ) + + # x2_dict is the same dendrogram but positioned left of the heatmap + x2_dict = copy.deepcopy(y2_dict) + for d in y2_dict["data"]: + d["yaxis"] = "y2" + d["xaxis"] = "x2" + + # Switches x and y data so that the dendrogram will be rotated 90deg + # This orientatates it to match with the heatmap + for d in x2_dict["data"]: + x = d["x"] + y = d["y"] + d["x"] = y + d["y"] = x + d["yaxis"] = "y3" + d["xaxis"] = "x3" + + # Reorders the matrix into the same order as the dendrogram for the plots + D = correlation_matrix + index = ddict["leaves"] + D = D[index, :] + D = D[:, index] + ccdict = { + "data": [ + { + "name": "%s_matrix" % matrix_type, + "x": list(range(D.shape[0])), + "y": list(range(D.shape[1])), + "z": D.tolist(), + "type": "heatmap", + "colorbar": { + "title": ( + "Correlation coefficient" + if matrix_type == "correlation" + else "cos(angle)" + ), + "titleside": "right", + "xpad": 0, + }, + "colorscale": "YIOrRd", + "xaxis": "x", + "yaxis": "y", + } + ], + "layout": { + "autosize": False, + "bargap": 0, + "height": 1000, + "hovermode": "closest", + "margin": {"r": 20, "t": 50, "autoexpand": True, "l": 20}, + "showlegend": False, + "title": "Dendrogram Heatmap", + "width": 1000, + "xaxis": { + "domain": [0.2, 0.9], + "mirror": "allticks", + "showgrid": False, + "showline": False, + "showticklabels": True, + "tickmode": "array", + "ticks": "", + "ticktext": y2_dict["layout"]["xaxis"]["ticktext"], + "tickvals": list(range(len(y2_dict["layout"]["xaxis"]["ticktext"]))), + "tickangle": 300, + "title": "", + "type": "linear", + "zeroline": False, + }, + "yaxis": { + 
"domain": [0, 0.78], + "anchor": "x", + "mirror": "allticks", + "showgrid": False, + "showline": False, + "showticklabels": True, + "tickmode": "array", + "ticks": "", + "ticktext": y2_dict["layout"]["xaxis"]["ticktext"], + "tickvals": list(range(len(y2_dict["layout"]["xaxis"]["ticktext"]))), + "title": "", + "type": "linear", + "zeroline": False, + }, + "xaxis2": { + "domain": [0.2, 0.9], + "anchor": "y2", + "showgrid": False, + "showline": False, + "showticklabels": False, + "zeroline": False, + }, + "yaxis2": { + "domain": [0.8, 1], + "anchor": "x2", + "showgrid": False, + "showline": False, + "zeroline": False, + }, + "xaxis3": { + "domain": [0.0, 0.1], + "anchor": "y3", + "range": [max(max(d["x"]) for d in x2_dict["data"]), 0], + "showgrid": False, + "showline": False, + "tickangle": 300, + "zeroline": False, + }, + "yaxis3": { + "domain": [0, 0.78], + "anchor": "x3", + "showgrid": False, + "showline": False, + "showticklabels": False, + "zeroline": False, + }, + }, + } + d = ccdict + d["data"].extend(y2_dict["data"]) + d["data"].extend(x2_dict["data"]) + + d["clusters"] = linkage_matrix_to_dict(linkage_matrix) + + return d diff --git a/src/dials/algorithms/scaling/algorithm.py b/src/dials/algorithms/scaling/algorithm.py index 812bfac0b7..bfe09e3a0e 100644 --- a/src/dials/algorithms/scaling/algorithm.py +++ b/src/dials/algorithms/scaling/algorithm.py @@ -641,6 +641,9 @@ def targeted_scaling_algorithm(scaler): scaler.make_ready_for_scaling() scaler.perform_scaling() + expand_and_do_outlier_rejection(scaler, calc_cov=True) + do_error_analysis(scaler, reselect=True) + if scaler.params.scaling_options.full_matrix and ( scaler.params.scaling_refinery.engine == "SimpleLBFGS" ): @@ -648,9 +651,11 @@ def targeted_scaling_algorithm(scaler): engine=scaler.params.scaling_refinery.full_matrix_engine, max_iterations=scaler.params.scaling_refinery.full_matrix_max_iterations, ) + else: + scaler.perform_scaling() expand_and_do_outlier_rejection(scaler, calc_cov=True) - # do_error_analysis(scaler, reselect=False) + do_error_analysis(scaler, reselect=False) scaler.prepare_reflection_tables_for_output() return scaler diff --git a/src/dials/algorithms/scaling/error_model/engine.py b/src/dials/algorithms/scaling/error_model/engine.py index 4b67e7425d..9cf816c1fc 100644 --- a/src/dials/algorithms/scaling/error_model/engine.py +++ b/src/dials/algorithms/scaling/error_model/engine.py @@ -21,7 +21,9 @@ logger = logging.getLogger("dials") -def run_error_model_refinement(model, Ih_table): +def run_error_model_refinement( + model, Ih_table, min_partiality=0.4, use_stills_filtering=False +): """ Refine an error model for the input data, returning the model. @@ -30,7 +32,9 @@ def run_error_model_refinement(model, Ih_table): RuntimeError: can be raised in LBFGS minimiser. 
""" assert Ih_table.n_work_blocks == 1 - model.configure_for_refinement(Ih_table.blocked_data_list[0]) + model.configure_for_refinement( + Ih_table.blocked_data_list[0], min_partiality, use_stills_filtering + ) if not model.active_parameters: logger.info("All error model parameters fixed, skipping refinement") else: @@ -170,7 +174,6 @@ def test_value_convergence(self): r2 = self.avals[-2] except IndexError: return False - if r2 > 0: return abs((r2 - r1) / r2) < self._avals_tolerance else: @@ -201,7 +204,7 @@ def _refine_component(self, model, target, parameterisation): def run(self): """Refine the model.""" if self.parameters_to_refine == ["a", "b"]: - for n in range(20): # usually converges in around 5 cycles + for n in range(50): # usually converges in around 5 cycles self._refine_a() # now update in model self.avals.append(self.model.components["a"].parameters[0]) diff --git a/src/dials/algorithms/scaling/error_model/error_model.py b/src/dials/algorithms/scaling/error_model/error_model.py index ec490ec228..b42a4edb3b 100644 --- a/src/dials/algorithms/scaling/error_model/error_model.py +++ b/src/dials/algorithms/scaling/error_model/error_model.py @@ -42,6 +42,15 @@ "determine both parameters concurrently. If minimisation=None," "the model parameters are fixed to their initial or given values." .expert_level = 3 + stills { + min_Isigma = 2.0 + .type=float + .help = "Minimum uncorrected I/sigma for individual reflections used in error model optimisation" + min_multiplicity = 4 + .type = int + .help = "Only reflections with at least this multiplicity (after Isigma filtering) are" + "used in error model optimisation." + } min_Ih = 25.0 .type = float .help = "Reflections with expected intensity above this value are to." @@ -248,7 +257,10 @@ def _create_summation_matrix(self): n = self.Ih_table.size self.binning_info["n_reflections"] = n summation_matrix = sparse.matrix(n, self.n_bins) + # calculate expected intensity value in pixels on scale of each image Ih = self.Ih_table.Ih_values * self.Ih_table.inverse_scale_factors + if "partiality" in self.Ih_table.Ih_table: + Ih *= self.Ih_table.Ih_table["partiality"].to_numpy() size_order = flex.sort_permutation(flumpy.from_numpy(Ih), reverse=True) Imax = Ih.max() min_Ih = Ih.min() @@ -383,7 +395,6 @@ def __init__(self, a=None, b=None, basic_params=None): see if a user specified fixed value is set. If no fixed values are given then the model starts with the default parameters a=1.0 b=0.02 """ - self.free_components = [] self.sortedy = None self.sortedx = None @@ -408,14 +419,19 @@ def __init__(self, a=None, b=None, basic_params=None): if not basic_params.b: self._active_parameters.append("b") - def configure_for_refinement(self, Ih_table, min_partiality=0.4): + def configure_for_refinement( + self, Ih_table, min_partiality=0.4, use_stills_filtering=False + ): """ Add data to allow error model refinement. Raises: ValueError if insufficient reflections left after filtering. """ self.filtered_Ih_table = self.filter_unsuitable_reflections( - Ih_table, self.params, min_partiality + Ih_table, + self.params, + min_partiality, + use_stills_filtering, ) # always want binning info so that can calc for output. 
self.binner = ErrorModelBinner( @@ -455,8 +471,19 @@ def n_refl(self): return self.filtered_Ih_table.size @classmethod - def filter_unsuitable_reflections(cls, Ih_table, error_params, min_partiality): + def filter_unsuitable_reflections( + cls, Ih_table, error_params, min_partiality, use_stills_filtering + ): """Filter suitable reflections for minimisation.""" + if use_stills_filtering: + return filter_unsuitable_reflections_stills( + Ih_table, + error_params.stills.min_multiplicity, + error_params.stills.min_Isigma, + min_partiality=min_partiality, + min_reflections_required=cls.min_reflections_required, + min_Ih=error_params.min_Ih, + ) return filter_unsuitable_reflections( Ih_table, min_Ih=error_params.min_Ih, @@ -570,6 +597,49 @@ def binned_variances_summary(self): ) +def filter_unsuitable_reflections_stills( + Ih_table, + min_multiplicity, + min_Isigma, + min_partiality, + min_reflections_required, + min_Ih, +): + """Filter suitable reflections for minimisation.""" + + if "partiality" in Ih_table.Ih_table: + sel = Ih_table.Ih_table["partiality"].to_numpy() > min_partiality + Ih_table = Ih_table.select(sel) + + sel = (Ih_table.intensities / (Ih_table.variances**0.5)) >= min_Isigma + Ih_table = Ih_table.select(sel) + + Ih = Ih_table.Ih_values * Ih_table.inverse_scale_factors + if "partiality" in Ih_table.Ih_table: + Ih *= Ih_table.Ih_table["partiality"].to_numpy() + sel = Ih > min_Ih + Ih_table = Ih_table.select(sel) + + n_h = Ih_table.calc_nh() + sigmaprime = calc_sigmaprime([1.0, 0.0], Ih_table) + delta_hl = calc_deltahl(Ih_table, n_h, sigmaprime) + # Optimise on the central bulk distribution of the data - avoid the few + # reflections in the long tails. + sel = np.abs(delta_hl) < 6.0 + Ih_table = Ih_table.select(sel) + + sel = Ih_table.calc_nh() >= min_multiplicity + Ih_table = Ih_table.select(sel) + n = Ih_table.size + + if n < min_reflections_required: + raise ValueError( + "Insufficient reflections (%s < %s) to perform error modelling." 
+ % (n, min_reflections_required) + ) + return Ih_table + + def filter_unsuitable_reflections( Ih_table, min_Ih, min_partiality, min_reflections_required ): diff --git a/src/dials/algorithms/scaling/scaler.py b/src/dials/algorithms/scaling/scaler.py index d453f65e48..487ad12546 100644 --- a/src/dials/algorithms/scaling/scaler.py +++ b/src/dials/algorithms/scaling/scaler.py @@ -350,6 +350,9 @@ def __init__(self, params, experiment, reflection_table, for_multi=False): self.free_set_selection = flex.bool(self.n_suitable_refl, False) self._free_Ih_table = None # An array of len n_suitable_refl self._configure_model_and_datastructures(for_multi=for_multi) + self.is_still = True + if self._experiment.scan and self._experiment.scan.get_oscillation()[1] != 0.0: + self.is_still = False if self.params.weighting.error_model.error_model: # reload current error model parameters, or create new null self.experiment.scaling_model.load_error_model( @@ -380,7 +383,10 @@ def perform_error_optimisation(self, update_Ih=True): Ih_table, _ = self._create_global_Ih_table(anomalous=True, remove_outliers=True) try: model = run_error_model_refinement( - self._experiment.scaling_model.error_model, Ih_table + self._experiment.scaling_model.error_model, + Ih_table, + self.params.reflection_selection.min_partiality, + use_stills_filtering=self.is_still, ) except (ValueError, RuntimeError) as e: logger.info(e) @@ -1500,7 +1506,12 @@ def perform_error_optimisation(self, update_Ih=True): continue tables = [s.get_valid_reflections().select(~s.outliers) for s in scalers] space_group = scalers[0].experiment.crystal.get_space_group() - Ih_table = IhTable(tables, space_group, anomalous=True) + Ih_table = IhTable( + tables, + space_group, + anomalous=True, + additional_cols=["partiality"], + ) if len(minimisation_groups) == 1: logger.info("Determining a combined error model for all datasets") else: @@ -1509,7 +1520,10 @@ def perform_error_optimisation(self, update_Ih=True): ) try: model = run_error_model_refinement( - scalers[0]._experiment.scaling_model.error_model, Ih_table + scalers[0]._experiment.scaling_model.error_model, + Ih_table, + min_partiality=self.params.reflection_selection.min_partiality, + use_stills_filtering=scalers[0].is_still, ) except (ValueError, RuntimeError) as e: logger.info(e) diff --git a/src/dials/algorithms/scaling/scaling_library.py b/src/dials/algorithms/scaling/scaling_library.py index 6076be54cd..193887297e 100644 --- a/src/dials/algorithms/scaling/scaling_library.py +++ b/src/dials/algorithms/scaling/scaling_library.py @@ -15,6 +15,7 @@ import math from copy import deepcopy from dataclasses import dataclass +from typing import List, Tuple from unittest.mock import Mock import numpy as np @@ -104,11 +105,11 @@ def choose_initial_scaling_intensities(reflection_table, intensity_choice="profi "intensity.sum.variance" ] * flex.pow2(conv * inverse_partiality) if "partiality.inv.variance" in reflection_table: + # see e.g. https://en.wikipedia.org/wiki/Propagation_of_uncertainty + # section "Example formulae", f=AB. 
reflection_table["variance"] += ( - reflection_table["intensity.sum.value"] - * conv - * reflection_table["partiality.inv.variance"] - ) + flex.pow2(reflection_table["intensity.sum.value"] * conv) + ) * reflection_table["partiality.inv.variance"] else: reflection_table["intensity"] = ( reflection_table["intensity.sum.value"] * conv @@ -486,6 +487,62 @@ def calc_rsplit(cls, this, other, assume_index_matching=False, use_binning=False ) return results + @classmethod + def weighted_cchalf( + cls, this, other, assume_index_matching=False, use_binning=False + ) -> List[Tuple]: + if not use_binning: + assert other.indices().size() == this.indices().size() + if this.data().size() == 0: + return [(None, 0)] + + if assume_index_matching: + (o, c) = (this, other) + else: + (o, c) = this.common_sets(other=other, assert_no_singles=True) + + # The case where the denominator is less or equal to zero is + # pathological and should never arise in practice. + assert len(o.sigmas()) + assert len(c.sigmas()) + n = len(o.data()) + if n == 1: + return [(None, 1)] + v_o = flex.pow2(o.sigmas()) + v_c = flex.pow2(c.sigmas()) + joint_w = 1.0 / (v_o + v_c) + sumjw = flex.sum(joint_w) + norm_jw = joint_w / sumjw + xbar = flex.sum(o.data() * norm_jw) + ybar = flex.sum(c.data() * norm_jw) + dx = o.data() - xbar + dy = c.data() - ybar + sxy = flex.sum(dx * dy * norm_jw) + + sx = flex.sum(flex.pow2(dx) * norm_jw) + sy = flex.sum(flex.pow2(dy) * norm_jw) + if sx == 0.0 or sy == 0.0: + return [(None, 1)] + # effective sample size of weighted sample + # Kish, Leslie. 1965. Survey Sampling New York: Wiley. (R documentation) + # neff = sum(w)^2 / sum(w^2). But sum(w) == 1 as normalised already + neff = 1 / flex.sum(flex.pow2(norm_jw)) + return [(sxy / ((sx * sy) ** 0.5), neff)] + + assert this.binner is not None + results = [] + for i_bin in this.binner().range_all(): + sel = this.binner().selection(i_bin) + results.append( + cls.weighted_cchalf( + this.select(sel), + other.select(sel), + assume_index_matching=assume_index_matching, + use_binning=False, + )[0] + ) + return results + def merging_stats_from_scaled_array( scaled_miller_array, diff --git a/src/dials/algorithms/symmetry/cosym/__init__.py b/src/dials/algorithms/symmetry/cosym/__init__.py index a2b308272a..95346fdeb7 100644 --- a/src/dials/algorithms/symmetry/cosym/__init__.py +++ b/src/dials/algorithms/symmetry/cosym/__init__.py @@ -21,8 +21,10 @@ from cctbx.sgtbx.lattice_symmetry import metric_subgroups from libtbx import Auto from scitbx import matrix +from scitbx.array_family import flex import dials.util +import dials.util.system from dials.algorithms.indexing.symmetry import find_matching_symmetry from dials.algorithms.symmetry import median_unit_cell, symmetry_base from dials.algorithms.symmetry.cosym import engine as cosym_engine @@ -79,6 +81,15 @@ weights = count standard_error .type = choice .short_caption = "Weights" + .help = "If not None, a weights matrix is used in the cosym procedure." + "weights=count uses the number of reflections used to calculate a pairwise correlation coefficient as its weight" + "weights=standard_error uses the reciprocal of the standard error as the weight. The standard error is given by" + "the sqrt of (1-CC*2)/(n-2), where (n-2) are the degrees of freedom in a pairwise CC calculation." +cc_weights = None sigma + .type = choice + .help = "If not None, a weighted cc-half formula is used for calculating pairwise correlation coefficients and degrees of" + "freedom in the cosym procedure." 
+ "weights=sigma uses the intensity uncertainties to perform inverse variance weighting during the cc calculation." min_pairs = 3 .type = int(value_min=1) @@ -99,10 +110,9 @@ .short_caption = "Maximum number of calls" } -nproc = None +nproc = Auto .type = int(value_min=1) - .help = "Deprecated" - .deprecated = True + .help = "Number of processes" """ ) @@ -200,6 +210,21 @@ def _map_space_group_to_input_cell(intensities, space_group): ) self.input_space_group = self.intensities.space_group() + # ensure still unique after mapping - merge equivalents in the higher symmetry + new_intensities = None + new_dataset_ids = flex.int([]) + for d in set(self.dataset_ids): + sel = self.dataset_ids == d + these_i = self.intensities.select(sel) + these_merged = these_i.merge_equivalents().array() + if not new_intensities: + new_intensities = these_merged + else: + new_intensities = new_intensities.concatenate(these_merged) + new_dataset_ids.extend(flex.int(these_merged.size(), d)) + self.intensities = new_intensities + self.dataset_ids = new_dataset_ids + else: self.input_space_group = None @@ -208,6 +233,13 @@ def _map_space_group_to_input_cell(intensities, space_group): self.intensities, self.params.lattice_group.group() ) self.params.lattice_group = tmp_intensities.space_group_info() + # N.B. currently only multiprocessing used if cc_weights=sigma + if self.params.nproc is Auto: + if self.params.cc_weights == "sigma": + params.nproc = dials.util.system.CPU_COUNT + logger.info("Setting nproc={}".format(params.nproc)) + else: + params.nproc = 1 def _intialise_target(self): if self.params.dimensions is Auto: @@ -229,6 +261,8 @@ def _intialise_target(self): lattice_group=self.lattice_group, dimensions=dimensions, weights=self.params.weights, + cc_weights=self.params.cc_weights, + nproc=self.params.nproc, ) def _determine_dimensions(self): diff --git a/src/dials/algorithms/symmetry/cosym/target.py b/src/dials/algorithms/symmetry/cosym/target.py index 33258fdecb..8de32e43a4 100644 --- a/src/dials/algorithms/symmetry/cosym/target.py +++ b/src/dials/algorithms/symmetry/cosym/target.py @@ -2,6 +2,7 @@ from __future__ import annotations +import concurrent.futures import copy import itertools import logging @@ -15,6 +16,135 @@ from cctbx.array_family import flex logger = logging.getLogger(__name__) +from scipy import sparse + +from dials.algorithms.scaling.scaling_library import ExtendedDatasetStatistics + + +def _lattice_lower_upper_index(lattices, lattice_id): + lower_index = int(lattices[lattice_id]) + upper_index = None + if lattice_id < len(lattices) - 1: + upper_index = int(lattices[lattice_id + 1]) + else: + assert lattice_id == len(lattices) - 1 + return lower_index, upper_index + + +def _compute_rij_matrix_one_row_block( + i, + lattices, + data, + indices, + sym_ops, + patterson_group, + weights=True, + min_pairs=3, +): + cs = data.crystal_symmetry() + n_lattices = len(lattices) + rij_cache = {} + + NN = n_lattices * len(sym_ops) + + rij_row = [] + rij_col = [] + rij_data = [] + wij = None + if weights: + wij_row = [] + wij_col = [] + wij_data = [] + + i_lower, i_upper = _lattice_lower_upper_index(lattices, i) + intensities_i = data.data()[i_lower:i_upper] + sigmas_i = data.sigmas()[i_lower:i_upper] + cb_ops = [sgtbx.change_of_basis_op(cb_op_k) for cb_op_k in sym_ops] + + for j in range(n_lattices): + + j_lower, j_upper = _lattice_lower_upper_index(lattices, j) + intensities_j = data.data()[j_lower:j_upper] + sigmas_j = data.sigmas()[j_lower:j_upper] + + for k, cb_op_k in enumerate(cb_ops): + + 
indices_i = indices[cb_op_k.as_xyz()][i_lower:i_upper] + + for kk, cb_op_kk in enumerate(cb_ops): + if i == j and k == kk: + # don't include correlation of dataset with itself + continue + + ik = i + (n_lattices * k) + jk = j + (n_lattices * kk) + + key = (i, j, str(cb_op_k.inverse() * cb_op_kk)) + if key in rij_cache: + cc, n, n_pairs = rij_cache[key] + else: + indices_j = indices[cb_op_kk.as_xyz()][j_lower:j_upper] + + matches = miller.match_indices(indices_i, indices_j) + pairs = matches.pairs() + isel_i = pairs.column(0) + isel_j = pairs.column(1) + isel_i = isel_i.select( + patterson_group.epsilon(indices_i.select(isel_i)) == 1 + ) + isel_j = isel_j.select( + patterson_group.epsilon(indices_j.select(isel_j)) == 1 + ) + ms = miller.set( + crystal_symmetry=cs, indices=indices_j.select(isel_j) + ) + ma_j = miller.array( + miller_set=ms, + data=intensities_j.select(isel_j), + sigmas=sigmas_j.select(isel_j), + ) + ms = miller.set( + crystal_symmetry=cs, indices=indices_i.select(isel_i) + ) + ma_i = miller.array( + miller_set=ms, + data=intensities_i.select(isel_i), + sigmas=sigmas_i.select(isel_i), + ) + n_pairs = ma_i.size() + if ma_i.size() < min_pairs: + n, cc = (None, None) + else: + corr, neff = ExtendedDatasetStatistics.weighted_cchalf( + ma_i, ma_j, assume_index_matching=True + )[0] + if neff: + cc = corr + n = neff + else: + n, cc = (None, None) + + rij_cache[key] = (cc, n, n_pairs) + + if ( + n is None + or cc is None + or (min_pairs is not None and n_pairs < min_pairs) + ): + continue + if weights: + wij_row.append(ik) + wij_col.append(jk) + wij_data.append(n) + rij_row.append(ik) + rij_col.append(jk) + rij_data.append(cc) + + rij = sparse.coo_matrix((rij_data, (rij_row, rij_col)), shape=(NN, NN)) + if weights: + wij = sparse.coo_matrix((wij_data, (wij_row, wij_col)), shape=(NN, NN)) + + return rij, wij class Target: @@ -32,6 +162,8 @@ def __init__( min_pairs=3, lattice_group=None, dimensions=None, + nproc=1, + cc_weights=None, ): r"""Initialise a Target object. @@ -60,6 +192,7 @@ def __init__( assert weights in ("count", "standard_error") self._weights = weights self._min_pairs = min_pairs + self._nproc = nproc data = intensities.customized_copy(anomalous_flag=False) cb_op_to_primitive = data.change_of_basis_op_to_primitive_setting() @@ -70,8 +203,11 @@ def __init__( order = lattice_ids.argsort().astype(np.uint64) sorted_data = data.data().select(flex.size_t(order)) sorted_indices = data.indices().select(flex.size_t(order)) + sorted_sigmas = data.sigmas().select(flex.size_t(order)) self._lattice_ids = lattice_ids[order] - self._data = data.customized_copy(indices=sorted_indices, data=sorted_data) + self._data = data.customized_copy( + indices=sorted_indices, data=sorted_data, sigmas=sorted_sigmas + ) assert isinstance(self._data.indices(), type(flex.miller_index())) assert isinstance(self._data.data(), type(flex.double())) @@ -103,8 +239,10 @@ def __init__( logger.debug( "Patterson group: %s", self._patterson_group.info().symbol_and_number() ) - - self.rij_matrix, self.wij_matrix = self._compute_rij_wij() + if cc_weights == "sigma": + self.rij_matrix, self.wij_matrix = self._compute_rij_wij_ccweights() + else: + self.rij_matrix, self.wij_matrix = self._compute_rij_wij() def set_dimensions(self, dimensions): """Set the number of dimensions for analysis. @@ -145,6 +283,66 @@ def _generate_twin_operators(self, lattice_symmetry_max_delta=5.0): return operators + def _compute_rij_wij_ccweights(self): + # Use flex-based methods for calculating matrices. 
+ # Pre-calculate miller indices after application of each cb_op. Only calculate + # this once per cb_op instead of on-the-fly every time we need it. + indices = {} + epsilons = {} + space_group_type = self._data.space_group().type() + for cb_op in self.sym_ops: + cb_op = sgtbx.change_of_basis_op(cb_op) + indices_reindexed = cb_op.apply(self._data.indices()) + miller.map_to_asu(space_group_type, False, indices_reindexed) + cb_op_str = cb_op.as_xyz() + indices[cb_op_str] = indices_reindexed + epsilons[cb_op_str] = self._patterson_group.epsilon(indices_reindexed) + + rij_matrix = None + wij_matrix = None + + with concurrent.futures.ProcessPoolExecutor(max_workers=self._nproc) as pool: + futures = [ + pool.submit( + _compute_rij_matrix_one_row_block, + i, + self._lattices, + self._data, + indices, + self.sym_ops, + self._patterson_group, + weights=True, + min_pairs=self._min_pairs, + ) + for i, _ in enumerate(self._lattices) + ] + for future in concurrent.futures.as_completed(futures): + rij, wij = future.result() + + if rij_matrix is None: + rij_matrix = rij + else: + rij_matrix += rij + if wij is not None: + if wij_matrix is None: + wij_matrix = wij + else: + wij_matrix += wij + + rij_matrix = rij_matrix.toarray().astype(np.float64) + if wij_matrix is not None: + wij_matrix = wij_matrix.toarray().astype(np.float64) + if self._weights == "standard_error": + # N.B. using effective n due to sigma weighting, which can be below 2 + # but approches 1 in the limit, so rather say efective sample size + # for standard error calc is n-1 + sel = np.where(wij_matrix > 1) + se = np.sqrt((1 - np.square(rij_matrix[sel])) / (wij_matrix[sel] - 1)) + wij_matrix = np.zeros_like(rij_matrix) + wij_matrix[sel] = 1 / se + + return rij_matrix, wij_matrix + def _compute_rij_wij(self, use_cache=True): """Compute the rij_wij matrix. diff --git a/src/dials/array_family/flex_ext.py b/src/dials/array_family/flex_ext.py index 5bf9b76fe7..72a02daecc 100644 --- a/src/dials/array_family/flex_ext.py +++ b/src/dials/array_family/flex_ext.py @@ -531,10 +531,10 @@ def concat( from dials.util.multi_dataset_handling import renumber_table_id_columns tables = renumber_table_id_columns(tables) - first = tables[0] - for table in tables[1:]: - first.extend(table) - return first + new = dials_array_family_flex_ext.reflection_table() + for table in tables: + new.extend(table) + return new def match_with_reference(self, other): """ @@ -956,7 +956,7 @@ def compute_corrections(self, experiments): self["qe"] = qe return lp - def extract_shoeboxes(self, imageset, mask=None, nthreads=1): + def extract_shoeboxes(self, imageset, mask=None): """ Helper function to read a load of shoebox data. 
diff --git a/src/dials/command_line/correlation_matrix.py b/src/dials/command_line/correlation_matrix.py new file mode 100644 index 0000000000..d19c78c65e --- /dev/null +++ b/src/dials/command_line/correlation_matrix.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +import logging +import random +import sys + +import numpy as np +from jinja2 import ChoiceLoader, Environment, PackageLoader + +import iotbx.phil + +from dials.algorithms.correlation.analysis import CorrelationMatrix +from dials.array_family import flex +from dials.util import log, show_mail_handle_errors +from dials.util.multi_dataset_handling import ( + assign_unique_identifiers, + parse_multiple_datasets, +) +from dials.util.options import ArgumentParser, reflections_and_experiments_from_files +from dials.util.version import dials_version + +logger = logging.getLogger("dials.algorithms.correlation.analysis") + +phil_scope = iotbx.phil.parse( + """\ +include scope dials.algorithms.correlation.analysis.phil_scope + +seed = 42 + .type = int(value_min=0) + +output { + log = dials.correlation_matrix.log + .type = str + .help = "The log name" + html = dials.correlation_matrix.html + .type = path + .help = "Filename for the html report" + json = None + .type = str + .help = "Filename for the cluster information output in json format" +} +""", + process_includes=True, +) + + +help_message = """ +This module implements a subset of methods used in dials.cosym to perform +correlation and cosine similarity based clustering methods. Data should be passed +through dials.cosym first to implement consistent symmetry. + +Examples:: + + dials.correlation_matrix symmetrized.expt symmetrized.refl + +""" + + +@show_mail_handle_errors() +def run(args=None): + usage = "dials.correlation_matrix [options] symmetrized.expt symmetrized.refl" + + parser = ArgumentParser( + usage=usage, + phil=phil_scope, + read_reflections=True, + read_experiments=True, + check_format=False, + epilog=help_message, + ) + + params, options, args = parser.parse_args( + args=args, show_diff_phil=False, return_unhandled=True + ) + + # Configure the logging + log.config(verbosity=options.verbose, logfile=params.output.log) + + logger.info(dials_version()) + + # Log the diff phil + diff_phil = parser.diff_phil.as_str() + if diff_phil != "": + logger.info("The following parameters have been modified:\n") + logger.info(diff_phil) + + if params.seed is not None: + flex.set_random_seed(params.seed) + np.random.seed(params.seed) + random.seed(params.seed) + + if not params.input.experiments or not params.input.reflections: + parser.print_help() + sys.exit() + + reflections, experiments = reflections_and_experiments_from_files( + params.input.reflections, params.input.experiments + ) + + reflections = parse_multiple_datasets(reflections) + if len(experiments) != len(reflections): + sys.exit( + "Mismatched number of experiments and reflection tables found: %s & %s." + % (len(experiments), len(reflections)) + ) + if len(experiments) < 2: + sys.exit( + "At least 2 datasets are needed for cluster analysis. Please re-run with more datasets." 
+ ) + try: + experiments, reflections = assign_unique_identifiers(experiments, reflections) + matrices = CorrelationMatrix( + experiments=experiments, reflections=reflections, params=params + ) + except ValueError as e: + sys.exit(e) + + matrices.calculate_matrices() + + if params.output.json: + matrices.output_json() + + if params.output.html: + matrices.convert_to_html_json() + + loader = ChoiceLoader( + [PackageLoader("dials", "templates"), PackageLoader("dials", "templates")] + ) + env = Environment(loader=loader) + + template = env.get_template("clusters.html") + html = template.stream( + page_title="DIALS Correlation Matrix", + cc_cluster_json=matrices.cc_json, + cc_cluster_table=matrices.cc_table, + cos_angle_cluster_json=matrices.cos_json, + cos_angle_cluster_table=matrices.cos_table, + image_range_tables=[matrices.table_list], + cosym_graphs=matrices.rij_graphs, + ) + + logger.info( + f"Saving graphical output of correlation matrices to {params.output.html}." + ) + html.dump(params.output.html, errors="xmlcharrefreplace") + + +if __name__ == "__main__": + run() diff --git a/src/dials/command_line/export.py b/src/dials/command_line/export.py index 5b6c55d649..3eecb8ca2e 100644 --- a/src/dials/command_line/export.py +++ b/src/dials/command_line/export.py @@ -45,6 +45,11 @@ XDS format exports a models.expt file as XDS.INP and XPARM.XDS files. If a reflection file is given it will be exported as a SPOT.XDS file. +SHELX format exports intensity data in HKLF 4 format for use in the SHELX suite +of programs. As this file format does not contain unit cell parameters or +symmetry information a minimal instruction file is also written. Optionally the +expected contents of the asymmetric unit can be added to this instruciton file. + PETS format exports intensity data and diffraction data in the CIF format used by PETS. This is primarily intended to produce files suitable for dynamic diffraction refinement using Jana2020, which requires this format. @@ -72,6 +77,11 @@ dials.export indexed.refl format=xds dials.export models.expt format=xds dials.export models.expt indexed.refl format=xds + + # Export to shelx + dials.export scaled.expt scaled.refl format=shelx + dials.export scaled.expt scaled.refl format=shelx shelx.hklout=dials.hkl + dials.export scaled.expt scaled.refl format=shelx composition=C3H7NO2S """ phil_scope = parse( @@ -249,6 +259,9 @@ ins = dials.ins .type = path .help = "The output ins file" + composition = CH + .type = str + .help = "The chemical composition of the asymmetric unit" scale = True .type = bool .help = "Scale reflections to maximise output precision in SHELX 8.2f format" diff --git a/src/dials/command_line/find_rotation_axis.py b/src/dials/command_line/find_rotation_axis.py index 9f684c0d88..cc8ed57d4b 100644 --- a/src/dials/command_line/find_rotation_axis.py +++ b/src/dials/command_line/find_rotation_axis.py @@ -26,12 +26,11 @@ import dxtbx.flumpy as flumpy import libtbx import libtbx.phil +from scitbx import matrix import dials.util import dials.util.log from dials.array_family import flex - -# from dials.array_family import flex from dials.util.options import ArgumentParser, reflections_and_experiments_from_files from dials.util.system import CPU_COUNT from dials.util.version import dials_version @@ -42,11 +41,20 @@ # Define the master PHIL scope for this program. 
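The composition= option documented above feeds the SFAC and UNIT instructions that _write_ins emits (see the export_shelx.py changes later in this diff). A simplified, self-contained sketch of that parsing; the real _parse_compound additionally validates the elements against SHELXL's scattering-factor list and moves carbon to the front of the element order:

    import re

    def sfac_unit(composition: str, z: int) -> tuple[str, str]:
        # "C3H7NO2S" -> {"C": 3, "H": 7, "N": 1, "O": 2, "S": 1}
        counts = {
            el: int(n or 1)
            for el, n in re.findall(r"([A-Z][a-z]?)(\d*)", composition)
        }
        sfac = "SFAC " + " ".join(counts)
        unit = "UNIT " + " ".join(str(c * z) for c in counts.values())
        return sfac, unit

    # L-cysteine with Z = 4, matching the values asserted in test_shelx_ins_composition:
    print(sfac_unit("C3H7NO2S", 4))
    # ('SFAC C H N O S', 'UNIT 12 28 4 8 4')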
phil_scope = libtbx.phil.parse( """ +d_max = 10.0 + .type = float + .help = "Low resolution limit to apply to the data, intended to remove" + "noisy regions of low angle scatter" + max_two_theta = 10.0 .type = float .help = "Scattering angle limit to select reflections only in the central" "mostly flat region of the Ewald sphere surface" +max_sample_size = 2000 + .help = "The maximum number of reflections to use." + .type = int(value_min=1) + view = False .type = bool .help = "View phi/theta histogram with current rotation axis (azimuth)" @@ -128,12 +136,6 @@ def make_2d_rotmat(theta): return R -def random_sample(arr, n): - """Select random sample of `n` rows from array""" - indices = np.random.choice(arr.shape[0], n, replace=False) - return arr[indices] - - def xyz2cyl(arr): """ Take a set of reflections in XYZ and convert to polar (cylindrical) @@ -154,7 +156,10 @@ def cylinder_histo(xyz, bins=(1000, 500)): """ i, j = np.triu_indices(len(xyz), k=1) diffs = xyz[i] - xyz[j] + del i + del j polar = xyz2cyl(diffs) + del diffs px, py = polar.T H, xedges, yedges = np.histogram2d( @@ -285,13 +290,20 @@ def optimise( return best_azimuth -def extract_spot_data(reflections, experiments, max_two_theta): +def extract_spot_data(reflections, experiments, params): """ - From the spot positions, extract reciprocal space X, Y and angle positions - for each reflection up to the scattering angle max_two_theta + Filter reflections according to various parameters, then extract reciprocal + space X, Y and angle positions. """ + + max_two_theta = params.max_two_theta + d_max = params.d_max + max_sample_size = params.max_sample_size + # Map reflections to reciprocal space reflections.centroid_px_to_mm(experiments) + reflections.map_centroids_to_reciprocal_space(experiments) + reflections["d"] = 1 / reflections["rlp"].norms() # Calculate scattering vectors reflections["s1"] = flex.vec3_double(len(reflections)) @@ -302,7 +314,6 @@ def extract_spot_data(reflections, experiments, max_two_theta): sel_expt = reflections["imageset_id"] == i else: sel_expt = reflections["id"] == i - for i_panel in range(len(expt.detector)): sel = sel_expt & (panel_numbers == i_panel) x, y, _ = reflections["xyzobs.mm.value"].select(sel).parts() @@ -313,11 +324,25 @@ def extract_spot_data(reflections, experiments, max_two_theta): reflections["2theta"].set_selected(sel, tt) # Filter reflections - full_len = len(reflections) - reflections = reflections.select(reflections["2theta"] <= max_two_theta) - if len(reflections) < full_len: + nref = len(reflections) + if max_two_theta: + reflections = reflections.select(reflections["2theta"] <= max_two_theta) + if len(reflections) < nref: + logger.info( + f"{len(reflections)} reflections with 2θ ≤ {max_two_theta}° selected from {nref} total" + ) + nref = len(reflections) + if d_max: + reflections = reflections.select(reflections["d"] <= d_max) + logger.info( + f"{len(reflections)} reflections with d ≤ {d_max} Å selected from {nref} total" + ) + nref = len(reflections) + if max_sample_size and max_sample_size < nref: + reflections.sort("d", reverse=True) + reflections = reflections[0:max_sample_size] logger.info( - f"{len(reflections)} reflections with 2θ ≤ {max_two_theta}° selected from {full_len} total" + f"{len(reflections)} lowest resolution reflections selected from {nref} total" ) x, y, _ = reflections["s1"].parts() @@ -391,7 +416,8 @@ def run(args=None, phil=phil_scope): wavelength = expt.beam.get_wavelength() rotx, roty, _ = expt.goniometer.get_rotation_axis() azimuth_current = 
np.degrees(np.arctan2(-roty, rotx)) - arr = extract_spot_data(reflections, experiments, params.max_two_theta) + arr = extract_spot_data(reflections, experiments, params) + del reflections if params.azimuth is not None: azimuth_current = params.azimuth @@ -522,7 +548,27 @@ def run(args=None, phil=phil_scope): logger.info( f"\nRotation axis found: {azimuth_deg:.2f} deg. / {azimuth_rad:.3f} rad." ) - expt.goniometer.set_rotation_axis(rotation_axis_to_xyz(azimuth_rad)) + + current_rotation_axis = matrix.col(expt.goniometer.get_rotation_axis()) + new_rotation_axis = matrix.col(rotation_axis_to_xyz(azimuth_rad)) + + # Rotation matrix to take the current_rotation_axis to the new_rotation_axis + angle = current_rotation_axis.angle(new_rotation_axis) + if angle: + axis = matrix.col(current_rotation_axis.cross(new_rotation_axis)).normalize() + R = axis.axis_and_angle_as_r3_rotation_matrix(angle) + else: + R = matrix.sqr(matrix.identity(3)) + + try: + # For a multi-axis goniometer, rotate the base axis by R + axes = expt.goniometer.get_axes() + base_axis = matrix.col(axes[-1]) + axes[-1] = R * base_axis + expt.goniometer.set_axes(axes) + except AttributeError: + # For a single-axis goniometer, just set the new_rotation_axis + expt.goniometer.set_rotation_axis(new_rotation_axis) logger.info(str(expt.goniometer)) logger.info(f"Saving optimised experiments to {params.output.experiments}") diff --git a/src/dials/command_line/refine_error_model.py b/src/dials/command_line/refine_error_model.py index b3ccac4597..c7894c3f7d 100644 --- a/src/dials/command_line/refine_error_model.py +++ b/src/dials/command_line/refine_error_model.py @@ -55,6 +55,9 @@ phil_scope = phil.parse( """ include scope dials.algorithms.scaling.error_model.error_model.phil_scope + min_partiality = 0.4 + .type = float + .help = "Use reflections with at least this partiality in error model optimisation." 
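The find_rotation_axis change above keeps a multi-axis goniometer intact by rotating only its base axis, using the rotation that carries the current rotation axis onto the optimised one. A short scitbx.matrix sketch of that construction; the current axis value here is made up, and the real code additionally handles the case where the two axes already coincide:

    from scitbx import matrix

    current = matrix.col((1.0, 0.0, 0.0))                # existing rotation axis (illustrative)
    optimised = matrix.col((-0.626604, -0.779338, 0.0))  # newly found axis

    # Axis-angle rotation taking `current` onto `optimised`
    angle = current.angle(optimised)
    axis = current.cross(optimised).normalize()
    R = axis.axis_and_angle_as_r3_rotation_matrix(angle)

    # Apply R to the goniometer's base axis only; the other axes are untouched
    base_axis = matrix.col((0.0, -1.0, 0.0))
    print(R * base_axis)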
intensity_choice = *profile sum combine .type = choice .help = "Use profile or summation intensities" @@ -101,13 +104,23 @@ def refine_error_model(params, experiments, reflection_tables): reflection_tables[i] = table space_group = experiments[0].crystal.get_space_group() Ih_table = IhTable( - reflection_tables, space_group, additional_cols=["partiality"], anomalous=True + reflection_tables, + space_group, + additional_cols=["partiality"], + anomalous=True, ) + use_stills_filtering = True + for expt in experiments: + if expt.scan and expt.scan.get_oscillation()[1] != 0.0: + use_stills_filtering = False + break # now do the error model refinement model = BasicErrorModel(basic_params=params.basic) try: - model = run_error_model_refinement(model, Ih_table) + model = run_error_model_refinement( + model, Ih_table, params.min_partiality, use_stills_filtering + ) except (ValueError, RuntimeError) as e: logger.info(e) else: diff --git a/src/dials/command_line/show.py b/src/dials/command_line/show.py index e04d335d2e..8883bb1638 100644 --- a/src/dials/command_line/show.py +++ b/src/dials/command_line/show.py @@ -7,6 +7,7 @@ import iotbx.phil from cctbx import uctbx +from dxtbx.model import ExperimentType from dxtbx.model.experiment_list import ExperimentListFactory from scitbx.math import five_number_summary @@ -87,11 +88,15 @@ def beam_centre_raw_image_px(detector, s0): return x_px + offset[0], y_px + offset[1] -def show_beam(detector, beam): +def show_beam(detector, beam, experiment_type: ExperimentType | None = None): # standard static beam model string s = str(beam) + # time of flight experiments have no scan points + if experiment_type == ExperimentType.TOF: + return s + # report whether the beam is scan-varying if beam.num_scan_points > 0: s += " s0 sampled at " + str(beam.num_scan_points) + " scan points\n" @@ -263,16 +268,20 @@ def show_experiments(experiments, show_scan_varying=False): except AttributeError: pass text.append(str(expt.detector)) + if expt.get_type() == ExperimentType.TOF: + min_wavelength = min(expt.beam.get_wavelength_range()) + s0 = tuple([i / min_wavelength for i in expt.beam.get_unit_s0()]) + else: + s0 = expt.beam.get_s0() text.append( - "Max resolution (at corners): %f" - % (expt.detector.get_max_resolution(expt.beam.get_s0())) + "Max resolution (at corners): %f" % (expt.detector.get_max_resolution(s0)) ) text.append( "Max resolution (inscribed): %f" - % (expt.detector.get_max_inscribed_resolution(expt.beam.get_s0())) + % (expt.detector.get_max_inscribed_resolution(s0)) ) text.append("") - text.append(show_beam(expt.detector, expt.beam)) + text.append(show_beam(expt.detector, expt.beam, expt.get_type())) if expt.scan is not None: text.append(str(expt.scan)) if expt.goniometer is not None: diff --git a/src/dials/command_line/split_still_data.py b/src/dials/command_line/split_still_data.py new file mode 100644 index 0000000000..5dadb03e61 --- /dev/null +++ b/src/dials/command_line/split_still_data.py @@ -0,0 +1,171 @@ +# DIALS_ENABLE_COMMAND_LINE_COMPLETION +""" +Split dials still-shot data files (expt and refl files) into separate files +based on image number or metadata. For example, splitting a dose series dataset. +Either the repeat unit can be specified with the series_repeat option, or +a yaml file can be provided. +If series_repeat=10, then data corresponding to every nth image will be split into +separate files: +group_0_0.expt +group_1_0.expt +.... +group_9_0.expt +and the same for reflection files. 
+The second index relates to the input file index, so if more than one input file is input, +there will then be group_0_1.expt etc. + +Example yaml syntax is +Point to a data array: + +--- +metadata: + timepoint: + '/path/to/example_master.h5' : '/path/to/meta.h5:/timepoint' +grouping: + split_by: + values: + - timepoint + +or for images specified by a template: +--- +metadata: + dose_point: + '/path/to/example_#####.cbf' : 'repeat=10' +grouping: + split_by: + values: + - dose_point + +""" + +from __future__ import annotations + +import logging +import sys +from pathlib import Path + +import iotbx.phil +from dxtbx.serialize import load +from libtbx import Auto + +from dials.util import log +from dials.util.image_grouping import ( + FilePair, + ParsedYAML, + get_grouping_handler, + series_repeat_to_groupings, +) +from dials.util.options import ArgumentParser +from dials.util.system import CPU_COUNT +from dials.util.version import dials_version + +logger = logging.getLogger("dials") + + +phil_str = """ +input { + reflections = None + .type = str + .multiple = True + .help = "Path to an integrated reflections file" + .expert_level = 1 + experiments = None + .type = str + .multiple = True + .help = "Path to an integrated experiments file" + .expert_level = 1 +} +grouping = None + .type = str + .help = "Path to a .yml file defining grouping structure during processing" +series_repeat = None + .type = int(value_min=2) + .expert_level = 2 + .help = "This option allows the user to specify that the data is a repeated series" + "by providing the number of repeated measurements at each point. i.e. it" + "is assumed that $series_repeat measurements are taken at each position" + "and that these form consecutive images in the input image files." +nproc=Auto + .type=int +output.log=dials.split_still_data.log + .type=str +""" + +phil_scope = iotbx.phil.parse(phil_str) + + +def run(args=sys.argv[1:]): + parser = ArgumentParser( + usage="dials.split_still_data integrated.expt integrated.refl series_repeat=10", + read_experiments=False, + read_reflections=False, + phil=phil_scope, + check_format=False, + epilog=__doc__, + ) + params, options, unhandled = parser.parse_args( + args=args, show_diff_phil=False, return_unhandled=True + ) + if unhandled: + for item in unhandled: + if item.endswith(".expt"): + args[args.index(item)] = f"input.experiments = {item}" + elif item.endswith(".refl"): + args[args.index(item)] = f"input.reflections = {item}" + else: + raise ValueError(f"Unhandled argument: {item}") + params, options = parser.parse_args(args=args, show_diff_phil=False) + + if not (params.input.reflections and params.input.experiments): + raise ValueError("Reflections and experiments files must both be specified") + log.config(verbosity=options.verbose, logfile=params.output.log) + logger.info(dials_version()) + + reflection_files = sorted([Path(i).resolve() for i in params.input.reflections]) + experiment_files = sorted([Path(i).resolve() for i in params.input.experiments]) + + datafiles = [FilePair(e, r) for e, r in zip(experiment_files, reflection_files)] + expts = [] + for fp in datafiles: + elist = load.experiment_list(fp.expt, check_format=False) + for e in elist: + if e.scan and e.scan.get_oscillation()[1] != 0.0: + logger.info( + "dials.split_still_data can only be used on still-shot data" + ) + sys.exit() + expts.append(elist) + + if params.nproc is Auto: + params.nproc = CPU_COUNT + logger.info(f"Using nproc={params.nproc}") + + if params.series_repeat: + parsed = series_repeat_to_groupings( + 
expts, params.series_repeat, groupname="split_by" + ) + handler = get_grouping_handler(parsed, "split_by", nproc=params.nproc) + elif params.grouping: + try: + parsed = ParsedYAML(params.grouping) + except Exception as e: + logger.info(f"Error: {e}\nPlease check input in yaml file.") + groupings = parsed.groupings + if len(groupings) != 1: + logger.info("Only one grouping type can be specified in grouping yaml") + sys.exit() + handler = get_grouping_handler( + parsed, list(parsed.groupings.keys())[0], nproc=params.nproc + ) + try: + _ = handler.split_files_to_groups( + working_directory=Path.cwd(), + data_file_pairs=datafiles, + ) + logger.info("Finished splitting data.") + except Exception as e: + logger.info(f"Error: {e}\nPlease check input.") + + +if __name__ == "__main__": + run() diff --git a/src/dials/templates/clusters.html b/src/dials/templates/clusters.html new file mode 100644 index 0000000000..92a8d0cdf9 --- /dev/null +++ b/src/dials/templates/clusters.html @@ -0,0 +1,89 @@ +{% extends "report_base.html" %} +{% block content %} + +
+Cluster Analysis
+
+{{ macros.panel('Data sets', 'datasets', {}, tables=image_range_tables) }}
+
+{{ macros.table(cc_cluster_table,
+    has_column_header=true,
+    has_row_header=true) }}
+{{ macros.plotly_graph("cc_cluster", cc_cluster_json, style="dendrogram-plot") }}
+
+{{ macros.table(cos_angle_cluster_table,
+    has_column_header=true,
+    has_row_header=true) }}
+{{ macros.plotly_graph("cos_angle_cluster", cos_angle_cluster_json, style="dendrogram-plot") }}
+
+{{ macros.panel('Cosym cluster plots', 'cosym_plots', cosym_graphs) }}
+ + + +{% endblock %} diff --git a/src/dials/util/export_mtz.py b/src/dials/util/export_mtz.py index 68bd71eae3..28514ed4bd 100644 --- a/src/dials/util/export_mtz.py +++ b/src/dials/util/export_mtz.py @@ -221,7 +221,7 @@ def add_batch_list( i0 = image_range[0] for i in range(n_batches): - if experiment.scan: + if experiment.scan and experiment.scan.get_oscillation()[1] != 0.0: phi_start[i], phi_range[i] = experiment.scan.get_image_oscillation(i + i0) # Unit cell and UB matrix for the centre of the image for scan-varying model diff --git a/src/dials/util/export_shelx.py b/src/dials/util/export_shelx.py index 17cf2c4de3..5748c606cf 100644 --- a/src/dials/util/export_shelx.py +++ b/src/dials/util/export_shelx.py @@ -1,9 +1,11 @@ from __future__ import annotations import logging +import re from math import isclose from cctbx import crystal, miller +from cctbx.eltbx import chemical_elements from iotbx.shelx.write_ins import LATT_SYMM from dials.algorithms.scaling.scaling_library import determine_best_unit_cell @@ -96,12 +98,13 @@ def export_shelx(scaled_data, experiment_list, params): _write_ins( experiment_list, best_unit_cell=params.mtz.best_unit_cell, + composition=params.shelx.composition, ins_file=params.shelx.ins, ) logger.info(f"Written {params.shelx.ins}") -def _write_ins(experiment_list, best_unit_cell, ins_file): +def _write_ins(experiment_list, best_unit_cell, composition, ins_file): sg = experiment_list[0].crystal.get_space_group() unit_cells = [] wavelengths = [] @@ -156,14 +159,47 @@ def _write_ins(experiment_list, best_unit_cell, ins_file): f"TITL {sg.type().number()} in {sg.type().lookup_symbol().replace(' ','')}\n" ) f.write( - "CELL {:8.5f} {:8.4f} {:8.4f} {:8.4f} {:8.3f} {:8.3f} {:8.3f}\n".format( + "CELL {:7.5f} {:9.5f} {:9.5f} {:9.5f} {:8.4f} {:8.4f} {:8.4f}\n".format( wl, *uc.parameters() ) ) if uc_sd: f.write( - "ZERR {:8.3f} {:8.4f} {:8.4f} {:8.4f} {:8.3f} {:8.3f} {:8.3f}\n".format( + "ZERR {:7.2f} {:9.5f} {:9.5f} {:9.5f} {:8.4f} {:8.4f} {:8.4f}\n".format( sg.order_z(), *uc_sd ) ) LATT_SYMM(f, sg) + logger.info( + f"Using composition {composition} to write SFAC & UNIT instructions" + ) + sorted_composition = _parse_compound(composition) + f.write("SFAC %s\n" % " ".join(sorted_composition)) + f.write( + "UNIT %s\n" + % " ".join( + [str(sorted_composition[e] * sg.order_z()) for e in sorted_composition] + ) + ) + f.write("HKLF 4\n") # Intensities + f.write("END\n") + + +def _parse_compound(composition): + elements = chemical_elements.proper_caps_list()[:94] + m = re.findall(r"([A-Z][a-z]?)(\d*)", composition) + if all(e[1] == "" for e in m): + # Assume user has just supplied list of elements so UNIT instruction cannot be calculated + result = {element: 0 for element, count in m} + else: + result = {element: int(count or 1) for element, count in m} + # Check for elements without scattering factors in SHELXL + unmatched = list(set(result).difference(elements)) + if unmatched: + raise Sorry(f"Element(s) {' '.join(unmatched)} in {composition} not recognised") + + if "C" in result: + # move C to start of list + elements.insert(0, elements.pop(5)) + sorted_result = {e: result[e] for e in elements if e in result} + return sorted_result diff --git a/src/dials/util/image_grouping.py b/src/dials/util/image_grouping.py index 8279fb59a0..1c53eb838d 100644 --- a/src/dials/util/image_grouping.py +++ b/src/dials/util/image_grouping.py @@ -21,6 +21,7 @@ from yaml.loader import SafeLoader from dxtbx import flumpy +from dxtbx.model import ExperimentList from dxtbx.sequence_filenames import 
group_files_by_imageset, template_regex from dxtbx.serialize import load @@ -658,6 +659,7 @@ def save_subset(input_: SplittingIterable) -> Optional[Tuple[str, FilePair]]: sel_identifiers = list(identifiers.select(flumpy.from_numpy(sel))) expts.select_on_experiment_identifiers(sel_identifiers) refls = refls.select_on_experiment_identifiers(sel_identifiers) + refls.reset_ids() if expts: exptout = ( input_.working_directory @@ -1038,3 +1040,47 @@ def get_grouping_handler(parsed: ParsedYAML, grouping: str, nproc: int = 1): if grouping not in parsed.groupings: raise ValueError(f"Grouping definition {grouping} not found in parsed yaml.") return handler_class(parsed.groupings[grouping], nproc) + + +def series_repeat_to_groupings( + experiments: List[ExperimentList], series_repeat: int, groupname="split_by" +) -> ParsedYAML: + """ + For a dose series data collection, attempt to create and then parse a + groupings yaml based on the images in the input experiments. + + Callers of this function should be prepared to catch Exceptions! + """ + # if all end with .h5 or .nxs then images, else template? + + images = set() + for expts in experiments: + for iset in expts.imagesets(): + images.update(iset.paths()) + + metalines = "" + if all(image.endswith(".nxs") or image.endswith(".h5") for image in images): + # ok assume all independent: + metadata = [] + for image in images: + metadata.append(f"{image} : 'repeat={series_repeat}'") + metalines = "\n ".join(s for s in metadata) + else: + isets = group_files_by_imageset(images) + metadata = [] + for iset in isets.keys(): + metadata.append(f"{iset} : 'repeat={series_repeat}'") + metalines = "\n ".join(s for s in metadata) + if not metalines: + raise ValueError("Unable to extract images/templates from experiments") + grouping = f""" +metadata: + dose_point: + {metalines} +grouping: + {groupname}: + values: + - dose_point +""" + parsed_yaml = ParsedYAML(yml_str=grouping) + return parsed_yaml diff --git a/src/dials/util/masking.py b/src/dials/util/masking.py index 11e800e5af..6c54cbf0e4 100644 --- a/src/dials/util/masking.py +++ b/src/dials/util/masking.py @@ -3,6 +3,7 @@ import copy import logging import math +import sys import time from collections import namedtuple from typing import Tuple @@ -215,6 +216,13 @@ def generate_mask( detector = imageset.get_detector() beam = imageset.get_beam() + # validate that the input parameters are sensible + if params.d_min is not None and params.d_max is not None: + if params.d_min > params.d_max: + sys.exit( + f"d_min = {params.d_min} > d_max = {params.d_max}: no spots will be found" + ) + # Create the mask for each panel masks = [] for index, panel in enumerate(detector): diff --git a/tests/algorithms/correlation/__init__.py b/tests/algorithms/correlation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/algorithms/correlation/test_analysis.py b/tests/algorithms/correlation/test_analysis.py new file mode 100644 index 0000000000..3f0f2ec9c4 --- /dev/null +++ b/tests/algorithms/correlation/test_analysis.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import pathlib + +from dials.algorithms.correlation.analysis import CorrelationMatrix +from dials.command_line.correlation_matrix import phil_scope +from dials.util.multi_dataset_handling import ( + assign_unique_identifiers, + parse_multiple_datasets, +) +from dials.util.options import ArgumentParser, reflections_and_experiments_from_files + + +def test_corr_mat(dials_data, run_in_tmp_path): + mcp = 
dials_data("vmxi_proteinase_k_sweeps", pathlib=True) + params = phil_scope.extract() + input_data = [] + parser = ArgumentParser( + usage=" ", + phil=phil_scope, + read_reflections=True, + read_experiments=True, + check_format=False, + epilog=" ", + ) + for i in [0, 1, 2, 3]: + input_data.append(str(mcp / f"experiments_{i}.expt")) + input_data.append(str(mcp / f"reflections_{i}.refl")) + + params, options, args = parser.parse_args( + args=input_data, show_diff_phil=False, return_unhandled=True + ) + + params.output.json = "dials.correlation_matrix.json" + + reflections, experiments = reflections_and_experiments_from_files( + params.input.reflections, params.input.experiments + ) + + reflections = parse_multiple_datasets(reflections) + assert len(experiments) == len(reflections) + assert len(experiments) > 1 + experiments, reflections = assign_unique_identifiers(experiments, reflections) + matrices = CorrelationMatrix(experiments, reflections, params) + matrices.calculate_matrices() + matrices.output_json() + assert pathlib.Path("dials.correlation_matrix.json").is_file() diff --git a/tests/algorithms/scaling/test_error_model.py b/tests/algorithms/scaling/test_error_model.py index eafc142f39..979f0303ab 100644 --- a/tests/algorithms/scaling/test_error_model.py +++ b/tests/algorithms/scaling/test_error_model.py @@ -183,19 +183,24 @@ def test_error_model_on_simulated_data( ) -def test_errormodel(large_reflection_table, test_sg): +@pytest.mark.parametrize("use_stills_filtering", [True, False]) +def test_errormodel(large_reflection_table, test_sg, use_stills_filtering): """Test the initialisation and methods of the error model.""" Ih_table = IhTable([large_reflection_table], test_sg, nblocks=1) block = Ih_table.blocked_data_list[0] params = generated_param() + params.weighting.error_model.basic.stills.min_multiplicity = 2 + params.weighting.error_model.basic.stills.min_Isigma = 0.0 params.weighting.error_model.basic.n_bins = 2 params.weighting.error_model.basic.min_Ih = 1.0 em = BasicErrorModel em.min_reflections_required = 1 error_model = em(basic_params=params.weighting.error_model.basic) error_model.min_reflections_required = 1 - error_model.configure_for_refinement(block) + error_model.configure_for_refinement( + block, use_stills_filtering=use_stills_filtering + ) assert error_model.binner.summation_matrix[0, 1] == 1 assert error_model.binner.summation_matrix[1, 1] == 1 assert error_model.binner.summation_matrix[2, 0] == 1 diff --git a/tests/algorithms/scaling/test_scale.py b/tests/algorithms/scaling/test_scale.py index d01015360c..1670e47947 100644 --- a/tests/algorithms/scaling/test_scale.py +++ b/tests/algorithms/scaling/test_scale.py @@ -982,7 +982,7 @@ def test_target_scale_handle_bad_dataset(dials_data, tmp_path): command = [ shutil.which("dials.scale"), "reflection_selection.method=intensity_ranges", - "Isigma_range=20.0,0.0", + "Isigma_range=18.0,0.0", "full_matrix=None", os.fspath(location / "integrated.refl"), os.fspath(location / "integrated.expt"), diff --git a/tests/array_family/test_reflection_table.py b/tests/array_family/test_reflection_table.py index c63dc70801..1fbf2c9223 100644 --- a/tests/array_family/test_reflection_table.py +++ b/tests/array_family/test_reflection_table.py @@ -1602,11 +1602,12 @@ def test_concat(): ids2[0] = "c" ids2[1] = "d" - table1 = flex.reflection_table.concat([table1, table2]) + table3 = flex.reflection_table.concat([table1, table2]) + ids3 = dict(table3.experiment_identifiers()) - assert list(table1["id"]) == [0, 0, 1, 1, 2, 2, 3, 3] - assert 
list(ids1.keys()) == [0, 1, 2, 3] - assert list(ids1.values()) == ["a", "b", "c", "d"] + assert list(table3["id"]) == [0, 0, 1, 1, 2, 2, 3, 3] + assert list(ids3.keys()) == [0, 1, 2, 3] + assert list(ids3.values()) == ["a", "b", "c", "d"] # test empty tables table1 = flex.reflection_table() diff --git a/tests/command_line/test_correlation_matrix.py b/tests/command_line/test_correlation_matrix.py new file mode 100644 index 0000000000..bfcbefecc2 --- /dev/null +++ b/tests/command_line/test_correlation_matrix.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import pathlib + +import dials.command_line.correlation_matrix as dials_corr_mat + + +def test_corr_mat(dials_data, run_in_tmp_path): + mcp = dials_data("vmxi_proteinase_k_sweeps", pathlib=True) + args = [] + for i in [0, 1, 2, 3]: + args.append(str(mcp / f"experiments_{i}.expt")) + args.append(str(mcp / f"reflections_{i}.refl")) + dials_corr_mat.run(args=args) + assert pathlib.Path("dials.correlation_matrix.html").is_file() + assert pathlib.Path("dials.correlation_matrix.log").is_file() diff --git a/tests/command_line/test_cosym.py b/tests/command_line/test_cosym.py index 1981cf47b7..9670ebe0cc 100644 --- a/tests/command_line/test_cosym.py +++ b/tests/command_line/test_cosym.py @@ -17,11 +17,23 @@ @pytest.mark.parametrize( - "space_group,engine", [(None, "scitbx"), ("P 1", "scipy"), ("P 4", "scipy")] + "space_group,engine,weights,cc_weights", + [ + (None, "scitbx", None, None), + ("P 1", "scipy", None, None), + ("P 4", "scipy", "standard_error", "sigma"), + ], ) -def test_cosym(dials_data, run_in_tmp_path, space_group, engine): +def test_cosym(dials_data, run_in_tmp_path, space_group, engine, weights, cc_weights): mcp = dials_data("multi_crystal_proteinase_k", pathlib=True) - args = ["space_group=" + str(space_group), "seed=0", f"engine={engine}"] + args = [ + "space_group=" + str(space_group), + "seed=0", + "nproc=1", + f"engine={engine}", + f"weights={weights}", + f"cc_weights={cc_weights}", + ] for i in [1, 2, 3, 4, 5, 7, 8, 10]: args.append(os.fspath(mcp / f"experiments_{i}.json")) args.append(os.fspath(mcp / f"reflections_{i}.pickle")) diff --git a/tests/command_line/test_export.py b/tests/command_line/test_export.py index d7801897e3..51f0c9a78a 100644 --- a/tests/command_line/test_export.py +++ b/tests/command_line/test_export.py @@ -538,8 +538,8 @@ def test_shelx_ins(dials_data, tmp_path): assert (tmp_path / "dials.ins").is_file() cell_esds = { - "CELL": (5.4815, 8.2158, 12.1457, 90.000, 90.000, 90.000), - "ZERR": (0.0005, 0.0007, 0.0011, 0.003, 0.004, 0.004), + "CELL": (5.48154, 8.21578, 12.14570, 90.0000, 90.0000, 90.0000), + "ZERR": (0.00050, 0.00073, 0.00109, 0.0034, 0.0037, 0.0037), } with (tmp_path / "dials.ins").open() as fh: @@ -548,7 +548,7 @@ def test_shelx_ins(dials_data, tmp_path): instruction = tokens[0] if instruction in cell_esds: result = tuple(map(float, tokens[2:8])) - assert result == pytest.approx(cell_esds[instruction], abs=0.001) + assert result == pytest.approx(cell_esds[instruction], abs=0.0001) def test_shelx_ins_best_unit_cell(dials_data, tmp_path): @@ -584,6 +584,39 @@ def test_shelx_ins_best_unit_cell(dials_data, tmp_path): assert result == pytest.approx(cell_esds[instruction], abs=0.001) +def test_shelx_ins_composition(dials_data, tmp_path): + # Call dials.export + result = subprocess.run( + [ + shutil.which("dials.export"), + "intensity=scale", + "format=shelx", + "composition=C3H7NO2S", + dials_data("l_cysteine_4_sweeps_scaled", pathlib=True) + / "scaled_20_25.expt", + 
dials_data("l_cysteine_4_sweeps_scaled", pathlib=True) + / "scaled_20_25.refl", + ], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr + assert (tmp_path / "dials.ins").is_file() + + sfac_unit = { + "SFAC": "C H N O S", + "UNIT": "12 28 4 8 4", + } + + with (tmp_path / "dials.ins").open() as fh: + for line in fh: + tokens = line.split() + instruction = tokens[0] + if instruction in sfac_unit: + result = " ".join(tokens[1:6]) + assert result == sfac_unit[instruction] + + def test_export_sum_or_profile_only(dials_data, tmp_path): expt = dials_data("insulin_processed", pathlib=True) / "integrated.expt" refl = dials_data("insulin_processed", pathlib=True) / "integrated.refl" diff --git a/tests/command_line/test_find_rotation_axis.py b/tests/command_line/test_find_rotation_axis.py index ab8812fc52..c2a789990f 100644 --- a/tests/command_line/test_find_rotation_axis.py +++ b/tests/command_line/test_find_rotation_axis.py @@ -1,7 +1,9 @@ from __future__ import annotations +from dxtbx.model.goniometer import GoniometerFactory from dxtbx.serialize import load from scitbx import matrix +from scitbx.array_family import flex import dials.command_line.find_rotation_axis as dials_find_rotation_axis @@ -19,6 +21,35 @@ def test_find_rotation_axis(dials_data, run_in_tmp_path): ) axis = matrix.col(experiments[0].goniometer.get_rotation_axis()) - expected = matrix.col((-0.627963, -0.778243, 0)) + expected = matrix.col((-0.626604, -0.779338, 0)) + + assert axis.angle(expected) < 1e-6 + + +def test_find_rotation_axis_multi_axis_goniometer(dials_data, run_in_tmp_path): + myd88 = dials_data("MyD88_processed", pathlib=True) + experiments = load.experiment_list(str(myd88 / "imported.expt"), check_format=False) + + # Replace single-axis goniometer with multi-axis goniometer + axes = flex.vec3_double([(0, -1, 0), (0, -0.642788, 0.766044), (0, -1, 0)]) + angles = flex.double((0, 0, -62)) + names = flex.std_string(("PHI", "KAPPA=CHI", "OMEGA")) + scan_axis = 2 + experiments[0].goniometer = GoniometerFactory.multi_axis( + axes, angles, names, scan_axis + ) + experiments.as_file("modified.expt") + + # Now run find_rotation_axis, and expect the same result. 
+ dials_find_rotation_axis.run(args=["modified.expt", str(myd88 / "strong.refl")]) + assert run_in_tmp_path.joinpath("optimised.expt").is_file() + assert run_in_tmp_path.joinpath("find_rotation_axis-histogram.png").is_file() + assert run_in_tmp_path.joinpath("find_rotation_axis-projection.png").is_file() + experiments = load.experiment_list( + run_in_tmp_path / "optimised.expt", check_format=False + ) + + axis = matrix.col(experiments[0].goniometer.get_rotation_axis()) + expected = matrix.col((-0.626604, -0.779338, 0)) assert axis.angle(expected) < 1e-6 diff --git a/tests/command_line/test_find_spots_server_client.py b/tests/command_line/test_find_spots_server_client.py deleted file mode 100644 index 65cc13f07b..0000000000 --- a/tests/command_line/test_find_spots_server_client.py +++ /dev/null @@ -1,153 +0,0 @@ -from __future__ import annotations - -import shutil -import socket -import subprocess -import sys -import time -import timeit -import urllib.request -from xml.dom import minidom - -import pytest - - -@pytest.fixture -def server_port(tmp_path) -> int: - """Fixture to load a find_spots_server server""" - if sys.hexversion >= 0x3080000 and sys.platform == "darwin": - pytest.skip("find_spots server known to be broken on MacOS with Python 3.8+") - if sys.platform == "win32": - pytest.skip("find_spots server is not supported on Windows") - - # Find a free port to run the server on - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: - sock.bind(("", 0)) - host, port = sock.getsockname() - # Start the server - server_command = ["dials.find_spots_server", f"port={port}", "nproc=3"] - p = subprocess.Popen(server_command, cwd=tmp_path) - wait_for_server(port) - yield port - p.terminate() - p.wait(timeout=3) - p.kill() - - -def test_server_return_codes(dials_data, server_port): - try: - first_file = sorted( - dials_data("centroid_test_data", pathlib=True).glob("*.cbf") - )[0] - response = urllib.request.urlopen( - f"http://127.0.0.1:{server_port}/{first_file}" - ) - assert response.code == 200 - with pytest.raises(urllib.error.HTTPError): - urllib.request.urlopen(f"http://127.0.0.1:{server_port}/some/junk/filename") - finally: - result = subprocess.run( - [shutil.which("dials.find_spots_client"), f"port={server_port}", "stop"], - capture_output=True, - ) - assert not result.returncode and not result.stderr - - -def test_find_spots_server_client(dials_data, tmp_path, server_port): - filenames = sorted(dials_data("centroid_test_data", pathlib=True).glob("*.cbf")) - try: - exercise_client(server_port=server_port, filenames=filenames) - finally: - result = subprocess.run( - [shutil.which("dials.find_spots_client"), f"port={server_port}", "stop"], - cwd=tmp_path, - capture_output=True, - ) - assert not result.returncode and not result.stderr - - -def wait_for_server(port, max_wait=20): - print(f"Waiting up to {max_wait} seconds for server to start") - server_ok = False - start_time = timeit.default_timer() - max_time = start_time + max_wait - while (timeit.default_timer() < max_time) and not server_ok: - try: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.connect(("127.0.0.1", port)) - server_ok = True - except OSError as e: - if (e.errno != 111) and (e.errno != 61): - raise - # ignore connection failures (111 connection refused on linux; 61 connection refused on mac) - time.sleep(0.1) - if not server_ok: - raise Exception("Server failed to start after %d seconds" % max_wait) - print( - "dials.find_spots_server up after %f seconds" - % (timeit.default_timer() - 
start_time) - ) - - -def exercise_client(server_port, filenames): - assert filenames - client_command = [ - shutil.which("dials.find_spots_client"), - f"port={server_port}", - "min_spot_size=3", - "algorithm=dispersion", - "nproc=1", - filenames[0], - ] - - index_client_command = client_command + [ - "index=True", - "indexing.method=fft1d", - "max_refine=10", - ] - print(index_client_command) - result = subprocess.run(index_client_command, capture_output=True) - assert not result.returncode and not result.stderr - out = f"{result.stdout}" - - xmldoc = minidom.parseString(out) - assert len(xmldoc.getElementsByTagName("image")) == 1 - assert len(xmldoc.getElementsByTagName("spot_count")) == 1 - assert len(xmldoc.getElementsByTagName("spot_count_no_ice")) == 1 - assert len(xmldoc.getElementsByTagName("d_min")) == 1 - assert len(xmldoc.getElementsByTagName("total_intensity")) == 1 - assert len(xmldoc.getElementsByTagName("unit_cell")) == 1 - assert len(xmldoc.getElementsByTagName("n_indexed")) == 1 - assert len(xmldoc.getElementsByTagName("fraction_indexed")) == 1 - - unit_cell = [ - float(f) - for f in xmldoc.getElementsByTagName("unit_cell")[0].childNodes[0].data.split() - ] - - assert unit_cell == pytest.approx( - [39.90, 42.67, 42.37, 89.89, 90.10, 90.13], abs=1e-1 - ) - - client_command = client_command + filenames[1:] - result = subprocess.run(client_command, capture_output=True) - assert not result.returncode and not result.stderr - out = f"{result.stdout}" - - xmldoc = minidom.parseString(out) - images = xmldoc.getElementsByTagName("image") - assert len(images) == 9 - spot_counts = sorted( - int(node.childNodes[0].data) - for node in xmldoc.getElementsByTagName("spot_count") - ) - assert spot_counts == sorted([203, 196, 205, 209, 195, 205, 203, 207, 189]) - spot_counts_no_ice = sorted( - int(node.childNodes[0].data) - for node in xmldoc.getElementsByTagName("spot_count_no_ice") - ) - assert spot_counts_no_ice == sorted([169, 171, 175, 176, 177, 184, 193, 195, 196]) - d_min = sorted( - float(node.childNodes[0].data) for node in xmldoc.getElementsByTagName("d_min") - ) - assert d_min == sorted([1.45, 1.47, 1.55, 1.55, 1.56, 1.59, 1.61, 1.61, 1.64]) diff --git a/tests/command_line/test_import.py b/tests/command_line/test_import.py index 26a1ca04aa..e1f106d2c4 100644 --- a/tests/command_line/test_import.py +++ b/tests/command_line/test_import.py @@ -377,7 +377,7 @@ def test_import_beam_centre(dials_data, tmp_path): def test_fast_slow_beam_centre(dials_regression: pathlib.Path, tmp_path): - # test slow_fast_beam_centre with a multi-panel CS-PAD image + # test fast_slow_beam_centre with a multi-panel CS-PAD image impath = os.path.join( dials_regression, "image_examples", @@ -432,6 +432,59 @@ def test_fast_slow_beam_centre(dials_regression: pathlib.Path, tmp_path): assert offsets == pytest.approx(ref_offsets) +def test_distance_multi_panel(dials_regression: pathlib.Path, tmp_path): + # test setting the distance with a multi-panel CS-PAD image + impath = os.path.join( + dials_regression, + "image_examples", + "LCLS_cspad_nexus", + "idx-20130301060858401.cbf", + ) + result = subprocess.run( + [ + shutil.which("dials.import"), + "distance=100", + "output.experiments=distance.expt", + impath, + ], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr + assert (tmp_path / "distance.expt").is_file() + + experiments = load.experiment_list(tmp_path / "distance.expt") + detector = experiments[0].detector + # all distances should be 100 + assert 
all(p.get_distance() == pytest.approx(100) for p in detector) + + # check relative panel positions have not changed + from scitbx import matrix + + o = matrix.col(detector[0].get_origin()) + offsets = [] + for p in detector: + intra_pnl = o - matrix.col(p.get_origin()) + offsets.append(intra_pnl.length()) + + result = subprocess.run( + [shutil.which("dials.import"), "output.experiments=reference.expt", impath], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr + assert (tmp_path / "reference.expt").is_file() + + ref_exp = load.experiment_list(tmp_path / "reference.expt") + ref_detector = ref_exp[0].detector + o = matrix.col(ref_detector[0].get_origin()) + ref_offsets = [] + for p in ref_detector: + intra_pnl = o - matrix.col(p.get_origin()) + ref_offsets.append(intra_pnl.length()) + assert offsets == pytest.approx(ref_offsets) + + def test_from_image_files(dials_data, tmp_path): image_files = sorted( dials_data("centroid_test_data", pathlib=True).glob("centroid*.cbf") diff --git a/tests/command_line/test_integrate.py b/tests/command_line/test_integrate.py index 200f8816bc..36147348c0 100644 --- a/tests/command_line/test_integrate.py +++ b/tests/command_line/test_integrate.py @@ -143,10 +143,7 @@ def test_basic_blocking_options(dials_data, tmp_path, block_size, block_units): assert not result.returncode and not result.stderr -@pytest.mark.skipif( - (datetime.date.today() < datetime.date(2024, 6, 5)), - reason="Temporary skip for test that started to fail on Azure pipelines", -) +@pytest.mark.skip(reason="3d threaded integrator") def test_basic_threaded_integrate(dials_data, tmp_path): """Test the threaded integrator on single imageset data.""" diff --git a/tests/command_line/test_reciprocal_lattice_viewer.py b/tests/command_line/test_reciprocal_lattice_viewer.py index 82ed3f7e34..fae67122df 100644 --- a/tests/command_line/test_reciprocal_lattice_viewer.py +++ b/tests/command_line/test_reciprocal_lattice_viewer.py @@ -8,6 +8,5 @@ def test_gltbx_is_available(): because they were not built earlier. This will reliably cause dials.rlv to fail even thought the build setup was apparently fine. 
""" - import gltbx.gl - - assert gltbx.gl.ext + import gltbx.fonts # noqa: F401 + import gltbx.gl # noqa: F401 diff --git a/tests/command_line/test_split_still_data.py b/tests/command_line/test_split_still_data.py new file mode 100644 index 0000000000..46ed79b286 --- /dev/null +++ b/tests/command_line/test_split_still_data.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import os +import pathlib + +import pytest + +from dxtbx.serialize import load + +import dials.command_line.split_still_data as split + + +@pytest.mark.xfail( + os.name == "nt", + reason="Failures due to translated paths; see https://github.com/cctbx/dxtbx/issues/613", +) +@pytest.mark.parametrize("use_yaml", [True, False]) +def test_split_still_data(dials_data, run_in_tmp_path, use_yaml): + data = dials_data("cunir_serial_processed", pathlib=True) + args = [ + os.fspath(data / "integrated.expt"), + os.fspath(data / "integrated.refl"), + "nproc=1", + ] + if use_yaml: + images = os.fspath(dials_data("cunir_serial", pathlib=True)) + yml = f""" +--- +metadata: + timepoint: + {images}/merlin0047_#####.cbf : 'repeat=2' +grouping: + group_by: + values: + - timepoint +""" + test_yaml = run_in_tmp_path / "tmp.yaml" + with open(test_yaml, "w") as f: + f.write(yml) + args.append(f"grouping={test_yaml}") + else: + args.append("series_repeat=2") + split.run(args=args) + assert pathlib.Path(run_in_tmp_path / "group_0_0.expt").is_file() + assert pathlib.Path(run_in_tmp_path / "group_0_0.refl").is_file() + assert pathlib.Path(run_in_tmp_path / "group_1_0.expt").is_file() + assert pathlib.Path(run_in_tmp_path / "group_1_0.refl").is_file() + expts1 = load.experiment_list("group_0_0.expt", check_format=False) + assert len(expts1) == 3 + # not old style elist datastructures (no scan, single imageset) + assert expts1[0].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17000" + assert expts1[1].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17002" + assert expts1[2].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17004" + expts2 = load.experiment_list( + run_in_tmp_path / "group_1_0.expt", check_format=False + ) + assert len(expts2) == 2 + assert expts2[0].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17001" + assert expts2[1].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17003" diff --git a/tests/command_line/test_ssx_reduction.py b/tests/command_line/test_ssx_reduction.py index 49f007307c..09599afdea 100644 --- a/tests/command_line/test_ssx_reduction.py +++ b/tests/command_line/test_ssx_reduction.py @@ -91,3 +91,11 @@ def test_ssx_reduction(dials_data, tmp_path): ) assert not result.returncode and not result.stderr assert (tmp_path / "compute_delta_cchalf.html").is_file() + + # will not be able to refine error model due to lack of data, but should rather exit cleanly. + result = subprocess.run( + [shutil.which("dials.refine_error_model"), scale_expts, scale_refls], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr