From 5dd0b6cbf828ddfdb8a2c5a807d98f69501a8817 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Thu, 4 Apr 2024 12:43:23 +0100 Subject: [PATCH 01/32] Unused parameter nthreads (#2638) * Unused parameter nthreads --- newsfragments/2638.bugfix | 1 + src/dials/array_family/flex_ext.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 newsfragments/2638.bugfix diff --git a/newsfragments/2638.bugfix b/newsfragments/2638.bugfix new file mode 100644 index 0000000000..8d32ae9784 --- /dev/null +++ b/newsfragments/2638.bugfix @@ -0,0 +1 @@ +array_family/flex_ext.py: remove nthreads parameter from extract_shoeboxes: is not implemented and is never used anyway diff --git a/src/dials/array_family/flex_ext.py b/src/dials/array_family/flex_ext.py index b827a47f7b..cf40979054 100644 --- a/src/dials/array_family/flex_ext.py +++ b/src/dials/array_family/flex_ext.py @@ -956,7 +956,7 @@ def compute_corrections(self, experiments): self["qe"] = qe return lp - def extract_shoeboxes(self, imageset, mask=None, nthreads=1): + def extract_shoeboxes(self, imageset, mask=None): """ Helper function to read a load of shoebox data. From a37bc0e8195a89f0ece774b2146cec63cfdc4664 Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Fri, 12 Apr 2024 09:46:10 +0100 Subject: [PATCH 02/32] Add mesa-libGL to linux bootstrap installations (#2645) This is to solve a longstanding issue where bootstrap builds would fail on linux systems more recent than RHEL7. Cause: - The conda-forge sysroot stdlib system uses #include_next in order to chain to a wrapped stdlib.h. - gltbx adds the system /usr/include and /usr/local/include multiple times to the include search path. - When the c-f sysroot stdlib tries to #include_next, instead of the REAL sysroot stdlib.h, it picks up the system stdlib.h. - This worked on RHEL7- the system stdlib was old enough that it apparently didn't use this mechanism. - Since the RHEL8 system stdlib is recent enough to use this mechanism, it thinks it is being #included twice, and so hits the usual guards. This "System include path" was added originally to work around the fact that OpenGL is a complicated dependency and thus you need to use the library from the system. This problem can also be solved using the Conda-forge CDT system dependencies. --- .conda-envs/linux.txt | 1 + newsfragments/1465.bugfix | 1 + 2 files changed, 2 insertions(+) create mode 100644 newsfragments/1465.bugfix diff --git a/.conda-envs/linux.txt b/.conda-envs/linux.txt index 06c1e015aa..78105830b6 100644 --- a/.conda-envs/linux.txt +++ b/.conda-envs/linux.txt @@ -19,6 +19,7 @@ conda-forge::jpeg conda-forge::libboost-devel conda-forge::libboost-python-devel conda-forge::matplotlib-base>=3.0.2 +conda-forge::mesa-libgl-devel-cos7-x86_64 conda-forge::mrcfile conda-forge::msgpack-cxx conda-forge::msgpack-python diff --git a/newsfragments/1465.bugfix b/newsfragments/1465.bugfix new file mode 100644 index 0000000000..b0f5300168 --- /dev/null +++ b/newsfragments/1465.bugfix @@ -0,0 +1 @@ +Add mesa-libgl CDT dependency when building cctbx. This should solve build problems on RHEL8 and other distributions more modern than RHEL7. 
From 0e657827327ec9e852791a4ad2b0f93985036da5 Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Fri, 12 Apr 2024 11:56:02 +0100 Subject: [PATCH 03/32] MNT: Use 2024 editions of prebuilt cctbx --- installer/bootstrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/bootstrap.py b/installer/bootstrap.py index 33ed202ccf..2b136f0c6f 100755 --- a/installer/bootstrap.py +++ b/installer/bootstrap.py @@ -50,7 +50,7 @@ allowed_ssh_connections = {} concurrent_git_connection_limit = threading.Semaphore(5) -_prebuilt_cctbx_base = "2023" # Latest Nightly release +_prebuilt_cctbx_base = "2024" def make_executable(filepath): From f7803cc8363241c75dc2bd186443493ff951cf1f Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Mon, 15 Apr 2024 20:13:33 +0100 Subject: [PATCH 04/32] Fix error in propagation of partiality variance for scaling still shot data (#2642) see e.g. https://en.wikipedia.org/wiki/Propagation_of_uncertainty section "Example formulae", f=AB. Co-authored-by: Ben Williams --- newsfragments/2642.bugfix | 1 + src/dials/algorithms/scaling/scaling_library.py | 8 ++++---- tests/algorithms/scaling/test_scale.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 newsfragments/2642.bugfix diff --git a/newsfragments/2642.bugfix b/newsfragments/2642.bugfix new file mode 100644 index 0000000000..97f160137a --- /dev/null +++ b/newsfragments/2642.bugfix @@ -0,0 +1 @@ +Fix error in propagation of partiality variance when scaling still data diff --git a/src/dials/algorithms/scaling/scaling_library.py b/src/dials/algorithms/scaling/scaling_library.py index 6076be54cd..fcfc6ef2ec 100644 --- a/src/dials/algorithms/scaling/scaling_library.py +++ b/src/dials/algorithms/scaling/scaling_library.py @@ -104,11 +104,11 @@ def choose_initial_scaling_intensities(reflection_table, intensity_choice="profi "intensity.sum.variance" ] * flex.pow2(conv * inverse_partiality) if "partiality.inv.variance" in reflection_table: + # see e.g. https://en.wikipedia.org/wiki/Propagation_of_uncertainty + # section "Example formulae", f=AB. 
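+                # For f = A*B with A = intensity.sum.value * conv and B = 1/partiality,
+                # Var(f) = B^2*Var(A) + A^2*Var(B). The B^2*Var(A) term is assigned above,
+                # so the term added here must carry the squared prefactor
+                # (intensity.sum.value * conv)^2 on partiality.inv.variance.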
reflection_table["variance"] += ( - reflection_table["intensity.sum.value"] - * conv - * reflection_table["partiality.inv.variance"] - ) + flex.pow2(reflection_table["intensity.sum.value"] * conv) + ) * reflection_table["partiality.inv.variance"] else: reflection_table["intensity"] = ( reflection_table["intensity.sum.value"] * conv diff --git a/tests/algorithms/scaling/test_scale.py b/tests/algorithms/scaling/test_scale.py index d01015360c..1670e47947 100644 --- a/tests/algorithms/scaling/test_scale.py +++ b/tests/algorithms/scaling/test_scale.py @@ -982,7 +982,7 @@ def test_target_scale_handle_bad_dataset(dials_data, tmp_path): command = [ shutil.which("dials.scale"), "reflection_selection.method=intensity_ranges", - "Isigma_range=20.0,0.0", + "Isigma_range=18.0,0.0", "full_matrix=None", os.fspath(location / "integrated.refl"), os.fspath(location / "integrated.expt"), From 2d71415cf82273259a7fa66dfdf61d5fdd3b5eca Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Tue, 16 Apr 2024 08:40:58 +0100 Subject: [PATCH 05/32] Fix crash in exporting scaled still-shot data with dials.export (#2646) Arises due to changes to using imagesequences introduced in #2556 --- newsfragments/2646.bugfix | 1 + src/dials/util/export_mtz.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 newsfragments/2646.bugfix diff --git a/newsfragments/2646.bugfix b/newsfragments/2646.bugfix new file mode 100644 index 0000000000..a0a0c8283f --- /dev/null +++ b/newsfragments/2646.bugfix @@ -0,0 +1 @@ +``dials.export``: Fix crash when exporting scaled still-shot data diff --git a/src/dials/util/export_mtz.py b/src/dials/util/export_mtz.py index 68bd71eae3..28514ed4bd 100644 --- a/src/dials/util/export_mtz.py +++ b/src/dials/util/export_mtz.py @@ -221,7 +221,7 @@ def add_batch_list( i0 = image_range[0] for i in range(n_batches): - if experiment.scan: + if experiment.scan and experiment.scan.get_oscillation()[1] != 0.0: phi_start[i], phi_range[i] = experiment.scan.get_image_oscillation(i + i0) # Unit cell and UB matrix for the centre of the image for scan-varying model From 20c0e1127805db91656afd653d1fc0ffbd1c1ba8 Mon Sep 17 00:00:00 2001 From: Huw Jenkins Date: Tue, 16 Apr 2024 13:21:55 +0100 Subject: [PATCH 06/32] dials.export: Add option to specify composition of asymmetric unit so a complete SHELX instruction file can be written. (#2623) --------- Co-authored-by: Ben Williams --- newsfragments/2623.feature | 1 + src/dials/command_line/export.py | 13 +++++++++++ src/dials/util/export_shelx.py | 38 ++++++++++++++++++++++++++++++- tests/command_line/test_export.py | 33 +++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 newsfragments/2623.feature diff --git a/newsfragments/2623.feature b/newsfragments/2623.feature new file mode 100644 index 0000000000..399bf8d668 --- /dev/null +++ b/newsfragments/2623.feature @@ -0,0 +1 @@ +``dials.export``: Add option to specify composition of asymmetric unit for SHELX ``.ins`` file output. diff --git a/src/dials/command_line/export.py b/src/dials/command_line/export.py index 5b6c55d649..3eecb8ca2e 100644 --- a/src/dials/command_line/export.py +++ b/src/dials/command_line/export.py @@ -45,6 +45,11 @@ XDS format exports a models.expt file as XDS.INP and XPARM.XDS files. If a reflection file is given it will be exported as a SPOT.XDS file. +SHELX format exports intensity data in HKLF 4 format for use in the SHELX suite +of programs. 
As this file format does not contain unit cell parameters or +symmetry information a minimal instruction file is also written. Optionally the +expected contents of the asymmetric unit can be added to this instruciton file. + PETS format exports intensity data and diffraction data in the CIF format used by PETS. This is primarily intended to produce files suitable for dynamic diffraction refinement using Jana2020, which requires this format. @@ -72,6 +77,11 @@ dials.export indexed.refl format=xds dials.export models.expt format=xds dials.export models.expt indexed.refl format=xds + + # Export to shelx + dials.export scaled.expt scaled.refl format=shelx + dials.export scaled.expt scaled.refl format=shelx shelx.hklout=dials.hkl + dials.export scaled.expt scaled.refl format=shelx composition=C3H7NO2S """ phil_scope = parse( @@ -249,6 +259,9 @@ ins = dials.ins .type = path .help = "The output ins file" + composition = CH + .type = str + .help = "The chemical composition of the asymmetric unit" scale = True .type = bool .help = "Scale reflections to maximise output precision in SHELX 8.2f format" diff --git a/src/dials/util/export_shelx.py b/src/dials/util/export_shelx.py index 17cf2c4de3..c86db0228f 100644 --- a/src/dials/util/export_shelx.py +++ b/src/dials/util/export_shelx.py @@ -1,9 +1,11 @@ from __future__ import annotations import logging +import re from math import isclose from cctbx import crystal, miller +from cctbx.eltbx import chemical_elements from iotbx.shelx.write_ins import LATT_SYMM from dials.algorithms.scaling.scaling_library import determine_best_unit_cell @@ -96,12 +98,13 @@ def export_shelx(scaled_data, experiment_list, params): _write_ins( experiment_list, best_unit_cell=params.mtz.best_unit_cell, + composition=params.shelx.composition, ins_file=params.shelx.ins, ) logger.info(f"Written {params.shelx.ins}") -def _write_ins(experiment_list, best_unit_cell, ins_file): +def _write_ins(experiment_list, best_unit_cell, composition, ins_file): sg = experiment_list[0].crystal.get_space_group() unit_cells = [] wavelengths = [] @@ -167,3 +170,36 @@ def _write_ins(experiment_list, best_unit_cell, ins_file): ) ) LATT_SYMM(f, sg) + logger.info( + f"Using composition {composition} to write SFAC & UNIT instructions" + ) + sorted_composition = _parse_compound(composition) + f.write("SFAC %s\n" % " ".join(sorted_composition)) + f.write( + "UNIT %s\n" + % " ".join( + [str(sorted_composition[e] * sg.order_z()) for e in sorted_composition] + ) + ) + f.write("HKLF 4\n") # Intensities + f.write("END\n") + + +def _parse_compound(composition): + elements = chemical_elements.proper_caps_list()[:94] + m = re.findall(r"([A-Z][a-z]?)(\d*)", composition) + if all(e[1] == "" for e in m): + # Assume user has just supplied list of elements so UNIT instruction cannot be calculated + result = {element: 0 for element, count in m} + else: + result = {element: int(count or 1) for element, count in m} + # Check for elements without scattering factors in SHELXL + unmatched = list(set(result).difference(elements)) + if unmatched: + raise Sorry(f"Element(s) {' '.join(unmatched)} in {composition} not recognised") + + if "C" in result: + # move C to start of list + elements.insert(0, elements.pop(5)) + sorted_result = {e: result[e] for e in elements if e in result} + return sorted_result diff --git a/tests/command_line/test_export.py b/tests/command_line/test_export.py index d7801897e3..35e71c4981 100644 --- a/tests/command_line/test_export.py +++ b/tests/command_line/test_export.py @@ -584,6 +584,39 @@ def 
test_shelx_ins_best_unit_cell(dials_data, tmp_path): assert result == pytest.approx(cell_esds[instruction], abs=0.001) +def test_shelx_ins_composition(dials_data, tmp_path): + # Call dials.export + result = subprocess.run( + [ + shutil.which("dials.export"), + "intensity=scale", + "format=shelx", + "composition=C3H7NO2S", + dials_data("l_cysteine_4_sweeps_scaled", pathlib=True) + / "scaled_20_25.expt", + dials_data("l_cysteine_4_sweeps_scaled", pathlib=True) + / "scaled_20_25.refl", + ], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr + assert (tmp_path / "dials.ins").is_file() + + sfac_unit = { + "SFAC": "C H N O S", + "UNIT": "12 28 4 8 4", + } + + with (tmp_path / "dials.ins").open() as fh: + for line in fh: + tokens = line.split() + instruction = tokens[0] + if instruction in sfac_unit: + result = " ".join(tokens[1:6]) + assert result == sfac_unit[instruction] + + def test_export_sum_or_profile_only(dials_data, tmp_path): expt = dials_data("insulin_processed", pathlib=True) / "integrated.expt" refl = dials_data("insulin_processed", pathlib=True) / "integrated.refl" From 77cda91c8cac4682d511b0f24e8b49a3637231ff Mon Sep 17 00:00:00 2001 From: Huw Jenkins Date: Tue, 16 Apr 2024 13:24:50 +0100 Subject: [PATCH 07/32] dials.export format=shelx: Increase number of decimal places in .ins file (#2624) For `dials.export format=shelx` include one more decimal place in CELL and ZERR instructions, to match XPREP output --- newsfragments/2624.bugfix | 1 + src/dials/util/export_shelx.py | 4 ++-- tests/command_line/test_export.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 newsfragments/2624.bugfix diff --git a/newsfragments/2624.bugfix b/newsfragments/2624.bugfix new file mode 100644 index 0000000000..c4f41d7bf4 --- /dev/null +++ b/newsfragments/2624.bugfix @@ -0,0 +1 @@ +``dials.export`` ``format=shelx``: Increased precision of unit cell parameters and their estimated standard deviations written to ``.ins`` file diff --git a/src/dials/util/export_shelx.py b/src/dials/util/export_shelx.py index c86db0228f..5748c606cf 100644 --- a/src/dials/util/export_shelx.py +++ b/src/dials/util/export_shelx.py @@ -159,13 +159,13 @@ def _write_ins(experiment_list, best_unit_cell, composition, ins_file): f"TITL {sg.type().number()} in {sg.type().lookup_symbol().replace(' ','')}\n" ) f.write( - "CELL {:8.5f} {:8.4f} {:8.4f} {:8.4f} {:8.3f} {:8.3f} {:8.3f}\n".format( + "CELL {:7.5f} {:9.5f} {:9.5f} {:9.5f} {:8.4f} {:8.4f} {:8.4f}\n".format( wl, *uc.parameters() ) ) if uc_sd: f.write( - "ZERR {:8.3f} {:8.4f} {:8.4f} {:8.4f} {:8.3f} {:8.3f} {:8.3f}\n".format( + "ZERR {:7.2f} {:9.5f} {:9.5f} {:9.5f} {:8.4f} {:8.4f} {:8.4f}\n".format( sg.order_z(), *uc_sd ) ) diff --git a/tests/command_line/test_export.py b/tests/command_line/test_export.py index 35e71c4981..51f0c9a78a 100644 --- a/tests/command_line/test_export.py +++ b/tests/command_line/test_export.py @@ -538,8 +538,8 @@ def test_shelx_ins(dials_data, tmp_path): assert (tmp_path / "dials.ins").is_file() cell_esds = { - "CELL": (5.4815, 8.2158, 12.1457, 90.000, 90.000, 90.000), - "ZERR": (0.0005, 0.0007, 0.0011, 0.003, 0.004, 0.004), + "CELL": (5.48154, 8.21578, 12.14570, 90.0000, 90.0000, 90.0000), + "ZERR": (0.00050, 0.00073, 0.00109, 0.0034, 0.0037, 0.0037), } with (tmp_path / "dials.ins").open() as fh: @@ -548,7 +548,7 @@ def test_shelx_ins(dials_data, tmp_path): instruction = tokens[0] if instruction in cell_esds: result = tuple(map(float, tokens[2:8])) - assert result == 
pytest.approx(cell_esds[instruction], abs=0.001) + assert result == pytest.approx(cell_esds[instruction], abs=0.0001) def test_shelx_ins_best_unit_cell(dials_data, tmp_path): From cac9fd7110994d9c478d88e32751c80fc8521afb Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Wed, 17 Apr 2024 13:56:12 +0100 Subject: [PATCH 08/32] Update gltbx presence test for RLV (#2648) gltbx now no longer always has the .ext property, as it might be using pyopengl. gltbx.fonts is added as an extra test because this also causes failures of RLV in certain situations. --- newsfragments/2648.misc | 1 + tests/command_line/test_reciprocal_lattice_viewer.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 newsfragments/2648.misc diff --git a/newsfragments/2648.misc b/newsfragments/2648.misc new file mode 100644 index 0000000000..d5dc857b20 --- /dev/null +++ b/newsfragments/2648.misc @@ -0,0 +1 @@ +Update gltbx presence test. diff --git a/tests/command_line/test_reciprocal_lattice_viewer.py b/tests/command_line/test_reciprocal_lattice_viewer.py index 82ed3f7e34..fae67122df 100644 --- a/tests/command_line/test_reciprocal_lattice_viewer.py +++ b/tests/command_line/test_reciprocal_lattice_viewer.py @@ -8,6 +8,5 @@ def test_gltbx_is_available(): because they were not built earlier. This will reliably cause dials.rlv to fail even thought the build setup was apparently fine. """ - import gltbx.gl - - assert gltbx.gl.ext + import gltbx.fonts # noqa: F401 + import gltbx.gl # noqa: F401 From 474cfbc86835c5a0fa466f84d61d56a48e5c1865 Mon Sep 17 00:00:00 2001 From: DiamondLightSource-build-server Date: Wed, 17 Apr 2024 17:16:22 +0100 Subject: [PATCH 09/32] DIALS 3.19.0 Changelog towncrier --name=DIALS --version='3.19.0' --- CHANGELOG.rst | 35 +++++++++++++++++++++++++++++++++++ newsfragments/1465.bugfix | 1 - newsfragments/2553.feature | 1 - newsfragments/2602.feature | 1 - newsfragments/2605.bugfix | 3 --- newsfragments/2614.feature | 1 - newsfragments/2617.misc | 1 - newsfragments/2618.misc | 1 - newsfragments/2619.misc | 1 - newsfragments/2623.feature | 1 - newsfragments/2624.bugfix | 1 - newsfragments/2626.misc | 1 - newsfragments/2633.misc | 1 - newsfragments/2634.bugfix | 1 - newsfragments/2638.bugfix | 1 - newsfragments/2642.bugfix | 1 - newsfragments/2646.bugfix | 1 - newsfragments/2648.misc | 1 - 18 files changed, 35 insertions(+), 19 deletions(-) delete mode 100644 newsfragments/1465.bugfix delete mode 100644 newsfragments/2553.feature delete mode 100644 newsfragments/2602.feature delete mode 100644 newsfragments/2605.bugfix delete mode 100644 newsfragments/2614.feature delete mode 100644 newsfragments/2617.misc delete mode 100644 newsfragments/2618.misc delete mode 100644 newsfragments/2619.misc delete mode 100644 newsfragments/2623.feature delete mode 100644 newsfragments/2624.bugfix delete mode 100644 newsfragments/2626.misc delete mode 100644 newsfragments/2633.misc delete mode 100644 newsfragments/2634.bugfix delete mode 100644 newsfragments/2638.bugfix delete mode 100644 newsfragments/2642.bugfix delete mode 100644 newsfragments/2646.bugfix delete mode 100644 newsfragments/2648.misc diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 91b49f400d..fc369daa4a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,38 @@ +DIALS 3.19.0 (2024-04-17) +========================= + +Features +-------- + +- ``dials.predict``: Allow usage when image data are not available. 
(`#2553 `_) +- Add ``TOFSpotFinder`` to tailor default params to time of flight experiments and add additional reflection table data. (`#2602 `_) +- ``dials.ssx_index``: Allow use of sequences indexer, pink_indexer and low_res_spot_match indexing algorithms. (`#2614 `_) +- ``dials.export``: Add option ``composition=`` to specify of asymmetric unit composition for SHELX ``.ins`` file output. (`#2623 `_) + + +Bugfixes +-------- + +- Fix building on RHEL8 and other more recent distributions. (`#1465 `_) +- ``dials.index``: Joint indexing is automatically set on for rotation data, off for still data. This can be overridden by explicit use of ``joint_indexing=``. (`#2605 `_) +- ``dials.export`` ``format=shelx``: Increased precision of unit cell parameters and their estimated standard deviations written to ``.ins`` file. (`#2624 `_) +- ``dials.ssx_index``: Don't combine detector models if individually refined. (`#2634 `_) +- ``dials.scale``: Fix error in propagation of partiality variance, when scaling still data. (`#2642 `_) +- ``dials.export``: Fix crash when exporting scaled still-shot data. (`#2646 `_) + + +Deprecations and Removals +------------------------- + +- API: ``array_family/flex_ext.py``: remove ``nthread``s parameter from ``extract_shoeboxes``, as it was never implemented. (`#2638 `_) + + +Misc +---- + +- `#2617 `_, `#2618 `_, `#2619 `_, `#2626 `_, `#2633 `_, `#2648 `_ + + DIALS 3.18.1 (2024-03-26) ========================= diff --git a/newsfragments/1465.bugfix b/newsfragments/1465.bugfix deleted file mode 100644 index b0f5300168..0000000000 --- a/newsfragments/1465.bugfix +++ /dev/null @@ -1 +0,0 @@ -Add mesa-libgl CDT dependency when building cctbx. This should solve build problems on RHEL8 and other distributions more modern than RHEL7. diff --git a/newsfragments/2553.feature b/newsfragments/2553.feature deleted file mode 100644 index 2505e12c61..0000000000 --- a/newsfragments/2553.feature +++ /dev/null @@ -1 +0,0 @@ -``dials.predict``: allow this command to work in the case that image data are not available. diff --git a/newsfragments/2602.feature b/newsfragments/2602.feature deleted file mode 100644 index d9d07deaa2..0000000000 --- a/newsfragments/2602.feature +++ /dev/null @@ -1 +0,0 @@ -Add `TOFSpotFinder` to tailor default params to time of flight experiments and add additional reflection table data. diff --git a/newsfragments/2605.bugfix b/newsfragments/2605.bugfix deleted file mode 100644 index 9df920cbec..0000000000 --- a/newsfragments/2605.bugfix +++ /dev/null @@ -1,3 +0,0 @@ -``dials.index``: Set joint indexing to be automatically defined by default: -``True`` for rotation data, ``False`` for still shots. This default can be -overridden by the user by explicit use of ``joint_indexing=``. diff --git a/newsfragments/2614.feature b/newsfragments/2614.feature deleted file mode 100644 index 2cc2db1586..0000000000 --- a/newsfragments/2614.feature +++ /dev/null @@ -1 +0,0 @@ -``dials.ssx_index``: Allow use of sequences indexer, and pink_indexer and low_res_spot_match indexing algorithms. 
diff --git a/newsfragments/2617.misc b/newsfragments/2617.misc deleted file mode 100644 index b7adf88987..0000000000 --- a/newsfragments/2617.misc +++ /dev/null @@ -1 +0,0 @@ -Fix Deprecation warning and doc build failure for pinkindexer file diff --git a/newsfragments/2618.misc b/newsfragments/2618.misc deleted file mode 100644 index 622dc0c48b..0000000000 --- a/newsfragments/2618.misc +++ /dev/null @@ -1 +0,0 @@ -Unpin gemmi version diff --git a/newsfragments/2619.misc b/newsfragments/2619.misc deleted file mode 100644 index 4d252b79e1..0000000000 --- a/newsfragments/2619.misc +++ /dev/null @@ -1 +0,0 @@ -Do more accurate and reliable discovery of available CPUs and virtual memory across a range of different systems. This should make various DIALS tools, in particular ``dials.integrate``, function better in situations where system resources are fairly limited or DIALS requires a lot of memory. \ No newline at end of file diff --git a/newsfragments/2623.feature b/newsfragments/2623.feature deleted file mode 100644 index 399bf8d668..0000000000 --- a/newsfragments/2623.feature +++ /dev/null @@ -1 +0,0 @@ -``dials.export``: Add option to specify composition of asymmetric unit for SHELX ``.ins`` file output. diff --git a/newsfragments/2624.bugfix b/newsfragments/2624.bugfix deleted file mode 100644 index c4f41d7bf4..0000000000 --- a/newsfragments/2624.bugfix +++ /dev/null @@ -1 +0,0 @@ -``dials.export`` ``format=shelx``: Increased precision of unit cell parameters and their estimated standard deviations written to ``.ins`` file diff --git a/newsfragments/2626.misc b/newsfragments/2626.misc deleted file mode 100644 index e2a302055d..0000000000 --- a/newsfragments/2626.misc +++ /dev/null @@ -1 +0,0 @@ -Add filter in `TOFSpotFinder._post_process` to remove any reflections outside of the time of flight range diff --git a/newsfragments/2633.misc b/newsfragments/2633.misc deleted file mode 100644 index 91386d5ab1..0000000000 --- a/newsfragments/2633.misc +++ /dev/null @@ -1 +0,0 @@ -Constrain wxPython version to minimum 4.2.0. This ensures releases are not downgraded. diff --git a/newsfragments/2634.bugfix b/newsfragments/2634.bugfix deleted file mode 100644 index 161632c309..0000000000 --- a/newsfragments/2634.bugfix +++ /dev/null @@ -1 +0,0 @@ -``dials.ssx_index``: Don't combine detector models if individually refined diff --git a/newsfragments/2638.bugfix b/newsfragments/2638.bugfix deleted file mode 100644 index 8d32ae9784..0000000000 --- a/newsfragments/2638.bugfix +++ /dev/null @@ -1 +0,0 @@ -array_family/flex_ext.py: remove nthreads parameter from extract_shoeboxes: is not implemented and is never used anyway diff --git a/newsfragments/2642.bugfix b/newsfragments/2642.bugfix deleted file mode 100644 index 97f160137a..0000000000 --- a/newsfragments/2642.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix error in propagation of partiality variance when scaling still data diff --git a/newsfragments/2646.bugfix b/newsfragments/2646.bugfix deleted file mode 100644 index a0a0c8283f..0000000000 --- a/newsfragments/2646.bugfix +++ /dev/null @@ -1 +0,0 @@ -``dials.export``: Fix crash when exporting scaled still-shot data diff --git a/newsfragments/2648.misc b/newsfragments/2648.misc deleted file mode 100644 index d5dc857b20..0000000000 --- a/newsfragments/2648.misc +++ /dev/null @@ -1 +0,0 @@ -Update gltbx presence test. 
From 526423d5e03e2f91db6f6a9abe8c4caf6128ca94 Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Thu, 18 Apr 2024 09:09:50 +0100 Subject: [PATCH 10/32] Bump version to 3.19.dev --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cb759536f0..9b7f43e3ed 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from build import build -__version_tag__ = "3.18.dev" +__version_tag__ = "3.19.dev" setup_kwargs = { "name": "dials", From ec9ae4af31c5e222052a6b7f6f9a3e842193c704 Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Thu, 18 Apr 2024 09:15:15 +0100 Subject: [PATCH 11/32] Remove all find_spots_server tests (#2641) We've struggled with these for a long time. They should not be used. We know they are used externally. So, keep the code and record if the tests fail but don't let them fail whole builds. --- newsfragments/2641.misc | 1 + .../test_find_spots_server_client.py | 153 ------------------ 2 files changed, 1 insertion(+), 153 deletions(-) create mode 100644 newsfragments/2641.misc delete mode 100644 tests/command_line/test_find_spots_server_client.py diff --git a/newsfragments/2641.misc b/newsfragments/2641.misc new file mode 100644 index 0000000000..e7fce4ab53 --- /dev/null +++ b/newsfragments/2641.misc @@ -0,0 +1 @@ +Remove find_spots_server_client tests. diff --git a/tests/command_line/test_find_spots_server_client.py b/tests/command_line/test_find_spots_server_client.py deleted file mode 100644 index 65cc13f07b..0000000000 --- a/tests/command_line/test_find_spots_server_client.py +++ /dev/null @@ -1,153 +0,0 @@ -from __future__ import annotations - -import shutil -import socket -import subprocess -import sys -import time -import timeit -import urllib.request -from xml.dom import minidom - -import pytest - - -@pytest.fixture -def server_port(tmp_path) -> int: - """Fixture to load a find_spots_server server""" - if sys.hexversion >= 0x3080000 and sys.platform == "darwin": - pytest.skip("find_spots server known to be broken on MacOS with Python 3.8+") - if sys.platform == "win32": - pytest.skip("find_spots server is not supported on Windows") - - # Find a free port to run the server on - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: - sock.bind(("", 0)) - host, port = sock.getsockname() - # Start the server - server_command = ["dials.find_spots_server", f"port={port}", "nproc=3"] - p = subprocess.Popen(server_command, cwd=tmp_path) - wait_for_server(port) - yield port - p.terminate() - p.wait(timeout=3) - p.kill() - - -def test_server_return_codes(dials_data, server_port): - try: - first_file = sorted( - dials_data("centroid_test_data", pathlib=True).glob("*.cbf") - )[0] - response = urllib.request.urlopen( - f"http://127.0.0.1:{server_port}/{first_file}" - ) - assert response.code == 200 - with pytest.raises(urllib.error.HTTPError): - urllib.request.urlopen(f"http://127.0.0.1:{server_port}/some/junk/filename") - finally: - result = subprocess.run( - [shutil.which("dials.find_spots_client"), f"port={server_port}", "stop"], - capture_output=True, - ) - assert not result.returncode and not result.stderr - - -def test_find_spots_server_client(dials_data, tmp_path, server_port): - filenames = sorted(dials_data("centroid_test_data", pathlib=True).glob("*.cbf")) - try: - exercise_client(server_port=server_port, filenames=filenames) - finally: - result = subprocess.run( - [shutil.which("dials.find_spots_client"), f"port={server_port}", "stop"], - cwd=tmp_path, - capture_output=True, - ) - assert not result.returncode 
and not result.stderr - - -def wait_for_server(port, max_wait=20): - print(f"Waiting up to {max_wait} seconds for server to start") - server_ok = False - start_time = timeit.default_timer() - max_time = start_time + max_wait - while (timeit.default_timer() < max_time) and not server_ok: - try: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.connect(("127.0.0.1", port)) - server_ok = True - except OSError as e: - if (e.errno != 111) and (e.errno != 61): - raise - # ignore connection failures (111 connection refused on linux; 61 connection refused on mac) - time.sleep(0.1) - if not server_ok: - raise Exception("Server failed to start after %d seconds" % max_wait) - print( - "dials.find_spots_server up after %f seconds" - % (timeit.default_timer() - start_time) - ) - - -def exercise_client(server_port, filenames): - assert filenames - client_command = [ - shutil.which("dials.find_spots_client"), - f"port={server_port}", - "min_spot_size=3", - "algorithm=dispersion", - "nproc=1", - filenames[0], - ] - - index_client_command = client_command + [ - "index=True", - "indexing.method=fft1d", - "max_refine=10", - ] - print(index_client_command) - result = subprocess.run(index_client_command, capture_output=True) - assert not result.returncode and not result.stderr - out = f"{result.stdout}" - - xmldoc = minidom.parseString(out) - assert len(xmldoc.getElementsByTagName("image")) == 1 - assert len(xmldoc.getElementsByTagName("spot_count")) == 1 - assert len(xmldoc.getElementsByTagName("spot_count_no_ice")) == 1 - assert len(xmldoc.getElementsByTagName("d_min")) == 1 - assert len(xmldoc.getElementsByTagName("total_intensity")) == 1 - assert len(xmldoc.getElementsByTagName("unit_cell")) == 1 - assert len(xmldoc.getElementsByTagName("n_indexed")) == 1 - assert len(xmldoc.getElementsByTagName("fraction_indexed")) == 1 - - unit_cell = [ - float(f) - for f in xmldoc.getElementsByTagName("unit_cell")[0].childNodes[0].data.split() - ] - - assert unit_cell == pytest.approx( - [39.90, 42.67, 42.37, 89.89, 90.10, 90.13], abs=1e-1 - ) - - client_command = client_command + filenames[1:] - result = subprocess.run(client_command, capture_output=True) - assert not result.returncode and not result.stderr - out = f"{result.stdout}" - - xmldoc = minidom.parseString(out) - images = xmldoc.getElementsByTagName("image") - assert len(images) == 9 - spot_counts = sorted( - int(node.childNodes[0].data) - for node in xmldoc.getElementsByTagName("spot_count") - ) - assert spot_counts == sorted([203, 196, 205, 209, 195, 205, 203, 207, 189]) - spot_counts_no_ice = sorted( - int(node.childNodes[0].data) - for node in xmldoc.getElementsByTagName("spot_count_no_ice") - ) - assert spot_counts_no_ice == sorted([169, 171, 175, 176, 177, 184, 193, 195, 196]) - d_min = sorted( - float(node.childNodes[0].data) for node in xmldoc.getElementsByTagName("d_min") - ) - assert d_min == sorted([1.45, 1.47, 1.55, 1.55, 1.56, 1.59, 1.61, 1.61, 1.64]) From 9af1ec1261392ac36abe27ad4a11985e055352cd Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Tue, 23 Apr 2024 14:09:07 +0100 Subject: [PATCH 12/32] Enable Python 3.12 (#2651) - Allow usage of python 3.12 in bootstrap. - Update default bootstrap python to 3.11. - Update missed version tag change from release. 
--- .azure-pipelines/azure-pipelines.yml | 2 +- .conda-envs/linux.txt | 3 +-- .conda-envs/macos.txt | 3 +-- .conda-envs/windows.txt | 3 +-- installer/bootstrap.py | 4 ++-- newsfragments/2651.feature | 1 + setup.py | 2 +- 7 files changed, 8 insertions(+), 10 deletions(-) create mode 100644 newsfragments/2651.feature diff --git a/.azure-pipelines/azure-pipelines.yml b/.azure-pipelines/azure-pipelines.yml index 7d1cd42a0e..c205a96375 100644 --- a/.azure-pipelines/azure-pipelines.yml +++ b/.azure-pipelines/azure-pipelines.yml @@ -2,7 +2,7 @@ variables: CACHE_VERSION: 20201102 isPullRequest: $[startsWith(variables['Build.SourceBranch'], 'refs/pull/')] PYTHON_MINIMUM_VERSION: "3.9" - PYTHON_TESTING_VERSION: "3.11" + PYTHON_TESTING_VERSION: "3.12" stages: - stage: prepare diff --git a/.conda-envs/linux.txt b/.conda-envs/linux.txt index 78105830b6..aa9f3300a8 100644 --- a/.conda-envs/linux.txt +++ b/.conda-envs/linux.txt @@ -11,11 +11,10 @@ conda-forge::eigen conda-forge::future conda-forge::gemmi>=0.6.5 conda-forge::h5py>=3.1.0 -conda-forge::hdf5<1.13 +conda-forge::hdf5 conda-forge::hdf5plugin conda-forge::iota conda-forge::jinja2 -conda-forge::jpeg conda-forge::libboost-devel conda-forge::libboost-python-devel conda-forge::matplotlib-base>=3.0.2 diff --git a/.conda-envs/macos.txt b/.conda-envs/macos.txt index f095f192f7..3977f7ad20 100644 --- a/.conda-envs/macos.txt +++ b/.conda-envs/macos.txt @@ -11,11 +11,10 @@ conda-forge::eigen conda-forge::future conda-forge::gemmi>=0.6.5 conda-forge::h5py>=3.1.0 -conda-forge::hdf5<1.13 +conda-forge::hdf5 conda-forge::hdf5plugin conda-forge::iota conda-forge::jinja2 -conda-forge::jpeg conda-forge::libboost-devel conda-forge::libboost-python-devel conda-forge::libcxx diff --git a/.conda-envs/windows.txt b/.conda-envs/windows.txt index dc297f27c6..4b8d404841 100644 --- a/.conda-envs/windows.txt +++ b/.conda-envs/windows.txt @@ -11,11 +11,10 @@ conda-forge::eigen conda-forge::future conda-forge::gemmi>=0.6.5 conda-forge::h5py>=3.1.0 -conda-forge::hdf5<1.13 +conda-forge::hdf5 conda-forge::hdf5plugin conda-forge::iota conda-forge::jinja2 -conda-forge::jpeg conda-forge::libboost-devel conda-forge::libboost-python-devel conda-forge::matplotlib-base>=3.0.2 diff --git a/installer/bootstrap.py b/installer/bootstrap.py index 2b136f0c6f..ac1aa0a992 100755 --- a/installer/bootstrap.py +++ b/installer/bootstrap.py @@ -1432,8 +1432,8 @@ def run(): parser.add_argument( "--python", help="Install this minor version of Python (default: %(default)s)", - default="3.10", - choices=("3.9", "3.10", "3.11"), + default="3.11", + choices=("3.9", "3.10", "3.11", "3.12"), ) parser.add_argument( "--branch", diff --git a/newsfragments/2651.feature b/newsfragments/2651.feature new file mode 100644 index 0000000000..312d214171 --- /dev/null +++ b/newsfragments/2651.feature @@ -0,0 +1 @@ +DIALS is now compatible with Python 3.12. diff --git a/setup.py b/setup.py index 9b7f43e3ed..db32d461de 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from build import build -__version_tag__ = "3.19.dev" +__version_tag__ = "3.20.dev" setup_kwargs = { "name": "dials", From 14384017238fe1485229a5b665dfbedb00d771fa Mon Sep 17 00:00:00 2001 From: David Waterman Date: Fri, 26 Apr 2024 16:22:31 +0100 Subject: [PATCH 13/32] Test setting distance for multipanel detectors (#2616) * Add dials.import test for multi-panel distance overrides. 
--- newsfragments/2616.misc | 1 + tests/command_line/test_import.py | 55 ++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 newsfragments/2616.misc diff --git a/newsfragments/2616.misc b/newsfragments/2616.misc new file mode 100644 index 0000000000..8cc6b8c058 --- /dev/null +++ b/newsfragments/2616.misc @@ -0,0 +1 @@ +``dials.import``: add a test for distance overrides for a multi-panel detector. diff --git a/tests/command_line/test_import.py b/tests/command_line/test_import.py index 26a1ca04aa..e1f106d2c4 100644 --- a/tests/command_line/test_import.py +++ b/tests/command_line/test_import.py @@ -377,7 +377,7 @@ def test_import_beam_centre(dials_data, tmp_path): def test_fast_slow_beam_centre(dials_regression: pathlib.Path, tmp_path): - # test slow_fast_beam_centre with a multi-panel CS-PAD image + # test fast_slow_beam_centre with a multi-panel CS-PAD image impath = os.path.join( dials_regression, "image_examples", @@ -432,6 +432,59 @@ def test_fast_slow_beam_centre(dials_regression: pathlib.Path, tmp_path): assert offsets == pytest.approx(ref_offsets) +def test_distance_multi_panel(dials_regression: pathlib.Path, tmp_path): + # test setting the distance with a multi-panel CS-PAD image + impath = os.path.join( + dials_regression, + "image_examples", + "LCLS_cspad_nexus", + "idx-20130301060858401.cbf", + ) + result = subprocess.run( + [ + shutil.which("dials.import"), + "distance=100", + "output.experiments=distance.expt", + impath, + ], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr + assert (tmp_path / "distance.expt").is_file() + + experiments = load.experiment_list(tmp_path / "distance.expt") + detector = experiments[0].detector + # all distances should be 100 + assert all(p.get_distance() == pytest.approx(100) for p in detector) + + # check relative panel positions have not changed + from scitbx import matrix + + o = matrix.col(detector[0].get_origin()) + offsets = [] + for p in detector: + intra_pnl = o - matrix.col(p.get_origin()) + offsets.append(intra_pnl.length()) + + result = subprocess.run( + [shutil.which("dials.import"), "output.experiments=reference.expt", impath], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr + assert (tmp_path / "reference.expt").is_file() + + ref_exp = load.experiment_list(tmp_path / "reference.expt") + ref_detector = ref_exp[0].detector + o = matrix.col(ref_detector[0].get_origin()) + ref_offsets = [] + for p in ref_detector: + intra_pnl = o - matrix.col(p.get_origin()) + ref_offsets.append(intra_pnl.length()) + assert offsets == pytest.approx(ref_offsets) + + def test_from_image_files(dials_data, tmp_path): image_files = sorted( dials_data("centroid_test_data", pathlib=True).glob("centroid*.cbf") From 00efaec12ac9245396ebbc320c8eedbb121fd6f8 Mon Sep 17 00:00:00 2001 From: Amy Thompson <52806925+amyjaynethompson@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:27:00 +0100 Subject: [PATCH 14/32] Dials correlation matrix (#2632) New dials program to calculate correlation matrices between datasets as a stand-alone module extracting methods from xia2.multiplex. 
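As a rough standalone sketch of the clustering idea used by the new module (illustrative values and names only, not the module's API): pairwise correlation coefficients are converted to distances, condensed, and clustered with scipy's average-linkage hierarchical clustering, in the same way the code below handles both the correlation and cos-angle matrices.

    import numpy as np
    import scipy.spatial.distance as ssd
    from scipy.cluster import hierarchy

    # Hypothetical pairwise correlation matrix for four datasets.
    cc = np.array(
        [
            [1.0, 0.9, 0.2, 0.1],
            [0.9, 1.0, 0.3, 0.2],
            [0.2, 0.3, 1.0, 0.8],
            [0.1, 0.2, 0.8, 1.0],
        ]
    )

    # Correlation -> dissimilarity, condensed nC2 form, then average linkage.
    dist = ssd.squareform(1 - cc, checks=False)
    linkage = hierarchy.linkage(dist, method="average")
    print(linkage)  # each row: [cluster_i, cluster_j, distance, n_members]
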
--------- Co-authored-by: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Co-authored-by: Ben Williams --- AUTHORS | 1 + newsfragments/2632.feature | 1 + src/dials/algorithms/correlation/__init__.py | 0 src/dials/algorithms/correlation/analysis.py | 325 ++++++++++++++++++ src/dials/algorithms/correlation/plots.py | 208 +++++++++++ src/dials/command_line/correlation_matrix.py | 147 ++++++++ src/dials/templates/clusters.html | 83 +++++ tests/algorithms/correlation/__init__.py | 0 tests/algorithms/correlation/test_analysis.py | 47 +++ tests/command_line/test_correlation_matrix.py | 16 + 10 files changed, 828 insertions(+) create mode 100644 newsfragments/2632.feature create mode 100644 src/dials/algorithms/correlation/__init__.py create mode 100644 src/dials/algorithms/correlation/analysis.py create mode 100644 src/dials/algorithms/correlation/plots.py create mode 100644 src/dials/command_line/correlation_matrix.py create mode 100644 src/dials/templates/clusters.html create mode 100644 tests/algorithms/correlation/__init__.py create mode 100644 tests/algorithms/correlation/test_analysis.py create mode 100644 tests/command_line/test_correlation_matrix.py diff --git a/AUTHORS b/AUTHORS index 9d94f7e9e5..7b4453a79d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -2,6 +2,7 @@ Contributors include: Aaron Brewster Ammaar Saeed +Amy Thompson Asmit Bhowmick Benjamin Williams Billy Poon diff --git a/newsfragments/2632.feature b/newsfragments/2632.feature new file mode 100644 index 0000000000..1d4515a7b8 --- /dev/null +++ b/newsfragments/2632.feature @@ -0,0 +1 @@ +``dials.correlation_matrix``: A new command-line tool for correlation and cosine similarity clustering of multi-crystal datasets, independent of ``xia2.multiplex``. It provides HTML output, including clustering heatmaps, dendrograms and corresponding ``dials.cosym`` graphs. diff --git a/src/dials/algorithms/correlation/__init__.py b/src/dials/algorithms/correlation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/dials/algorithms/correlation/analysis.py b/src/dials/algorithms/correlation/analysis.py new file mode 100644 index 0000000000..cb73896eb1 --- /dev/null +++ b/src/dials/algorithms/correlation/analysis.py @@ -0,0 +1,325 @@ +from __future__ import annotations + +import json +import logging +import sys +from collections import OrderedDict + +import numpy as np +import scipy.spatial.distance as ssd +from scipy.cluster import hierarchy + +import iotbx.phil +from dxtbx.model import ExperimentList +from libtbx.phil import scope_extract + +from dials.algorithms.correlation.plots import linkage_matrix_to_dict, to_plotly_json +from dials.algorithms.symmetry.cosym import CosymAnalysis +from dials.algorithms.symmetry.cosym.plots import plot_coords, plot_rij_histogram +from dials.array_family.flex import reflection_table +from dials.util.exclude_images import get_selection_for_valid_image_ranges +from dials.util.filter_reflections import filtered_arrays_from_experiments_reflections +from dials.util.multi_dataset_handling import select_datasets_on_identifiers + +logger = logging.getLogger("dials.algorithms.correlation.analysis") + +phil_scope = iotbx.phil.parse( + """\ +partiality_threshold = 0.4 + .type = float(value_min=0, value_max=1) + .help = "Use reflections with a partiality greater than the threshold." 
+ +include scope dials.algorithms.symmetry.cosym.phil_scope + +relative_length_tolerance = 0.05 + .type = float(value_min=0) + .help = "Datasets are only accepted if unit cell lengths fall within this relative tolerance of the median cell lengths." + +absolute_angle_tolerance = 2 + .type = float(value_min=0) + .help = "Datasets are only accepted if unit cell angles fall within this absolute tolerance of the median cell angles." + +min_reflections = 10 + .type = int(value_min=1) + .help = "The minimum number of reflections per experiment." +""", + process_includes=True, +) + + +class CorrelationMatrix: + def __init__( + self, + experiments: ExperimentList, + reflections: list[reflection_table], + params: scope_extract = None, + ): + """ + Set up the required cosym preparations for determining the correlation matricies + of a series of input experiments and reflections. + + Args: + experiments (dxtbx_model_ext.ExperimentList): list of experiments in dials format + reflections (list): list of dials_array_family_flex_ext.reflection_table objects associated with the experiments + params (libtbx.phil.scope_extract): + """ + + # Set up experiments, reflections and params + + if params is None: + params = phil_scope.extract() + self.params = params + self._reflections = [] + + if len(reflections) == len(experiments): + for refl, expt in zip(reflections, experiments): + sel = get_selection_for_valid_image_ranges(refl, expt) + self._reflections.append(refl.select(sel)) + else: + sys.exit( + "Number of reflection tables does not match number of experiments." + ) + + # Initial filtering to remove experiments and reflections that do not meet the minimum number of reflections required (params.min_reflections) + + self._experiments, self._reflections = self._filter_min_reflections( + experiments, self._reflections + ) + + # Used for optional json creation that is in a format friendly for import and analysis (for future development) + self.ids_to_identifiers_map = {} + for table in self._reflections: + self.ids_to_identifiers_map.update(table.experiment_identifiers()) + + # Filter reflections that do not meet partiality threshold or default I/Sig(I) criteria + + datasets = filtered_arrays_from_experiments_reflections( + self._experiments, + self._reflections, + outlier_rejection_after_filter=False, + partiality_threshold=params.partiality_threshold, + ) + + # Merge intensities to prepare for cosym analysis + + self.datasets = self._merge_intensities(datasets) + + # Set required params for cosym to skip symmetry determination and reduce dimensions + + self.params.lattice_group = self.datasets[0].space_group_info() + self.params.space_group = self.datasets[0].space_group_info() + + self.cosym_analysis = CosymAnalysis(self.datasets, self.params) + + def _merge_intensities(self, datasets: list) -> list: + """ + Merge intensities and elimate systematically absent reflections. 
+ + Args: + datasets(list): list of cctbx.miller.array objects + Returns: + datasets_sys_absent_eliminated(list): list of merged cctbx.miller.array objects + """ + individual_merged_intensities = [] + for unmerged in datasets: + individual_merged_intensities.append( + unmerged.merge_equivalents().array().set_info(unmerged.info()) + ) + datasets_sys_absent_eliminated = [ + d.eliminate_sys_absent(integral_only=True).primitive_setting() + for d in individual_merged_intensities + ] + + return datasets_sys_absent_eliminated + + def _filter_min_reflections( + self, experiments: ExperimentList, reflections: list[reflection_table] + ) -> tuple[ExperimentList, list[reflection_table]]: + """ + Filter all datasets that have less than the specified number of reflections. + + Args: + experiments (dxtbx_model_ext.ExperimentList): list of experiments in dials format + reflections (list): list of dials_array_family_flex_ext.reflection_table objects associated with the experiments + + Returns: + filtered_datasets (tuple): tuple of filtered datasets + + """ + identifiers = [] + + for expt, refl in zip(experiments, reflections): + if len(refl) >= self.params.min_reflections: + identifiers.append(expt.identifier) + + filtered_datasets = select_datasets_on_identifiers( + experiments, reflections, use_datasets=identifiers + ) + + return filtered_datasets + + def calculate_matrices(self): + """ + Runs the required algorithms within dials.cosym to calculate the rij matrix and optimise the coordinates. + These results are passed into matrix computation functions to calculate the cosine similarity (cos-angle) and correlation matrices + as well as the corresponding clustering. + """ + + # Cosym algorithm to calculate the rij matrix (CC matrix when symmetry known) + self.cosym_analysis._intialise_target() + + # Cosym proceedures to calculate the cos-angle matrix + self.cosym_analysis._determine_dimensions() + self.cosym_analysis._optimise( + self.cosym_analysis.params.minimization.engine, + max_iterations=self.cosym_analysis.params.minimization.max_iterations, + max_calls=self.cosym_analysis.params.minimization.max_calls, + ) + + # Convert cosym output into cc/cos matrices in correct form and compute linkage matrices as clustering method + ( + self.correlation_matrix, + self.cc_linkage_matrix, + ) = self.compute_correlation_coefficient_matrix( + self.cosym_analysis.target.rij_matrix + ) + self.cos_angle, self.cos_linkage_matrix = self.compute_cos_angle_matrix( + self.cosym_analysis.coords + ) + + @staticmethod + def compute_correlation_coefficient_matrix( + correlation_matrix: np.ndarray, + ) -> tuple(np.ndarray, np.ndarray): + """ + Computes the correlation matrix and clustering linkage matrix from the rij cosym matrix. 
+ + Args: + correlation_matrix(numpy.ndarray): pair-wise matrix of correlation coefficients + + Returns: + correlation_matrix(numpy.ndarray): correlation matrix with corrections to diagonals and accounting for floating point errors + cc_linkage_matrix(numpy.ndarray): linkage matrix describing clustering of correlation matrix in dendrogram-style + + """ + + logger.info("\nCalculating Correlation Matrix (rij matrix - see dials.cosym)\n") + + # Make diagonals equal to 1 (each dataset correlated with itself) + np.fill_diagonal(correlation_matrix, 1) + + # clip values of correlation matrix to account for floating point errors + correlation_matrix.clip(-1, 1, out=correlation_matrix) + + # Convert to distance matrix rather than correlation + diffraction_dissimilarity = 1 - correlation_matrix + try: + assert ssd.is_valid_dm(diffraction_dissimilarity, tol=1e-12) + except AssertionError: + sys.exit( + "Correlation matrix does not give a valid distance matrix. Distance matrix is either non-symmetric or does not have a zero-diagonal." + ) + + # convert the redundant n*n square matrix form into a condensed nC2 array + cc_dist_mat = ssd.squareform(diffraction_dissimilarity, checks=False) + + # Clustering method + cc_linkage_matrix = hierarchy.linkage(cc_dist_mat, method="average") + + return correlation_matrix, cc_linkage_matrix + + @staticmethod + def compute_cos_angle_matrix( + coords: np.ndarray, + ) -> tuple(np.ndarray, np.ndarray): + """ + Computes the cos_angle matrix and clustering linkage matrix from the optimized cosym coordinates. + Args: + coords(numpy.ndarray): matrix of coordinates output from cosym optimisation + + Returns: + cos_angle(numpy.ndarray): pair-wise cos angle matrix + cos_linkage_matrix(numpy.ndarray): linkage matrix describing clustering of cos angle matrix in dendrogram-style + + """ + logger.info( + "Calculating Cos Angle Matrix from optimised cosym coordinates (see dials.cosym)\n" + ) + + # Convert coordinates to cosine distances and then reversed so closer cosine distances have higher values to match CC matrix + cos_dist_mat = ssd.pdist(coords, metric="cosine") + cos_angle = 1 - ssd.squareform(cos_dist_mat) + + # Clustering method + cos_linkage_matrix = hierarchy.linkage(cos_dist_mat, method="average") + + return cos_angle, cos_linkage_matrix + + def convert_to_html_json(self): + """ + Prepares the required dataset tables and converts analysis into the required format for HTML output. + """ + + # Convert the cosine and cc matrices into a plotly json format for output graphs + + self.cc_json = to_plotly_json( + self.correlation_matrix, + self.cc_linkage_matrix, + matrix_type="correlation", + ) + self.cos_json = to_plotly_json( + self.cos_angle, + self.cos_linkage_matrix, + matrix_type="cos_angle", + ) + + self.rij_graphs = OrderedDict() + + self.rij_graphs.update( + plot_rij_histogram(self.correlation_matrix, key="cosym_rij_histogram_sg") + ) + + self.rij_graphs.update( + plot_coords(self.cosym_analysis.coords, key="cosym_coordinates_sg") + ) + + # Generate the table for the html that lists all datasets and image paths present in the analysis + + paths = enumerate(e.imageset.paths()[0] for e in self._experiments) + self.table_list = [["Experiment/Image Number", "Image Path"], *map(list, paths)] + + def convert_to_importable_json(self, linkage_matrix: np.ndarray) -> OrderedDict: + """ + Generate a json file of the linkage matrices with unique identifiers rather than dataset numbers + May be useful for future developments to import this for further clustering analysis. 
+ Args: + linkage_matrix(numpy.ndarray): linkage matrix from hierarchy.linkage methods + Returns: + linkage_mat_as_dict(collections.OrderedDict): linkage matrix converted to dictionary with datasets replaced with dials unique identifiers + """ + linkage_mat_as_dict = linkage_matrix_to_dict(linkage_matrix) + for d in linkage_mat_as_dict.values(): + # Difference in indexing between linkage_mat_as_dict and datasets, so have i-1 + d["datasets"] = [self.ids_to_identifiers_map[i - 1] for i in d["datasets"]] + + return linkage_mat_as_dict + + def output_json(self): + """ + Outputs the cos and cc json files containing correlation/cos matrices and linkage details. + """ + + linkage_mat_as_dict_cc = self.convert_to_importable_json(self.cc_linkage_matrix) + linkage_mat_as_dict_cos = self.convert_to_importable_json( + self.cos_linkage_matrix + ) + + combined_json_dict = { + "correlation_matrix_clustering": linkage_mat_as_dict_cc, + "correlation_matrix": self.correlation_matrix.tolist(), + "cos_matrix_clustering": linkage_mat_as_dict_cos, + "cos_angle_matrix": self.cos_angle.tolist(), + } + + with open(self.params.output.json, "w") as f: + json.dump(combined_json_dict, f) diff --git a/src/dials/algorithms/correlation/plots.py b/src/dials/algorithms/correlation/plots.py new file mode 100644 index 0000000000..ea3a9330e4 --- /dev/null +++ b/src/dials/algorithms/correlation/plots.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +import copy +import sys +from collections import OrderedDict + +import numpy as np +from scipy.cluster import hierarchy + +from dials.algorithms.clustering.plots import scipy_dendrogram_to_plotly_json + + +def linkage_matrix_to_dict(linkage_matrix: np.ndarray) -> OrderedDict: + """ + Convert a linkage matrix to a dictionary. + Args: + linkage_matrix(numpy.ndarray): linkage matrix from hierarchy.linkage methods + Returns: + link_dict(collections.OrderedDict): linkage matrix converted to dictionary format + """ + tree = hierarchy.to_tree(linkage_matrix, rd=False) + + d = {} + + # https://gist.github.com/mdml/7537455 + + def add_node(node): + if node.is_leaf(): + return + cluster_id = node.get_id() - len(linkage_matrix) - 1 + row = linkage_matrix[cluster_id] + d[cluster_id + 1] = { + "datasets": [i + 1 for i in sorted(node.pre_order())], + "height": row[2], + } + + # Recursively add the current node's children + if node.left: + add_node(node.left) + if node.right: + add_node(node.right) + + add_node(tree) + + link_dict = OrderedDict(sorted(d.items())) + + return link_dict + + +def to_plotly_json( + correlation_matrix: np.ndarray, + linkage_matrix: np.ndarray, + labels: list = None, + matrix_type: str = "correlation", +) -> dict: + """ + Prepares a plotly-style plot of the heatmap corresponding to the input matrix + with dendrograms on the top and left sides. 
+ Args: + correlation_matrix(numpy.ndarray): pair-wise correlation matrix + linkage_matrix(numpy.ndarray): linkage matrix describing dendrogram-style clustering of correlation matrix + labels(list): desired labels for datasets + matrix_type(str): must be "correlation" or "cos_angle" + Returns: + d(dict): heatmap and dendrogram plot expressed as a dictionary for future graphical display + """ + + if matrix_type not in ("correlation", "cos_angle"): + sys.exit( + f"Matrix type input as {matrix_type}, but needs to be 'correlation' or 'cos_angle'" + ) + + ddict = hierarchy.dendrogram( + linkage_matrix, + color_threshold=0.05, + labels=labels, + show_leaf_counts=False, + no_plot=True, + ) + + # Converts the dendrogram to plotly json format - y2_dict is the one above the heatmap + y2_dict = scipy_dendrogram_to_plotly_json( + ddict, "Dendrogram", xtitle="Individual datasets", ytitle="Ward distance" + ) + + # x2_dict is the same dendrogram but positioned left of the heatmap + x2_dict = copy.deepcopy(y2_dict) + for d in y2_dict["data"]: + d["yaxis"] = "y2" + d["xaxis"] = "x2" + + # Switches x and y data so that the dendrogram will be rotated 90deg + # This orientatates it to match with the heatmap + for d in x2_dict["data"]: + x = d["x"] + y = d["y"] + d["x"] = y + d["y"] = x + d["yaxis"] = "y3" + d["xaxis"] = "x3" + + # Reorders the matrix into the same order as the dendrogram for the plots + D = correlation_matrix + index = ddict["leaves"] + D = D[index, :] + D = D[:, index] + ccdict = { + "data": [ + { + "name": "%s_matrix" % matrix_type, + "x": list(range(D.shape[0])), + "y": list(range(D.shape[1])), + "z": D.tolist(), + "type": "heatmap", + "colorbar": { + "title": ( + "Correlation coefficient" + if matrix_type == "correlation" + else "cos(angle)" + ), + "titleside": "right", + "xpad": 0, + }, + "colorscale": "YIOrRd", + "xaxis": "x", + "yaxis": "y", + } + ], + "layout": { + "autosize": False, + "bargap": 0, + "height": 1000, + "hovermode": "closest", + "margin": {"r": 20, "t": 50, "autoexpand": True, "l": 20}, + "showlegend": False, + "title": "Dendrogram Heatmap", + "width": 1000, + "xaxis": { + "domain": [0.2, 0.9], + "mirror": "allticks", + "showgrid": False, + "showline": False, + "showticklabels": True, + "tickmode": "array", + "ticks": "", + "ticktext": y2_dict["layout"]["xaxis"]["ticktext"], + "tickvals": list(range(len(y2_dict["layout"]["xaxis"]["ticktext"]))), + "tickangle": 300, + "title": "", + "type": "linear", + "zeroline": False, + }, + "yaxis": { + "domain": [0, 0.78], + "anchor": "x", + "mirror": "allticks", + "showgrid": False, + "showline": False, + "showticklabels": True, + "tickmode": "array", + "ticks": "", + "ticktext": y2_dict["layout"]["xaxis"]["ticktext"], + "tickvals": list(range(len(y2_dict["layout"]["xaxis"]["ticktext"]))), + "title": "", + "type": "linear", + "zeroline": False, + }, + "xaxis2": { + "domain": [0.2, 0.9], + "anchor": "y2", + "showgrid": False, + "showline": False, + "showticklabels": False, + "zeroline": False, + }, + "yaxis2": { + "domain": [0.8, 1], + "anchor": "x2", + "showgrid": False, + "showline": False, + "zeroline": False, + }, + "xaxis3": { + "domain": [0.0, 0.1], + "anchor": "y3", + "range": [max(max(d["x"]) for d in x2_dict["data"]), 0], + "showgrid": False, + "showline": False, + "tickangle": 300, + "zeroline": False, + }, + "yaxis3": { + "domain": [0, 0.78], + "anchor": "x3", + "showgrid": False, + "showline": False, + "showticklabels": False, + "zeroline": False, + }, + }, + } + d = ccdict + d["data"].extend(y2_dict["data"]) + 
d["data"].extend(x2_dict["data"]) + + d["clusters"] = linkage_matrix_to_dict(linkage_matrix) + + return d diff --git a/src/dials/command_line/correlation_matrix.py b/src/dials/command_line/correlation_matrix.py new file mode 100644 index 0000000000..1248a79230 --- /dev/null +++ b/src/dials/command_line/correlation_matrix.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import logging +import random +import sys + +import numpy as np +from jinja2 import ChoiceLoader, Environment, PackageLoader + +import iotbx.phil + +from dials.algorithms.correlation.analysis import CorrelationMatrix +from dials.array_family import flex +from dials.util import log, show_mail_handle_errors +from dials.util.multi_dataset_handling import ( + assign_unique_identifiers, + parse_multiple_datasets, +) +from dials.util.options import ArgumentParser, reflections_and_experiments_from_files +from dials.util.version import dials_version + +logger = logging.getLogger("dials.algorithms.correlation.analysis") + +phil_scope = iotbx.phil.parse( + """\ +include scope dials.algorithms.correlation.analysis.phil_scope + +seed = 42 + .type = int(value_min=0) + +output { + log = dials.correlation_matrix.log + .type = str + .help = "The log name" + html = dials.correlation_matrix.html + .type = path + .help = "Filename for the html report" + json = None + .type = str + .help = "Filename for the cluster information output in json format" +} +""", + process_includes=True, +) + + +help_message = """ +This module implements a subset of methods used in dials.cosym to perform +correlation and cosine similarity based clustering methods. Data should be passed +through dials.cosym first to implement consistent symmetry. + +Examples:: + + dials.correlation_matrix symmetrized.expt symmetrized.refl + +""" + + +@show_mail_handle_errors() +def run(args=None): + usage = "dials.correlation_matrix [options] symmetrized.expt symmetrized.refl" + + parser = ArgumentParser( + usage=usage, + phil=phil_scope, + read_reflections=True, + read_experiments=True, + check_format=False, + epilog=help_message, + ) + + params, options, args = parser.parse_args( + args=args, show_diff_phil=False, return_unhandled=True + ) + + # Configure the logging + log.config(verbosity=options.verbose, logfile=params.output.log) + + logger.info(dials_version()) + + # Log the diff phil + diff_phil = parser.diff_phil.as_str() + if diff_phil != "": + logger.info("The following parameters have been modified:\n") + logger.info(diff_phil) + + if params.seed is not None: + flex.set_random_seed(params.seed) + np.random.seed(params.seed) + random.seed(params.seed) + + if not params.input.experiments or not params.input.reflections: + parser.print_help() + sys.exit() + + reflections, experiments = reflections_and_experiments_from_files( + params.input.reflections, params.input.experiments + ) + + reflections = parse_multiple_datasets(reflections) + if len(experiments) != len(reflections): + sys.exit( + "Mismatched number of experiments and reflection tables found: %s & %s." + % (len(experiments), len(reflections)) + ) + if len(experiments) < 2: + sys.exit( + "At least 2 datasets are needed for cluster analysis. Please re-run with more datasets." 
+ ) + try: + experiments, reflections = assign_unique_identifiers(experiments, reflections) + matrices = CorrelationMatrix( + experiments=experiments, reflections=reflections, params=params + ) + except ValueError as e: + sys.exit(e) + + matrices.calculate_matrices() + + if params.output.json: + matrices.output_json() + + if params.output.html: + matrices.convert_to_html_json() + + loader = ChoiceLoader( + [PackageLoader("dials", "templates"), PackageLoader("dials", "templates")] + ) + env = Environment(loader=loader) + + template = env.get_template("clusters.html") + html = template.stream( + page_title="DIALS Correlation Matrix", + cc_cluster_json=matrices.cc_json, + cos_angle_cluster_json=matrices.cos_json, + image_range_tables=[matrices.table_list], + cosym_graphs=matrices.rij_graphs, + ) + + logger.info( + f"Saving graphical output of correlation matrices to {params.output.html}." + ) + html.dump(params.output.html, errors="xmlcharrefreplace") + + +if __name__ == "__main__": + run() diff --git a/src/dials/templates/clusters.html b/src/dials/templates/clusters.html new file mode 100644 index 0000000000..d3f0fd1352 --- /dev/null +++ b/src/dials/templates/clusters.html @@ -0,0 +1,83 @@ +{% extends "report_base.html" %} +{% block content %} + +
+ Cluster Analysis
+ {{ macros.panel('Data sets', 'datasets', {}, tables=image_range_tables) }}
+ {{ macros.plotly_graph("cc_cluster", cc_cluster_json, style="dendrogram-plot") }}
+ {{ macros.plotly_graph("cos_angle_cluster", cos_angle_cluster_json, style="dendrogram-plot") }}
+ {{ macros.panel('Cosym cluster plots', 'cosym_plots', cosym_graphs) }}
+ + + +{% endblock %} diff --git a/tests/algorithms/correlation/__init__.py b/tests/algorithms/correlation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/algorithms/correlation/test_analysis.py b/tests/algorithms/correlation/test_analysis.py new file mode 100644 index 0000000000..3f0f2ec9c4 --- /dev/null +++ b/tests/algorithms/correlation/test_analysis.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import pathlib + +from dials.algorithms.correlation.analysis import CorrelationMatrix +from dials.command_line.correlation_matrix import phil_scope +from dials.util.multi_dataset_handling import ( + assign_unique_identifiers, + parse_multiple_datasets, +) +from dials.util.options import ArgumentParser, reflections_and_experiments_from_files + + +def test_corr_mat(dials_data, run_in_tmp_path): + mcp = dials_data("vmxi_proteinase_k_sweeps", pathlib=True) + params = phil_scope.extract() + input_data = [] + parser = ArgumentParser( + usage=" ", + phil=phil_scope, + read_reflections=True, + read_experiments=True, + check_format=False, + epilog=" ", + ) + for i in [0, 1, 2, 3]: + input_data.append(str(mcp / f"experiments_{i}.expt")) + input_data.append(str(mcp / f"reflections_{i}.refl")) + + params, options, args = parser.parse_args( + args=input_data, show_diff_phil=False, return_unhandled=True + ) + + params.output.json = "dials.correlation_matrix.json" + + reflections, experiments = reflections_and_experiments_from_files( + params.input.reflections, params.input.experiments + ) + + reflections = parse_multiple_datasets(reflections) + assert len(experiments) == len(reflections) + assert len(experiments) > 1 + experiments, reflections = assign_unique_identifiers(experiments, reflections) + matrices = CorrelationMatrix(experiments, reflections, params) + matrices.calculate_matrices() + matrices.output_json() + assert pathlib.Path("dials.correlation_matrix.json").is_file() diff --git a/tests/command_line/test_correlation_matrix.py b/tests/command_line/test_correlation_matrix.py new file mode 100644 index 0000000000..bfcbefecc2 --- /dev/null +++ b/tests/command_line/test_correlation_matrix.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import pathlib + +import dials.command_line.correlation_matrix as dials_corr_mat + + +def test_corr_mat(dials_data, run_in_tmp_path): + mcp = dials_data("vmxi_proteinase_k_sweeps", pathlib=True) + args = [] + for i in [0, 1, 2, 3]: + args.append(str(mcp / f"experiments_{i}.expt")) + args.append(str(mcp / f"reflections_{i}.refl")) + dials_corr_mat.run(args=args) + assert pathlib.Path("dials.correlation_matrix.html").is_file() + assert pathlib.Path("dials.correlation_matrix.log").is_file() From a8945b6df16f6d8038a8e9baf31e83eb8b4987c0 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Tue, 30 Apr 2024 09:27:50 +0100 Subject: [PATCH 15/32] Allow `dials.find_rotation_axis` to work with multi-axis goniometers (#2658) * For a multi-axis goniometer we set the overall rotation axis by altering the orientation of the base axis. 
That way we are not changing the relationship between axes of the goniometer (so we still trust the header angles), we are just rotating the whole goniometer to fit the observations --- newsfragments/2658.bugfix | 1 + src/dials/command_line/find_rotation_axis.py | 23 +++++++++++++- tests/command_line/test_find_rotation_axis.py | 31 +++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 newsfragments/2658.bugfix diff --git a/newsfragments/2658.bugfix b/newsfragments/2658.bugfix new file mode 100644 index 0000000000..6e9e6f86a0 --- /dev/null +++ b/newsfragments/2658.bugfix @@ -0,0 +1 @@ +``dials.find_rotation_axis``: Correctly set the orientation of the rotation axis for a multi-axis goniometer. diff --git a/src/dials/command_line/find_rotation_axis.py b/src/dials/command_line/find_rotation_axis.py index 9f684c0d88..ce5dfa9131 100644 --- a/src/dials/command_line/find_rotation_axis.py +++ b/src/dials/command_line/find_rotation_axis.py @@ -26,6 +26,7 @@ import dxtbx.flumpy as flumpy import libtbx import libtbx.phil +from scitbx import matrix import dials.util import dials.util.log @@ -522,7 +523,27 @@ def run(args=None, phil=phil_scope): logger.info( f"\nRotation axis found: {azimuth_deg:.2f} deg. / {azimuth_rad:.3f} rad." ) - expt.goniometer.set_rotation_axis(rotation_axis_to_xyz(azimuth_rad)) + + current_rotation_axis = matrix.col(expt.goniometer.get_rotation_axis()) + new_rotation_axis = matrix.col(rotation_axis_to_xyz(azimuth_rad)) + + # Rotation matrix to take the current_rotation_axis to the new_rotation_axis + angle = current_rotation_axis.angle(new_rotation_axis) + if angle: + axis = matrix.col(current_rotation_axis.cross(new_rotation_axis)).normalize() + R = axis.axis_and_angle_as_r3_rotation_matrix(angle) + else: + R = matrix.sqr(matrix.identity(3)) + + try: + # For a multi-axis goniometer, rotate the base axis by R + axes = expt.goniometer.get_axes() + base_axis = matrix.col(axes[-1]) + axes[-1] = R * base_axis + expt.goniometer.set_axes(axes) + except AttributeError: + # For a single-axis goniometer, just set the new_rotation_axis + expt.goniometer.set_rotation_axis(new_rotation_axis) logger.info(str(expt.goniometer)) logger.info(f"Saving optimised experiments to {params.output.experiments}") diff --git a/tests/command_line/test_find_rotation_axis.py b/tests/command_line/test_find_rotation_axis.py index ab8812fc52..02aff530b9 100644 --- a/tests/command_line/test_find_rotation_axis.py +++ b/tests/command_line/test_find_rotation_axis.py @@ -1,7 +1,9 @@ from __future__ import annotations +from dxtbx.model.goniometer import GoniometerFactory from dxtbx.serialize import load from scitbx import matrix +from scitbx.array_family import flex import dials.command_line.find_rotation_axis as dials_find_rotation_axis @@ -22,3 +24,32 @@ def test_find_rotation_axis(dials_data, run_in_tmp_path): expected = matrix.col((-0.627963, -0.778243, 0)) assert axis.angle(expected) < 1e-6 + + +def test_find_rotation_axis_multi_axis_goniometer(dials_data, run_in_tmp_path): + myd88 = dials_data("MyD88_processed", pathlib=True) + experiments = load.experiment_list(str(myd88 / "imported.expt"), check_format=False) + + # Replace single-axis goniometer with multi-axis goniometer + axes = flex.vec3_double([(0, -1, 0), (0, -0.642788, 0.766044), (0, -1, 0)]) + angles = flex.double((0, 0, -62)) + names = flex.std_string(("PHI", "KAPPA=CHI", "OMEGA")) + scan_axis = 2 + experiments[0].goniometer = GoniometerFactory.multi_axis( + axes, angles, names, scan_axis + ) + 
experiments.as_file("modified.expt") + + # Now run find_rotation_axis, and expect the same result. + dials_find_rotation_axis.run(args=["modified.expt", str(myd88 / "strong.refl")]) + assert run_in_tmp_path.joinpath("optimised.expt").is_file() + assert run_in_tmp_path.joinpath("find_rotation_axis-histogram.png").is_file() + assert run_in_tmp_path.joinpath("find_rotation_axis-projection.png").is_file() + experiments = load.experiment_list( + run_in_tmp_path / "optimised.expt", check_format=False + ) + + axis = matrix.col(experiments[0].goniometer.get_rotation_axis()) + expected = matrix.col((-0.627963, -0.778243, 0)) + + assert axis.angle(expected) < 1e-6 From 9b07803526e88ee1bde28cf8f36edda1a648a657 Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Wed, 1 May 2024 08:55:29 +0100 Subject: [PATCH 16/32] Updates to dials.scale error modelling to handle stills data (#2654) --- newsfragments/2654.feature | 1 + src/dials/algorithms/scaling/algorithm.py | 7 +- .../algorithms/scaling/error_model/engine.py | 11 ++- .../scaling/error_model/error_model.py | 78 ++++++++++++++++++- src/dials/algorithms/scaling/scaler.py | 20 ++++- src/dials/command_line/refine_error_model.py | 17 +++- tests/algorithms/scaling/test_error_model.py | 9 ++- tests/command_line/test_ssx_reduction.py | 8 ++ 8 files changed, 135 insertions(+), 16 deletions(-) create mode 100644 newsfragments/2654.feature diff --git a/newsfragments/2654.feature b/newsfragments/2654.feature new file mode 100644 index 0000000000..a46d4d9535 --- /dev/null +++ b/newsfragments/2654.feature @@ -0,0 +1 @@ +``dials.scale``: Add filtering options to default basic error model to allow error modelling of stills data diff --git a/src/dials/algorithms/scaling/algorithm.py b/src/dials/algorithms/scaling/algorithm.py index 812bfac0b7..bfe09e3a0e 100644 --- a/src/dials/algorithms/scaling/algorithm.py +++ b/src/dials/algorithms/scaling/algorithm.py @@ -641,6 +641,9 @@ def targeted_scaling_algorithm(scaler): scaler.make_ready_for_scaling() scaler.perform_scaling() + expand_and_do_outlier_rejection(scaler, calc_cov=True) + do_error_analysis(scaler, reselect=True) + if scaler.params.scaling_options.full_matrix and ( scaler.params.scaling_refinery.engine == "SimpleLBFGS" ): @@ -648,9 +651,11 @@ def targeted_scaling_algorithm(scaler): engine=scaler.params.scaling_refinery.full_matrix_engine, max_iterations=scaler.params.scaling_refinery.full_matrix_max_iterations, ) + else: + scaler.perform_scaling() expand_and_do_outlier_rejection(scaler, calc_cov=True) - # do_error_analysis(scaler, reselect=False) + do_error_analysis(scaler, reselect=False) scaler.prepare_reflection_tables_for_output() return scaler diff --git a/src/dials/algorithms/scaling/error_model/engine.py b/src/dials/algorithms/scaling/error_model/engine.py index 4b67e7425d..9cf816c1fc 100644 --- a/src/dials/algorithms/scaling/error_model/engine.py +++ b/src/dials/algorithms/scaling/error_model/engine.py @@ -21,7 +21,9 @@ logger = logging.getLogger("dials") -def run_error_model_refinement(model, Ih_table): +def run_error_model_refinement( + model, Ih_table, min_partiality=0.4, use_stills_filtering=False +): """ Refine an error model for the input data, returning the model. @@ -30,7 +32,9 @@ def run_error_model_refinement(model, Ih_table): RuntimeError: can be raised in LBFGS minimiser. 
""" assert Ih_table.n_work_blocks == 1 - model.configure_for_refinement(Ih_table.blocked_data_list[0]) + model.configure_for_refinement( + Ih_table.blocked_data_list[0], min_partiality, use_stills_filtering + ) if not model.active_parameters: logger.info("All error model parameters fixed, skipping refinement") else: @@ -170,7 +174,6 @@ def test_value_convergence(self): r2 = self.avals[-2] except IndexError: return False - if r2 > 0: return abs((r2 - r1) / r2) < self._avals_tolerance else: @@ -201,7 +204,7 @@ def _refine_component(self, model, target, parameterisation): def run(self): """Refine the model.""" if self.parameters_to_refine == ["a", "b"]: - for n in range(20): # usually converges in around 5 cycles + for n in range(50): # usually converges in around 5 cycles self._refine_a() # now update in model self.avals.append(self.model.components["a"].parameters[0]) diff --git a/src/dials/algorithms/scaling/error_model/error_model.py b/src/dials/algorithms/scaling/error_model/error_model.py index ec490ec228..b42a4edb3b 100644 --- a/src/dials/algorithms/scaling/error_model/error_model.py +++ b/src/dials/algorithms/scaling/error_model/error_model.py @@ -42,6 +42,15 @@ "determine both parameters concurrently. If minimisation=None," "the model parameters are fixed to their initial or given values." .expert_level = 3 + stills { + min_Isigma = 2.0 + .type=float + .help = "Minimum uncorrected I/sigma for individual reflections used in error model optimisation" + min_multiplicity = 4 + .type = int + .help = "Only reflections with at least this multiplicity (after Isigma filtering) are" + "used in error model optimisation." + } min_Ih = 25.0 .type = float .help = "Reflections with expected intensity above this value are to." @@ -248,7 +257,10 @@ def _create_summation_matrix(self): n = self.Ih_table.size self.binning_info["n_reflections"] = n summation_matrix = sparse.matrix(n, self.n_bins) + # calculate expected intensity value in pixels on scale of each image Ih = self.Ih_table.Ih_values * self.Ih_table.inverse_scale_factors + if "partiality" in self.Ih_table.Ih_table: + Ih *= self.Ih_table.Ih_table["partiality"].to_numpy() size_order = flex.sort_permutation(flumpy.from_numpy(Ih), reverse=True) Imax = Ih.max() min_Ih = Ih.min() @@ -383,7 +395,6 @@ def __init__(self, a=None, b=None, basic_params=None): see if a user specified fixed value is set. If no fixed values are given then the model starts with the default parameters a=1.0 b=0.02 """ - self.free_components = [] self.sortedy = None self.sortedx = None @@ -408,14 +419,19 @@ def __init__(self, a=None, b=None, basic_params=None): if not basic_params.b: self._active_parameters.append("b") - def configure_for_refinement(self, Ih_table, min_partiality=0.4): + def configure_for_refinement( + self, Ih_table, min_partiality=0.4, use_stills_filtering=False + ): """ Add data to allow error model refinement. Raises: ValueError if insufficient reflections left after filtering. """ self.filtered_Ih_table = self.filter_unsuitable_reflections( - Ih_table, self.params, min_partiality + Ih_table, + self.params, + min_partiality, + use_stills_filtering, ) # always want binning info so that can calc for output. 
self.binner = ErrorModelBinner( @@ -455,8 +471,19 @@ def n_refl(self): return self.filtered_Ih_table.size @classmethod - def filter_unsuitable_reflections(cls, Ih_table, error_params, min_partiality): + def filter_unsuitable_reflections( + cls, Ih_table, error_params, min_partiality, use_stills_filtering + ): """Filter suitable reflections for minimisation.""" + if use_stills_filtering: + return filter_unsuitable_reflections_stills( + Ih_table, + error_params.stills.min_multiplicity, + error_params.stills.min_Isigma, + min_partiality=min_partiality, + min_reflections_required=cls.min_reflections_required, + min_Ih=error_params.min_Ih, + ) return filter_unsuitable_reflections( Ih_table, min_Ih=error_params.min_Ih, @@ -570,6 +597,49 @@ def binned_variances_summary(self): ) +def filter_unsuitable_reflections_stills( + Ih_table, + min_multiplicity, + min_Isigma, + min_partiality, + min_reflections_required, + min_Ih, +): + """Filter suitable reflections for minimisation.""" + + if "partiality" in Ih_table.Ih_table: + sel = Ih_table.Ih_table["partiality"].to_numpy() > min_partiality + Ih_table = Ih_table.select(sel) + + sel = (Ih_table.intensities / (Ih_table.variances**0.5)) >= min_Isigma + Ih_table = Ih_table.select(sel) + + Ih = Ih_table.Ih_values * Ih_table.inverse_scale_factors + if "partiality" in Ih_table.Ih_table: + Ih *= Ih_table.Ih_table["partiality"].to_numpy() + sel = Ih > min_Ih + Ih_table = Ih_table.select(sel) + + n_h = Ih_table.calc_nh() + sigmaprime = calc_sigmaprime([1.0, 0.0], Ih_table) + delta_hl = calc_deltahl(Ih_table, n_h, sigmaprime) + # Optimise on the central bulk distribution of the data - avoid the few + # reflections in the long tails. + sel = np.abs(delta_hl) < 6.0 + Ih_table = Ih_table.select(sel) + + sel = Ih_table.calc_nh() >= min_multiplicity + Ih_table = Ih_table.select(sel) + n = Ih_table.size + + if n < min_reflections_required: + raise ValueError( + "Insufficient reflections (%s < %s) to perform error modelling." 
+ % (n, min_reflections_required) + ) + return Ih_table + + def filter_unsuitable_reflections( Ih_table, min_Ih, min_partiality, min_reflections_required ): diff --git a/src/dials/algorithms/scaling/scaler.py b/src/dials/algorithms/scaling/scaler.py index d453f65e48..487ad12546 100644 --- a/src/dials/algorithms/scaling/scaler.py +++ b/src/dials/algorithms/scaling/scaler.py @@ -350,6 +350,9 @@ def __init__(self, params, experiment, reflection_table, for_multi=False): self.free_set_selection = flex.bool(self.n_suitable_refl, False) self._free_Ih_table = None # An array of len n_suitable_refl self._configure_model_and_datastructures(for_multi=for_multi) + self.is_still = True + if self._experiment.scan and self._experiment.scan.get_oscillation()[1] != 0.0: + self.is_still = False if self.params.weighting.error_model.error_model: # reload current error model parameters, or create new null self.experiment.scaling_model.load_error_model( @@ -380,7 +383,10 @@ def perform_error_optimisation(self, update_Ih=True): Ih_table, _ = self._create_global_Ih_table(anomalous=True, remove_outliers=True) try: model = run_error_model_refinement( - self._experiment.scaling_model.error_model, Ih_table + self._experiment.scaling_model.error_model, + Ih_table, + self.params.reflection_selection.min_partiality, + use_stills_filtering=self.is_still, ) except (ValueError, RuntimeError) as e: logger.info(e) @@ -1500,7 +1506,12 @@ def perform_error_optimisation(self, update_Ih=True): continue tables = [s.get_valid_reflections().select(~s.outliers) for s in scalers] space_group = scalers[0].experiment.crystal.get_space_group() - Ih_table = IhTable(tables, space_group, anomalous=True) + Ih_table = IhTable( + tables, + space_group, + anomalous=True, + additional_cols=["partiality"], + ) if len(minimisation_groups) == 1: logger.info("Determining a combined error model for all datasets") else: @@ -1509,7 +1520,10 @@ def perform_error_optimisation(self, update_Ih=True): ) try: model = run_error_model_refinement( - scalers[0]._experiment.scaling_model.error_model, Ih_table + scalers[0]._experiment.scaling_model.error_model, + Ih_table, + min_partiality=self.params.reflection_selection.min_partiality, + use_stills_filtering=scalers[0].is_still, ) except (ValueError, RuntimeError) as e: logger.info(e) diff --git a/src/dials/command_line/refine_error_model.py b/src/dials/command_line/refine_error_model.py index b3ccac4597..c7894c3f7d 100644 --- a/src/dials/command_line/refine_error_model.py +++ b/src/dials/command_line/refine_error_model.py @@ -55,6 +55,9 @@ phil_scope = phil.parse( """ include scope dials.algorithms.scaling.error_model.error_model.phil_scope + min_partiality = 0.4 + .type = float + .help = "Use reflections with at least this partiality in error model optimisation." 
intensity_choice = *profile sum combine .type = choice .help = "Use profile or summation intensities" @@ -101,13 +104,23 @@ def refine_error_model(params, experiments, reflection_tables): reflection_tables[i] = table space_group = experiments[0].crystal.get_space_group() Ih_table = IhTable( - reflection_tables, space_group, additional_cols=["partiality"], anomalous=True + reflection_tables, + space_group, + additional_cols=["partiality"], + anomalous=True, ) + use_stills_filtering = True + for expt in experiments: + if expt.scan and expt.scan.get_oscillation()[1] != 0.0: + use_stills_filtering = False + break # now do the error model refinement model = BasicErrorModel(basic_params=params.basic) try: - model = run_error_model_refinement(model, Ih_table) + model = run_error_model_refinement( + model, Ih_table, params.min_partiality, use_stills_filtering + ) except (ValueError, RuntimeError) as e: logger.info(e) else: diff --git a/tests/algorithms/scaling/test_error_model.py b/tests/algorithms/scaling/test_error_model.py index eafc142f39..979f0303ab 100644 --- a/tests/algorithms/scaling/test_error_model.py +++ b/tests/algorithms/scaling/test_error_model.py @@ -183,19 +183,24 @@ def test_error_model_on_simulated_data( ) -def test_errormodel(large_reflection_table, test_sg): +@pytest.mark.parametrize("use_stills_filtering", [True, False]) +def test_errormodel(large_reflection_table, test_sg, use_stills_filtering): """Test the initialisation and methods of the error model.""" Ih_table = IhTable([large_reflection_table], test_sg, nblocks=1) block = Ih_table.blocked_data_list[0] params = generated_param() + params.weighting.error_model.basic.stills.min_multiplicity = 2 + params.weighting.error_model.basic.stills.min_Isigma = 0.0 params.weighting.error_model.basic.n_bins = 2 params.weighting.error_model.basic.min_Ih = 1.0 em = BasicErrorModel em.min_reflections_required = 1 error_model = em(basic_params=params.weighting.error_model.basic) error_model.min_reflections_required = 1 - error_model.configure_for_refinement(block) + error_model.configure_for_refinement( + block, use_stills_filtering=use_stills_filtering + ) assert error_model.binner.summation_matrix[0, 1] == 1 assert error_model.binner.summation_matrix[1, 1] == 1 assert error_model.binner.summation_matrix[2, 0] == 1 diff --git a/tests/command_line/test_ssx_reduction.py b/tests/command_line/test_ssx_reduction.py index 49f007307c..09599afdea 100644 --- a/tests/command_line/test_ssx_reduction.py +++ b/tests/command_line/test_ssx_reduction.py @@ -91,3 +91,11 @@ def test_ssx_reduction(dials_data, tmp_path): ) assert not result.returncode and not result.stderr assert (tmp_path / "compute_delta_cchalf.html").is_file() + + # will not be able to refine error model due to lack of data, but should rather exit cleanly. + result = subprocess.run( + [shutil.which("dials.refine_error_model"), scale_expts, scale_refls], + cwd=tmp_path, + capture_output=True, + ) + assert not result.returncode and not result.stderr From b624db84b968ddd31b84a8031b6116b1c074c168 Mon Sep 17 00:00:00 2001 From: David McDonagh <60879630+toastisme@users.noreply.github.com> Date: Thu, 2 May 2024 11:31:59 +0100 Subject: [PATCH 17/32] Fix dials.show for time of flight experiments. (#2660) * Fix dials.show for time of flight experiments. 
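
A time-of-flight experiment uses a polychromatic beam, so there is no single s0
vector for the resolution-limit checks in dials.show. The change instead derives
an effective s0 from the unit beam direction and the shortest wavelength in the
range. A minimal sketch of that idea, using the dxtbx model/method names that
appear in the patch below (outside that context, treat the names as assumptions):

    from dxtbx.model import ExperimentType

    def effective_s0(expt):
        # |s0| = 1/wavelength, so the shortest wavelength in the range gives the
        # largest |s0| and therefore the highest-resolution (smallest d) estimate.
        if expt.get_type() == ExperimentType.TOF:
            min_wavelength = min(expt.beam.get_wavelength_range())
            return tuple(i / min_wavelength for i in expt.beam.get_unit_s0())
        return expt.beam.get_s0()

Using the shortest wavelength is a conservative choice: it reports the best
resolution that any part of the wavelength range can reach at the detector corners.
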
--- newsfragments/2660.bugfix | 1 + src/dials/command_line/show.py | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 newsfragments/2660.bugfix diff --git a/newsfragments/2660.bugfix b/newsfragments/2660.bugfix new file mode 100644 index 0000000000..5e29f576d1 --- /dev/null +++ b/newsfragments/2660.bugfix @@ -0,0 +1 @@ +Fix `dials.show` beam checks for time of flight experiments. diff --git a/src/dials/command_line/show.py b/src/dials/command_line/show.py index e04d335d2e..8883bb1638 100644 --- a/src/dials/command_line/show.py +++ b/src/dials/command_line/show.py @@ -7,6 +7,7 @@ import iotbx.phil from cctbx import uctbx +from dxtbx.model import ExperimentType from dxtbx.model.experiment_list import ExperimentListFactory from scitbx.math import five_number_summary @@ -87,11 +88,15 @@ def beam_centre_raw_image_px(detector, s0): return x_px + offset[0], y_px + offset[1] -def show_beam(detector, beam): +def show_beam(detector, beam, experiment_type: ExperimentType | None = None): # standard static beam model string s = str(beam) + # time of flight experiments have no scan points + if experiment_type == ExperimentType.TOF: + return s + # report whether the beam is scan-varying if beam.num_scan_points > 0: s += " s0 sampled at " + str(beam.num_scan_points) + " scan points\n" @@ -263,16 +268,20 @@ def show_experiments(experiments, show_scan_varying=False): except AttributeError: pass text.append(str(expt.detector)) + if expt.get_type() == ExperimentType.TOF: + min_wavelength = min(expt.beam.get_wavelength_range()) + s0 = tuple([i / min_wavelength for i in expt.beam.get_unit_s0()]) + else: + s0 = expt.beam.get_s0() text.append( - "Max resolution (at corners): %f" - % (expt.detector.get_max_resolution(expt.beam.get_s0())) + "Max resolution (at corners): %f" % (expt.detector.get_max_resolution(s0)) ) text.append( "Max resolution (inscribed): %f" - % (expt.detector.get_max_inscribed_resolution(expt.beam.get_s0())) + % (expt.detector.get_max_inscribed_resolution(s0)) ) text.append("") - text.append(show_beam(expt.detector, expt.beam)) + text.append(show_beam(expt.detector, expt.beam, expt.get_type())) if expt.scan is not None: text.append(str(expt.scan)) if expt.goniometer is not None: From 8e800095177707c270df70bb37bf2c85ccef80ad Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Wed, 8 May 2024 14:59:49 +0100 Subject: [PATCH 18/32] Add weighting option to cosym CC calculation (#2666) --- newsfragments/2666.feature | 1 + .../algorithms/scaling/scaling_library.py | 57 ++++++ .../algorithms/symmetry/cosym/__init__.py | 24 ++- src/dials/algorithms/symmetry/cosym/target.py | 186 +++++++++++++++++- tests/command_line/test_cosym.py | 18 +- 5 files changed, 277 insertions(+), 9 deletions(-) create mode 100644 newsfragments/2666.feature diff --git a/newsfragments/2666.feature b/newsfragments/2666.feature new file mode 100644 index 0000000000..01b33456da --- /dev/null +++ b/newsfragments/2666.feature @@ -0,0 +1 @@ +``dials.cosym``: Add alternative weighting during cosym CC calculation diff --git a/src/dials/algorithms/scaling/scaling_library.py b/src/dials/algorithms/scaling/scaling_library.py index fcfc6ef2ec..d8cdd59b8f 100644 --- a/src/dials/algorithms/scaling/scaling_library.py +++ b/src/dials/algorithms/scaling/scaling_library.py @@ -15,6 +15,7 @@ import math from copy import deepcopy from dataclasses import dataclass +from typing import List, Tuple from unittest.mock import Mock import numpy as np @@ 
-486,6 +487,62 @@ def calc_rsplit(cls, this, other, assume_index_matching=False, use_binning=False ) return results + @classmethod + def weighted_cchalf( + cls, this, other, assume_index_matching=False, use_binning=False + ) -> List[Tuple]: + if not use_binning: + assert other.indices().size() == this.indices().size() + if this.data().size() == 0: + return None, None + + if assume_index_matching: + (o, c) = (this, other) + else: + (o, c) = this.common_sets(other=other, assert_no_singles=True) + + # The case where the denominator is less or equal to zero is + # pathological and should never arise in practice. + assert len(o.sigmas()) + assert len(c.sigmas()) + n = len(o.data()) + if n == 1: + return None, 1 + v_o = flex.pow2(o.sigmas()) + v_c = flex.pow2(c.sigmas()) + joint_w = 1.0 / (v_o + v_c) + sumjw = flex.sum(joint_w) + norm_jw = joint_w / sumjw + xbar = flex.sum(o.data() * norm_jw) + ybar = flex.sum(c.data() * norm_jw) + dx = o.data() - xbar + dy = c.data() - ybar + sxy = flex.sum(dx * dy * norm_jw) + + sx = flex.sum(flex.pow2(dx) * norm_jw) + sy = flex.sum(flex.pow2(dy) * norm_jw) + if sx == 0.0 or sy == 0.0: + return None, 0 + # effective sample size of weighted sample + # Kish, Leslie. 1965. Survey Sampling New York: Wiley. (R documentation) + # neff = sum(w)^2 / sum(w^2). But sum(w) == 1 as normalised already + neff = 1 / flex.sum(flex.pow2(norm_jw)) + return [(sxy / ((sx * sy) ** 0.5), neff)] + + assert this.binner is not None + results = [] + for i_bin in this.binner().range_all(): + sel = this.binner().selection(i_bin) + results.append( + cls.weighted_cchalf( + this.select(sel), + other.select(sel), + assume_index_matching=assume_index_matching, + use_binning=False, + )[0] + ) + return results + def merging_stats_from_scaled_array( scaled_miller_array, diff --git a/src/dials/algorithms/symmetry/cosym/__init__.py b/src/dials/algorithms/symmetry/cosym/__init__.py index a2b308272a..a89c71ddf8 100644 --- a/src/dials/algorithms/symmetry/cosym/__init__.py +++ b/src/dials/algorithms/symmetry/cosym/__init__.py @@ -23,6 +23,7 @@ from scitbx import matrix import dials.util +import dials.util.system from dials.algorithms.indexing.symmetry import find_matching_symmetry from dials.algorithms.symmetry import median_unit_cell, symmetry_base from dials.algorithms.symmetry.cosym import engine as cosym_engine @@ -79,6 +80,15 @@ weights = count standard_error .type = choice .short_caption = "Weights" + .help = "If not None, a weights matrix is used in the cosym procedure." + "weights=count uses the number of reflections used to calculate a pairwise correlation coefficient as its weight" + "weights=standard_error uses the reciprocal of the standard error as the weight. The standard error is given by" + "the sqrt of (1-CC*2)/(n-2), where (n-2) are the degrees of freedom in a pairwise CC calculation." +cc_weights = None sigma + .type = choice + .help = "If not None, a weighted cc-half formula is used for calculating pairwise correlation coefficients and degrees of" + "freedom in the cosym procedure." + "weights=sigma uses the intensity uncertainties to perform inverse variance weighting during the cc calculation." 
min_pairs = 3 .type = int(value_min=1) @@ -99,10 +109,9 @@ .short_caption = "Maximum number of calls" } -nproc = None +nproc = Auto .type = int(value_min=1) - .help = "Deprecated" - .deprecated = True + .help = "Number of processes" """ ) @@ -208,6 +217,13 @@ def _map_space_group_to_input_cell(intensities, space_group): self.intensities, self.params.lattice_group.group() ) self.params.lattice_group = tmp_intensities.space_group_info() + # N.B. currently only multiprocessing used if cc_weights=sigma + if self.params.nproc is Auto: + if self.params.cc_weights == "sigma": + params.nproc = dials.util.system.CPU_COUNT + logger.info("Setting nproc={}".format(params.nproc)) + else: + params.nproc = 1 def _intialise_target(self): if self.params.dimensions is Auto: @@ -229,6 +245,8 @@ def _intialise_target(self): lattice_group=self.lattice_group, dimensions=dimensions, weights=self.params.weights, + cc_weights=self.params.cc_weights, + nproc=self.params.nproc, ) def _determine_dimensions(self): diff --git a/src/dials/algorithms/symmetry/cosym/target.py b/src/dials/algorithms/symmetry/cosym/target.py index 33258fdecb..3c810689ac 100644 --- a/src/dials/algorithms/symmetry/cosym/target.py +++ b/src/dials/algorithms/symmetry/cosym/target.py @@ -2,6 +2,7 @@ from __future__ import annotations +import concurrent.futures import copy import itertools import logging @@ -15,6 +16,120 @@ from cctbx.array_family import flex logger = logging.getLogger(__name__) +from scipy import sparse + +from dials.algorithms.scaling.scaling_library import ExtendedDatasetStatistics + + +def _lattice_lower_upper_index(lattices, lattice_id): + lower_index = int(lattices[lattice_id]) + upper_index = None + if lattice_id < len(lattices) - 1: + upper_index = int(lattices[lattice_id + 1]) + else: + assert lattice_id == len(lattices) - 1 + return lower_index, upper_index + + +def _compute_rij_matrix_one_row_block( + i, lattices, data, indices, sym_ops, patterson_group, weights=True, min_pairs=3 +): + cs = data.crystal_symmetry() + n_lattices = len(lattices) + rij_cache = {} + + NN = n_lattices * len(sym_ops) + + rij_row = [] + rij_col = [] + rij_data = [] + wij = None + if weights: + wij_row = [] + wij_col = [] + wij_data = [] + + i_lower, i_upper = _lattice_lower_upper_index(lattices, i) + intensities_i = data.data()[i_lower:i_upper] + sigmas_i = data.sigmas()[i_lower:i_upper] + cb_ops = [sgtbx.change_of_basis_op(cb_op_k) for cb_op_k in sym_ops] + + for j in range(n_lattices): + + j_lower, j_upper = _lattice_lower_upper_index(lattices, j) + intensities_j = data.data()[j_lower:j_upper] + sigmas_j = data.sigmas()[j_lower:j_upper] + + for k, cb_op_k in enumerate(cb_ops): + + indices_i = indices[cb_op_k.as_xyz()][i_lower:i_upper] + + for kk, cb_op_kk in enumerate(cb_ops): + if i == j and k == kk: + # don't include correlation of dataset with itself + continue + + ik = i + (n_lattices * k) + jk = j + (n_lattices * kk) + + key = (i, j, str(cb_op_k.inverse() * cb_op_kk)) + if key in rij_cache: + cc, n = rij_cache[key] + else: + indices_j = indices[cb_op_kk.as_xyz()][j_lower:j_upper] + + matches = miller.match_indices(indices_i, indices_j) + pairs = matches.pairs() + isel_i = pairs.column(0) + isel_j = pairs.column(1) + isel_i = isel_i.select( + patterson_group.epsilon(indices_i.select(isel_i)) == 1 + ) + isel_j = isel_j.select( + patterson_group.epsilon(indices_j.select(isel_j)) == 1 + ) + ms = miller.set( + crystal_symmetry=cs, indices=indices_j.select(isel_j) + ) + ma_j = miller.array( + miller_set=ms, + 
data=intensities_j.select(isel_j), + sigmas=sigmas_j.select(isel_j), + ) + ms = miller.set( + crystal_symmetry=cs, indices=indices_i.select(isel_i) + ) + ma_i = miller.array( + miller_set=ms, + data=intensities_i.select(isel_i), + sigmas=sigmas_i.select(isel_i), + ) + corr, neff = ExtendedDatasetStatistics.weighted_cchalf( + ma_i, ma_j, assume_index_matching=True + )[0] + if neff: + cc = corr + n = neff + else: + n, cc = (None, None) + + rij_cache[key] = (cc, n) + + if n is None or cc is None or (min_pairs is not None and n < min_pairs): + continue + if weights: + wij_row.append(ik) + wij_col.append(jk) + wij_data.append(n) + rij_row.append(ik) + rij_col.append(jk) + rij_data.append(cc) + + rij = sparse.coo_matrix((rij_data, (rij_row, rij_col)), shape=(NN, NN)) + if weights: + wij = sparse.coo_matrix((wij_data, (wij_row, wij_col)), shape=(NN, NN)) + + return rij, wij class Target: @@ -32,6 +147,8 @@ def __init__( min_pairs=3, lattice_group=None, dimensions=None, + nproc=1, + cc_weights=None, ): r"""Initialise a Target object. @@ -60,6 +177,7 @@ def __init__( assert weights in ("count", "standard_error") self._weights = weights self._min_pairs = min_pairs + self._nproc = nproc data = intensities.customized_copy(anomalous_flag=False) cb_op_to_primitive = data.change_of_basis_op_to_primitive_setting() @@ -70,8 +188,11 @@ def __init__( order = lattice_ids.argsort().astype(np.uint64) sorted_data = data.data().select(flex.size_t(order)) sorted_indices = data.indices().select(flex.size_t(order)) + sorted_sigmas = data.sigmas().select(flex.size_t(order)) self._lattice_ids = lattice_ids[order] - self._data = data.customized_copy(indices=sorted_indices, data=sorted_data) + self._data = data.customized_copy( + indices=sorted_indices, data=sorted_data, sigmas=sorted_sigmas + ) assert isinstance(self._data.indices(), type(flex.miller_index())) assert isinstance(self._data.data(), type(flex.double())) @@ -103,8 +224,10 @@ def __init__( logger.debug( "Patterson group: %s", self._patterson_group.info().symbol_and_number() ) - - self.rij_matrix, self.wij_matrix = self._compute_rij_wij() + if cc_weights == "sigma": + self.rij_matrix, self.wij_matrix = self._compute_rij_wij_ccweights() + else: + self.rij_matrix, self.wij_matrix = self._compute_rij_wij() def set_dimensions(self, dimensions): """Set the number of dimensions for analysis. @@ -145,6 +268,63 @@ def _generate_twin_operators(self, lattice_symmetry_max_delta=5.0): return operators + def _compute_rij_wij_ccweights(self): + # Use flex-based methods for calculating matrices. + # Pre-calculate miller indices after application of each cb_op. Only calculate + # this once per cb_op instead of on-the-fly every time we need it. 
+ indices = {} + epsilons = {} + space_group_type = self._data.space_group().type() + for cb_op in self.sym_ops: + cb_op = sgtbx.change_of_basis_op(cb_op) + indices_reindexed = cb_op.apply(self._data.indices()) + miller.map_to_asu(space_group_type, False, indices_reindexed) + cb_op_str = cb_op.as_xyz() + indices[cb_op_str] = indices_reindexed + epsilons[cb_op_str] = self._patterson_group.epsilon(indices_reindexed) + + rij_matrix = None + wij_matrix = None + + with concurrent.futures.ProcessPoolExecutor(max_workers=self._nproc) as pool: + futures = [ + pool.submit( + _compute_rij_matrix_one_row_block, + i, + self._lattices, + self._data, + indices, + self.sym_ops, + self._patterson_group, + weights=True, + min_pairs=self._min_pairs, + ) + for i, _ in enumerate(self._lattices) + ] + for future in concurrent.futures.as_completed(futures): + rij, wij = future.result() + + if rij_matrix is None: + rij_matrix = rij + else: + rij_matrix += rij + if wij is not None: + if wij_matrix is None: + wij_matrix = wij + else: + wij_matrix += wij + + rij_matrix = rij_matrix.toarray().astype(np.float64) + if wij_matrix is not None: + wij_matrix = wij_matrix.toarray().astype(np.float64) + if self._weights == "standard_error": + sel = np.where(wij_matrix > 2) + se = np.sqrt((1 - np.square(rij_matrix[sel])) / (wij_matrix[sel] - 2)) + wij_matrix = np.zeros_like(rij_matrix) + wij_matrix[sel] = 1 / se + + return rij_matrix, wij_matrix + def _compute_rij_wij(self, use_cache=True): """Compute the rij_wij matrix. diff --git a/tests/command_line/test_cosym.py b/tests/command_line/test_cosym.py index 1981cf47b7..9670ebe0cc 100644 --- a/tests/command_line/test_cosym.py +++ b/tests/command_line/test_cosym.py @@ -17,11 +17,23 @@ @pytest.mark.parametrize( - "space_group,engine", [(None, "scitbx"), ("P 1", "scipy"), ("P 4", "scipy")] + "space_group,engine,weights,cc_weights", + [ + (None, "scitbx", None, None), + ("P 1", "scipy", None, None), + ("P 4", "scipy", "standard_error", "sigma"), + ], ) -def test_cosym(dials_data, run_in_tmp_path, space_group, engine): +def test_cosym(dials_data, run_in_tmp_path, space_group, engine, weights, cc_weights): mcp = dials_data("multi_crystal_proteinase_k", pathlib=True) - args = ["space_group=" + str(space_group), "seed=0", f"engine={engine}"] + args = [ + "space_group=" + str(space_group), + "seed=0", + "nproc=1", + f"engine={engine}", + f"weights={weights}", + f"cc_weights={cc_weights}", + ] for i in [1, 2, 3, 4, 5, 7, 8, 10]: args.append(os.fspath(mcp / f"experiments_{i}.json")) args.append(os.fspath(mcp / f"reflections_{i}.pickle")) From 3e0807b8fcd74548f975ef5373352301d2463c96 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 14 May 2024 14:48:57 +0100 Subject: [PATCH 19/32] Trap d_min > d_max in masking (#2664) * Trap d_min > d_max in masking Stop with an error if d_min > d_max as the user almost certainly did not mean this. If they are equal still no spots will be found but no error will be raised. 
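
A minimal sketch of the intended check (parameter names follow the masking PHIL
scope; the numeric values are invented for illustration):

    import sys

    d_min, d_max = 4.0, 1.5  # inverted range: the kept region d_min <= d <= d_max is empty
    if d_min is not None and d_max is not None and d_min > d_max:
        sys.exit(f"d_min = {d_min} > d_max = {d_max}: no spots will be found")

A hard exit rather than a warning is used because an inverted range can only
ever give an empty result.
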
Fixes #2663 --- newsfragments/2664.bugfix | 1 + src/dials/util/masking.py | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 newsfragments/2664.bugfix diff --git a/newsfragments/2664.bugfix b/newsfragments/2664.bugfix new file mode 100644 index 0000000000..99af372143 --- /dev/null +++ b/newsfragments/2664.bugfix @@ -0,0 +1 @@ +d_min, d_max in masking: raise error if d_min > d_max so no spots will be found diff --git a/src/dials/util/masking.py b/src/dials/util/masking.py index 11e800e5af..6c54cbf0e4 100644 --- a/src/dials/util/masking.py +++ b/src/dials/util/masking.py @@ -3,6 +3,7 @@ import copy import logging import math +import sys import time from collections import namedtuple from typing import Tuple @@ -215,6 +216,13 @@ def generate_mask( detector = imageset.get_detector() beam = imageset.get_beam() + # validate that the input parameters are sensible + if params.d_min is not None and params.d_max is not None: + if params.d_min > params.d_max: + sys.exit( + f"d_min = {params.d_min} > d_max = {params.d_max}: no spots will be found" + ) + # Create the mask for each panel masks = [] for index, panel in enumerate(detector): From 81f4287101ebfa15cd26c6f63342497c449c2116 Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Wed, 15 May 2024 17:03:43 +0100 Subject: [PATCH 20/32] Reset reflection table ids in image grouping code (#2667) Reset reflection table ids in image grouping code to enable use in scripts --- newsfragments/2667.misc | 1 + src/dials/util/image_grouping.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 newsfragments/2667.misc diff --git a/newsfragments/2667.misc b/newsfragments/2667.misc new file mode 100644 index 0000000000..6ea1b19665 --- /dev/null +++ b/newsfragments/2667.misc @@ -0,0 +1 @@ +Reset ids in image grouping code to enable use in scripts diff --git a/src/dials/util/image_grouping.py b/src/dials/util/image_grouping.py index 8279fb59a0..d15f4678b0 100644 --- a/src/dials/util/image_grouping.py +++ b/src/dials/util/image_grouping.py @@ -658,6 +658,7 @@ def save_subset(input_: SplittingIterable) -> Optional[Tuple[str, FilePair]]: sel_identifiers = list(identifiers.select(flumpy.from_numpy(sel))) expts.select_on_experiment_identifiers(sel_identifiers) refls = refls.select_on_experiment_identifiers(sel_identifiers) + refls.reset_ids() if expts: exptout = ( input_.working_directory From 1213accb0de4001f0559188b691d34ca90d2b0a8 Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Wed, 22 May 2024 14:42:27 +0100 Subject: [PATCH 21/32] Fix recent ccweights code for uncalculable correlations (#2668) Consistent return type for uncalculable ccs --- newsfragments/2668.bugfix | 1 + src/dials/algorithms/scaling/scaling_library.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 newsfragments/2668.bugfix diff --git a/newsfragments/2668.bugfix b/newsfragments/2668.bugfix new file mode 100644 index 0000000000..00980f672f --- /dev/null +++ b/newsfragments/2668.bugfix @@ -0,0 +1 @@ +``dials.cosym``: Make function return structure correct in recently added cc_weights option diff --git a/src/dials/algorithms/scaling/scaling_library.py b/src/dials/algorithms/scaling/scaling_library.py index d8cdd59b8f..193887297e 100644 --- a/src/dials/algorithms/scaling/scaling_library.py +++ b/src/dials/algorithms/scaling/scaling_library.py @@ -494,7 +494,7 @@ def weighted_cchalf( if not use_binning: assert 
other.indices().size() == this.indices().size() if this.data().size() == 0: - return None, None + return [(None, 0)] if assume_index_matching: (o, c) = (this, other) @@ -507,7 +507,7 @@ def weighted_cchalf( assert len(c.sigmas()) n = len(o.data()) if n == 1: - return None, 1 + return [(None, 1)] v_o = flex.pow2(o.sigmas()) v_c = flex.pow2(c.sigmas()) joint_w = 1.0 / (v_o + v_c) @@ -522,7 +522,7 @@ def weighted_cchalf( sx = flex.sum(flex.pow2(dx) * norm_jw) sy = flex.sum(flex.pow2(dy) * norm_jw) if sx == 0.0 or sy == 0.0: - return None, 0 + return [(None, 1)] # effective sample size of weighted sample # Kish, Leslie. 1965. Survey Sampling New York: Wiley. (R documentation) # neff = sum(w)^2 / sum(w^2). But sum(w) == 1 as normalised already From b33fcdcd0dc885133f3c7ff973cc1bbc0a024514 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Wed, 22 May 2024 16:57:03 +0100 Subject: [PATCH 22/32] Add reflection selection criteria to `dials.find_rotation_axis` (#2670) * Delete potentially large objects as soon as they are not needed * Include a d_max filter to remove low res noisy regions * Set a max_sample_size, default 2000, taking lowest res reflections as these seem best for the search --- newsfragments/2670.bugfix | 1 + src/dials/command_line/find_rotation_axis.py | 59 +++++++++++++------ tests/command_line/test_find_rotation_axis.py | 4 +- 3 files changed, 45 insertions(+), 19 deletions(-) create mode 100644 newsfragments/2670.bugfix diff --git a/newsfragments/2670.bugfix b/newsfragments/2670.bugfix new file mode 100644 index 0000000000..2f76fa6ed4 --- /dev/null +++ b/newsfragments/2670.bugfix @@ -0,0 +1 @@ +``dials.find_rotation_axis``: Add reflection selection criteria to avoid runs that use a very large amount of memory. diff --git a/src/dials/command_line/find_rotation_axis.py b/src/dials/command_line/find_rotation_axis.py index ce5dfa9131..cc8ed57d4b 100644 --- a/src/dials/command_line/find_rotation_axis.py +++ b/src/dials/command_line/find_rotation_axis.py @@ -31,8 +31,6 @@ import dials.util import dials.util.log from dials.array_family import flex - -# from dials.array_family import flex from dials.util.options import ArgumentParser, reflections_and_experiments_from_files from dials.util.system import CPU_COUNT from dials.util.version import dials_version @@ -43,11 +41,20 @@ # Define the master PHIL scope for this program. phil_scope = libtbx.phil.parse( """ +d_max = 10.0 + .type = float + .help = "Low resolution limit to apply to the data, intended to remove" + "noisy regions of low angle scatter" + max_two_theta = 10.0 .type = float .help = "Scattering angle limit to select reflections only in the central" "mostly flat region of the Ewald sphere surface" +max_sample_size = 2000 + .help = "The maximum number of reflections to use." 
+ .type = int(value_min=1) + view = False .type = bool .help = "View phi/theta histogram with current rotation axis (azimuth)" @@ -129,12 +136,6 @@ def make_2d_rotmat(theta): return R -def random_sample(arr, n): - """Select random sample of `n` rows from array""" - indices = np.random.choice(arr.shape[0], n, replace=False) - return arr[indices] - - def xyz2cyl(arr): """ Take a set of reflections in XYZ and convert to polar (cylindrical) @@ -155,7 +156,10 @@ def cylinder_histo(xyz, bins=(1000, 500)): """ i, j = np.triu_indices(len(xyz), k=1) diffs = xyz[i] - xyz[j] + del i + del j polar = xyz2cyl(diffs) + del diffs px, py = polar.T H, xedges, yedges = np.histogram2d( @@ -286,13 +290,20 @@ def optimise( return best_azimuth -def extract_spot_data(reflections, experiments, max_two_theta): +def extract_spot_data(reflections, experiments, params): """ - From the spot positions, extract reciprocal space X, Y and angle positions - for each reflection up to the scattering angle max_two_theta + Filter reflections according to various parameters, then extract reciprocal + space X, Y and angle positions. """ + + max_two_theta = params.max_two_theta + d_max = params.d_max + max_sample_size = params.max_sample_size + # Map reflections to reciprocal space reflections.centroid_px_to_mm(experiments) + reflections.map_centroids_to_reciprocal_space(experiments) + reflections["d"] = 1 / reflections["rlp"].norms() # Calculate scattering vectors reflections["s1"] = flex.vec3_double(len(reflections)) @@ -303,7 +314,6 @@ def extract_spot_data(reflections, experiments, max_two_theta): sel_expt = reflections["imageset_id"] == i else: sel_expt = reflections["id"] == i - for i_panel in range(len(expt.detector)): sel = sel_expt & (panel_numbers == i_panel) x, y, _ = reflections["xyzobs.mm.value"].select(sel).parts() @@ -314,11 +324,25 @@ def extract_spot_data(reflections, experiments, max_two_theta): reflections["2theta"].set_selected(sel, tt) # Filter reflections - full_len = len(reflections) - reflections = reflections.select(reflections["2theta"] <= max_two_theta) - if len(reflections) < full_len: + nref = len(reflections) + if max_two_theta: + reflections = reflections.select(reflections["2theta"] <= max_two_theta) + if len(reflections) < nref: + logger.info( + f"{len(reflections)} reflections with 2θ ≤ {max_two_theta}° selected from {nref} total" + ) + nref = len(reflections) + if d_max: + reflections = reflections.select(reflections["d"] <= d_max) + logger.info( + f"{len(reflections)} reflections with d ≤ {d_max} Å selected from {nref} total" + ) + nref = len(reflections) + if max_sample_size and max_sample_size < nref: + reflections.sort("d", reverse=True) + reflections = reflections[0:max_sample_size] logger.info( - f"{len(reflections)} reflections with 2θ ≤ {max_two_theta}° selected from {full_len} total" + f"{len(reflections)} lowest resolution reflections selected from {nref} total" ) x, y, _ = reflections["s1"].parts() @@ -392,7 +416,8 @@ def run(args=None, phil=phil_scope): wavelength = expt.beam.get_wavelength() rotx, roty, _ = expt.goniometer.get_rotation_axis() azimuth_current = np.degrees(np.arctan2(-roty, rotx)) - arr = extract_spot_data(reflections, experiments, params.max_two_theta) + arr = extract_spot_data(reflections, experiments, params) + del reflections if params.azimuth is not None: azimuth_current = params.azimuth diff --git a/tests/command_line/test_find_rotation_axis.py b/tests/command_line/test_find_rotation_axis.py index 02aff530b9..c2a789990f 100644 --- 
a/tests/command_line/test_find_rotation_axis.py +++ b/tests/command_line/test_find_rotation_axis.py @@ -21,7 +21,7 @@ def test_find_rotation_axis(dials_data, run_in_tmp_path): ) axis = matrix.col(experiments[0].goniometer.get_rotation_axis()) - expected = matrix.col((-0.627963, -0.778243, 0)) + expected = matrix.col((-0.626604, -0.779338, 0)) assert axis.angle(expected) < 1e-6 @@ -50,6 +50,6 @@ def test_find_rotation_axis_multi_axis_goniometer(dials_data, run_in_tmp_path): ) axis = matrix.col(experiments[0].goniometer.get_rotation_axis()) - expected = matrix.col((-0.627963, -0.778243, 0)) + expected = matrix.col((-0.626604, -0.779338, 0)) assert axis.angle(expected) < 1e-6 From ef52efd7b5aac3412c7b24aa414b2aa4d62654a2 Mon Sep 17 00:00:00 2001 From: Jenkins Date: Thu, 23 May 2024 09:29:48 +0100 Subject: [PATCH 23/32] DIALS 3.19.1 Changelog --- CHANGELOG.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fc369daa4a..19d271eed8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,9 @@ +DIALS 3.19.1 (2024-05-23) +========================= + +No significant changes. + + DIALS 3.19.0 (2024-04-17) ========================= From 3610641af4a6f8f02b84dcf1f9d7f7d3c65d2c42 Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Thu, 6 Jun 2024 14:19:53 +0100 Subject: [PATCH 24/32] Fix issue with cosym cc_weights calculation for low n (#2673) --- newsfragments/2673.bugfix | 1 + src/dials/algorithms/symmetry/cosym/target.py | 48 +++++++++++++------ 2 files changed, 34 insertions(+), 15 deletions(-) create mode 100644 newsfragments/2673.bugfix diff --git a/newsfragments/2673.bugfix b/newsfragments/2673.bugfix new file mode 100644 index 0000000000..22cb041c81 --- /dev/null +++ b/newsfragments/2673.bugfix @@ -0,0 +1 @@ +``dials.cosym``: For cc_weights=sigma, ensure correct filtering based on min_pairs parameters diff --git a/src/dials/algorithms/symmetry/cosym/target.py b/src/dials/algorithms/symmetry/cosym/target.py index 3c810689ac..8de32e43a4 100644 --- a/src/dials/algorithms/symmetry/cosym/target.py +++ b/src/dials/algorithms/symmetry/cosym/target.py @@ -32,7 +32,14 @@ def _lattice_lower_upper_index(lattices, lattice_id): def _compute_rij_matrix_one_row_block( - i, lattices, data, indices, sym_ops, patterson_group, weights=True, min_pairs=3 + i, + lattices, + data, + indices, + sym_ops, + patterson_group, + weights=True, + min_pairs=3, ): cs = data.crystal_symmetry() n_lattices = len(lattices) @@ -74,7 +81,7 @@ def _compute_rij_matrix_one_row_block( key = (i, j, str(cb_op_k.inverse() * cb_op_kk)) if key in rij_cache: - cc, n = rij_cache[key] + cc, n, n_pairs = rij_cache[key] else: indices_j = indices[cb_op_kk.as_xyz()][j_lower:j_upper] @@ -104,18 +111,26 @@ def _compute_rij_matrix_one_row_block( data=intensities_i.select(isel_i), sigmas=sigmas_i.select(isel_i), ) - corr, neff = ExtendedDatasetStatistics.weighted_cchalf( - ma_i, ma_j, assume_index_matching=True - )[0] - if neff: - cc = corr - n = neff - else: + n_pairs = ma_i.size() + if ma_i.size() < min_pairs: n, cc = (None, None) - - rij_cache[key] = (cc, n) - - if n is None or cc is None or (min_pairs is not None and n < min_pairs): + else: + corr, neff = ExtendedDatasetStatistics.weighted_cchalf( + ma_i, ma_j, assume_index_matching=True + )[0] + if neff: + cc = corr + n = neff + else: + n, cc = (None, None) + + rij_cache[key] = (cc, n, n_pairs) + + if ( + n is None + or cc is None + or (min_pairs is not None and n_pairs < min_pairs) + ): 
continue if weights: wij_row.append(ik) @@ -318,8 +333,11 @@ def _compute_rij_wij_ccweights(self): if wij_matrix is not None: wij_matrix = wij_matrix.toarray().astype(np.float64) if self._weights == "standard_error": - sel = np.where(wij_matrix > 2) - se = np.sqrt((1 - np.square(rij_matrix[sel])) / (wij_matrix[sel] - 2)) + # N.B. using effective n due to sigma weighting, which can be below 2 + # but approches 1 in the limit, so rather say efective sample size + # for standard error calc is n-1 + sel = np.where(wij_matrix > 1) + se = np.sqrt((1 - np.square(rij_matrix[sel])) / (wij_matrix[sel] - 1)) wij_matrix = np.zeros_like(rij_matrix) wij_matrix[sel] = 1 / se From 32f5e094fd17b56de271f287cad1a77be9535dd9 Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Thu, 6 Jun 2024 14:21:57 +0100 Subject: [PATCH 25/32] Fix issue with cosym cc calculation when running with a space group (#2674) When running dials.cosym with a space group, ensure data are merged after mapping from P1 to the requested space group. Fixes an issue for calculation of pairwise ccs with unequal numbers of equivalent reflections between two datasets --- newsfragments/2674.bugfix | 1 + src/dials/algorithms/symmetry/cosym/__init__.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 newsfragments/2674.bugfix diff --git a/newsfragments/2674.bugfix b/newsfragments/2674.bugfix new file mode 100644 index 0000000000..28100bbc86 --- /dev/null +++ b/newsfragments/2674.bugfix @@ -0,0 +1 @@ +``dials.cosym``: Fix to give more accurate cc calculation when running with a space_group set diff --git a/src/dials/algorithms/symmetry/cosym/__init__.py b/src/dials/algorithms/symmetry/cosym/__init__.py index a89c71ddf8..95346fdeb7 100644 --- a/src/dials/algorithms/symmetry/cosym/__init__.py +++ b/src/dials/algorithms/symmetry/cosym/__init__.py @@ -21,6 +21,7 @@ from cctbx.sgtbx.lattice_symmetry import metric_subgroups from libtbx import Auto from scitbx import matrix +from scitbx.array_family import flex import dials.util import dials.util.system @@ -209,6 +210,21 @@ def _map_space_group_to_input_cell(intensities, space_group): ) self.input_space_group = self.intensities.space_group() + # ensure still unique after mapping - merge equivalents in the higher symmetry + new_intensities = None + new_dataset_ids = flex.int([]) + for d in set(self.dataset_ids): + sel = self.dataset_ids == d + these_i = self.intensities.select(sel) + these_merged = these_i.merge_equivalents().array() + if not new_intensities: + new_intensities = these_merged + else: + new_intensities = new_intensities.concatenate(these_merged) + new_dataset_ids.extend(flex.int(these_merged.size(), d)) + self.intensities = new_intensities + self.dataset_ids = new_dataset_ids + else: self.input_space_group = None From 42954cee869ae576e8ac5f832d532fdcf0abb3ab Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Thu, 6 Jun 2024 17:36:18 +0100 Subject: [PATCH 26/32] Update cctbx prebuilt dependencies to 2024 releases (#2613) --- newsfragments/2613.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/2613.misc diff --git a/newsfragments/2613.misc b/newsfragments/2613.misc new file mode 100644 index 0000000000..45974be105 --- /dev/null +++ b/newsfragments/2613.misc @@ -0,0 +1 @@ +Update cctbx-base dependency to 2024 versions. 
From b7bcc78e120e8cedb0228645b3176d6d5ed02769 Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Wed, 12 Jun 2024 09:28:12 +0100 Subject: [PATCH 27/32] Add dials.split_still_data utility program (#2672) --- newsfragments/2672.feature | 1 + src/dials/command_line/split_still_data.py | 171 ++++++++++++++++++++ src/dials/util/image_grouping.py | 45 ++++++ tests/command_line/test_split_still_data.py | 59 +++++++ 4 files changed, 276 insertions(+) create mode 100644 newsfragments/2672.feature create mode 100644 src/dials/command_line/split_still_data.py create mode 100644 tests/command_line/test_split_still_data.py diff --git a/newsfragments/2672.feature b/newsfragments/2672.feature new file mode 100644 index 0000000000..e410954029 --- /dev/null +++ b/newsfragments/2672.feature @@ -0,0 +1 @@ +XXX.feature: Add dials.split_still_data for splitting dials-processed still data based on image number (e.g. dose series) diff --git a/src/dials/command_line/split_still_data.py b/src/dials/command_line/split_still_data.py new file mode 100644 index 0000000000..5dadb03e61 --- /dev/null +++ b/src/dials/command_line/split_still_data.py @@ -0,0 +1,171 @@ +# DIALS_ENABLE_COMMAND_LINE_COMPLETION +""" +Split dials still-shot data files (expt and refl files) into separate files +based on image number or metadata. For example, splitting a dose series dataset. +Either the repeat unit can be specified with the series_repeat option, or +a yaml file can be provided. +If series_repeat=10, then data corresponding to every nth image will be split into +separate files: +group_0_0.expt +group_1_0.expt +.... +group_9_0.expt +and the same for reflection files. +The second index relates to the input file index, so if more than one input file is input, +there will then be group_0_1.expt etc. + +Example yaml syntax is +Point to a data array: + +--- +metadata: + timepoint: + '/path/to/example_master.h5' : '/path/to/meta.h5:/timepoint' +grouping: + split_by: + values: + - timepoint + +or for images specified by a template: +--- +metadata: + dose_point: + '/path/to/example_#####.cbf' : 'repeat=10' +grouping: + split_by: + values: + - dose_point + +""" + +from __future__ import annotations + +import logging +import sys +from pathlib import Path + +import iotbx.phil +from dxtbx.serialize import load +from libtbx import Auto + +from dials.util import log +from dials.util.image_grouping import ( + FilePair, + ParsedYAML, + get_grouping_handler, + series_repeat_to_groupings, +) +from dials.util.options import ArgumentParser +from dials.util.system import CPU_COUNT +from dials.util.version import dials_version + +logger = logging.getLogger("dials") + + +phil_str = """ +input { + reflections = None + .type = str + .multiple = True + .help = "Path to an integrated reflections file" + .expert_level = 1 + experiments = None + .type = str + .multiple = True + .help = "Path to an integrated experiments file" + .expert_level = 1 +} +grouping = None + .type = str + .help = "Path to a .yml file defining grouping structure during processing" +series_repeat = None + .type = int(value_min=2) + .expert_level = 2 + .help = "This option allows the user to specify that the data is a repeated series" + "by providing the number of repeated measurements at each point. i.e. it" + "is assumed that $series_repeat measurements are taken at each position" + "and that these form consecutive images in the input image files." 
+nproc=Auto + .type=int +output.log=dials.split_still_data.log + .type=str +""" + +phil_scope = iotbx.phil.parse(phil_str) + + +def run(args=sys.argv[1:]): + parser = ArgumentParser( + usage="dials.split_still_data integrated.expt integrated.refl series_repeat=10", + read_experiments=False, + read_reflections=False, + phil=phil_scope, + check_format=False, + epilog=__doc__, + ) + params, options, unhandled = parser.parse_args( + args=args, show_diff_phil=False, return_unhandled=True + ) + if unhandled: + for item in unhandled: + if item.endswith(".expt"): + args[args.index(item)] = f"input.experiments = {item}" + elif item.endswith(".refl"): + args[args.index(item)] = f"input.reflections = {item}" + else: + raise ValueError(f"Unhandled argument: {item}") + params, options = parser.parse_args(args=args, show_diff_phil=False) + + if not (params.input.reflections and params.input.experiments): + raise ValueError("Reflections and experiments files must both be specified") + log.config(verbosity=options.verbose, logfile=params.output.log) + logger.info(dials_version()) + + reflection_files = sorted([Path(i).resolve() for i in params.input.reflections]) + experiment_files = sorted([Path(i).resolve() for i in params.input.experiments]) + + datafiles = [FilePair(e, r) for e, r in zip(experiment_files, reflection_files)] + expts = [] + for fp in datafiles: + elist = load.experiment_list(fp.expt, check_format=False) + for e in elist: + if e.scan and e.scan.get_oscillation()[1] != 0.0: + logger.info( + "dials.split_still_data can only be used on still-shot data" + ) + sys.exit() + expts.append(elist) + + if params.nproc is Auto: + params.nproc = CPU_COUNT + logger.info(f"Using nproc={params.nproc}") + + if params.series_repeat: + parsed = series_repeat_to_groupings( + expts, params.series_repeat, groupname="split_by" + ) + handler = get_grouping_handler(parsed, "split_by", nproc=params.nproc) + elif params.grouping: + try: + parsed = ParsedYAML(params.grouping) + except Exception as e: + logger.info(f"Error: {e}\nPlease check input in yaml file.") + groupings = parsed.groupings + if len(groupings) != 1: + logger.info("Only one grouping type can be specified in grouping yaml") + sys.exit() + handler = get_grouping_handler( + parsed, list(parsed.groupings.keys())[0], nproc=params.nproc + ) + try: + _ = handler.split_files_to_groups( + working_directory=Path.cwd(), + data_file_pairs=datafiles, + ) + logger.info("Finished splitting data.") + except Exception as e: + logger.info(f"Error: {e}\nPlease check input.") + + +if __name__ == "__main__": + run() diff --git a/src/dials/util/image_grouping.py b/src/dials/util/image_grouping.py index d15f4678b0..1c53eb838d 100644 --- a/src/dials/util/image_grouping.py +++ b/src/dials/util/image_grouping.py @@ -21,6 +21,7 @@ from yaml.loader import SafeLoader from dxtbx import flumpy +from dxtbx.model import ExperimentList from dxtbx.sequence_filenames import group_files_by_imageset, template_regex from dxtbx.serialize import load @@ -1039,3 +1040,47 @@ def get_grouping_handler(parsed: ParsedYAML, grouping: str, nproc: int = 1): if grouping not in parsed.groupings: raise ValueError(f"Grouping definition {grouping} not found in parsed yaml.") return handler_class(parsed.groupings[grouping], nproc) + + +def series_repeat_to_groupings( + experiments: List[ExperimentList], series_repeat: int, groupname="split_by" +) -> ParsedYAML: + """ + For a dose series data collection, attempt to create and then parse a + groupings yaml based on the images in the input experiments. 
+ + Callers of this function should be prepared to catch Exceptions! + """ + # if all end with .h5 or .nxs then images, else template? + + images = set() + for expts in experiments: + for iset in expts.imagesets(): + images.update(iset.paths()) + + metalines = "" + if all(image.endswith(".nxs") or image.endswith(".h5") for image in images): + # ok assume all independent: + metadata = [] + for image in images: + metadata.append(f"{image} : 'repeat={series_repeat}'") + metalines = "\n ".join(s for s in metadata) + else: + isets = group_files_by_imageset(images) + metadata = [] + for iset in isets.keys(): + metadata.append(f"{iset} : 'repeat={series_repeat}'") + metalines = "\n ".join(s for s in metadata) + if not metalines: + raise ValueError("Unable to extract images/templates from experiments") + grouping = f""" +metadata: + dose_point: + {metalines} +grouping: + {groupname}: + values: + - dose_point +""" + parsed_yaml = ParsedYAML(yml_str=grouping) + return parsed_yaml diff --git a/tests/command_line/test_split_still_data.py b/tests/command_line/test_split_still_data.py new file mode 100644 index 0000000000..46ed79b286 --- /dev/null +++ b/tests/command_line/test_split_still_data.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import os +import pathlib + +import pytest + +from dxtbx.serialize import load + +import dials.command_line.split_still_data as split + + +@pytest.mark.xfail( + os.name == "nt", + reason="Failures due to translated paths; see https://github.com/cctbx/dxtbx/issues/613", +) +@pytest.mark.parametrize("use_yaml", [True, False]) +def test_split_still_data(dials_data, run_in_tmp_path, use_yaml): + data = dials_data("cunir_serial_processed", pathlib=True) + args = [ + os.fspath(data / "integrated.expt"), + os.fspath(data / "integrated.refl"), + "nproc=1", + ] + if use_yaml: + images = os.fspath(dials_data("cunir_serial", pathlib=True)) + yml = f""" +--- +metadata: + timepoint: + {images}/merlin0047_#####.cbf : 'repeat=2' +grouping: + group_by: + values: + - timepoint +""" + test_yaml = run_in_tmp_path / "tmp.yaml" + with open(test_yaml, "w") as f: + f.write(yml) + args.append(f"grouping={test_yaml}") + else: + args.append("series_repeat=2") + split.run(args=args) + assert pathlib.Path(run_in_tmp_path / "group_0_0.expt").is_file() + assert pathlib.Path(run_in_tmp_path / "group_0_0.refl").is_file() + assert pathlib.Path(run_in_tmp_path / "group_1_0.expt").is_file() + assert pathlib.Path(run_in_tmp_path / "group_1_0.refl").is_file() + expts1 = load.experiment_list("group_0_0.expt", check_format=False) + assert len(expts1) == 3 + # not old style elist datastructures (no scan, single imageset) + assert expts1[0].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17000" + assert expts1[1].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17002" + assert expts1[2].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17004" + expts2 = load.experiment_list( + run_in_tmp_path / "group_1_0.expt", check_format=False + ) + assert len(expts2) == 2 + assert expts2[0].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17001" + assert expts2[1].imageset.get_path(0).split("_")[-1].rstrip(".cbf") == "17003" From 73b8d96035695b1fa91e5b6705720d1e72d64d82 Mon Sep 17 00:00:00 2001 From: Amy Thompson <52806925+amyjaynethompson@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:36:58 +0100 Subject: [PATCH 28/32] With clustering tables (#2671) migrated over the rest of the clustering code from xia2.multiplex and added cluster tables to output --- newsfragments/2671.feature 
| 1 + src/dials/algorithms/correlation/analysis.py | 87 ++++++++++++++++++-- src/dials/algorithms/correlation/cluster.py | 53 ++++++++++++ src/dials/command_line/correlation_matrix.py | 2 + src/dials/templates/clusters.html | 6 ++ 5 files changed, 144 insertions(+), 5 deletions(-) create mode 100644 newsfragments/2671.feature create mode 100644 src/dials/algorithms/correlation/cluster.py diff --git a/newsfragments/2671.feature b/newsfragments/2671.feature new file mode 100644 index 0000000000..8daf56a644 --- /dev/null +++ b/newsfragments/2671.feature @@ -0,0 +1 @@ +``dials.correlation_matrix``: Add tables with cluster information to html output diff --git a/src/dials/algorithms/correlation/analysis.py b/src/dials/algorithms/correlation/analysis.py index cb73896eb1..cf41589e0e 100644 --- a/src/dials/algorithms/correlation/analysis.py +++ b/src/dials/algorithms/correlation/analysis.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy import json import logging import sys @@ -12,11 +13,14 @@ import iotbx.phil from dxtbx.model import ExperimentList from libtbx.phil import scope_extract +from scitbx.array_family import flex +from dials.algorithms.correlation.cluster import ClusterInfo from dials.algorithms.correlation.plots import linkage_matrix_to_dict, to_plotly_json from dials.algorithms.symmetry.cosym import CosymAnalysis from dials.algorithms.symmetry.cosym.plots import plot_coords, plot_rij_histogram from dials.array_family.flex import reflection_table +from dials.util import tabulate from dials.util.exclude_images import get_selection_for_valid_image_ranges from dials.util.filter_reflections import filtered_arrays_from_experiments_reflections from dials.util.multi_dataset_handling import select_datasets_on_identifiers @@ -53,6 +57,7 @@ def __init__( experiments: ExperimentList, reflections: list[reflection_table], params: scope_extract = None, + ids_to_identifiers_map: dict = None, ): """ Set up the required cosym preparations for determining the correlation matricies @@ -70,6 +75,7 @@ def __init__( params = phil_scope.extract() self.params = params self._reflections = [] + self.ids_to_identifiers_map = ids_to_identifiers_map if len(reflections) == len(experiments): for refl, expt in zip(reflections, experiments): @@ -87,9 +93,14 @@ def __init__( ) # Used for optional json creation that is in a format friendly for import and analysis (for future development) - self.ids_to_identifiers_map = {} - for table in self._reflections: - self.ids_to_identifiers_map.update(table.experiment_identifiers()) + # Also to retain dataset ids when used in multiplex + if self.ids_to_identifiers_map is None: + self.ids_to_identifiers_map = {} + for table in self._reflections: + self.ids_to_identifiers_map.update(table.experiment_identifiers()) + + self.labels = list(dict.fromkeys(self.ids_to_identifiers_map)) + self._labels_all = flex.size_t(self.labels) # Filter reflections that do not meet partiality threshold or default I/Sig(I) criteria @@ -100,6 +111,8 @@ def __init__( partiality_threshold=params.partiality_threshold, ) + self.unmerged_datasets = datasets + # Merge intensities to prepare for cosym analysis self.datasets = self._merge_intensities(datasets) @@ -183,10 +196,26 @@ def calculate_matrices(self): ) = self.compute_correlation_coefficient_matrix( self.cosym_analysis.target.rij_matrix ) + + self.correlation_clusters = self.cluster_info( + linkage_matrix_to_dict(self.cc_linkage_matrix) + ) + + logger.info("\nIntensity correlation clustering summary:") + self.cc_table = 
ClusterInfo.as_table(self.correlation_clusters) + logger.info(tabulate(self.cc_table, headers="firstrow", tablefmt="rst")) self.cos_angle, self.cos_linkage_matrix = self.compute_cos_angle_matrix( self.cosym_analysis.coords ) + self.cos_angle_clusters = self.cluster_info( + linkage_matrix_to_dict(self.cos_linkage_matrix) + ) + + logger.info("\nCos(angle) clustering summary:") + self.cos_table = ClusterInfo.as_table(self.cos_angle_clusters) + logger.info(tabulate(self.cos_table, headers="firstrow", tablefmt="rst")) + @staticmethod def compute_correlation_coefficient_matrix( correlation_matrix: np.ndarray, @@ -203,7 +232,7 @@ def compute_correlation_coefficient_matrix( """ - logger.info("\nCalculating Correlation Matrix (rij matrix - see dials.cosym)\n") + logger.info("\nCalculating Correlation Matrix (rij matrix - see dials.cosym)") # Make diagonals equal to 1 (each dataset correlated with itself) np.fill_diagonal(correlation_matrix, 1) @@ -243,7 +272,7 @@ def compute_cos_angle_matrix( """ logger.info( - "Calculating Cos Angle Matrix from optimised cosym coordinates (see dials.cosym)\n" + "\nCalculating Cos Angle Matrix from optimised cosym coordinates (see dials.cosym)" ) # Convert coordinates to cosine distances and then reversed so closer cosine distances have higher values to match CC matrix @@ -255,6 +284,52 @@ def compute_cos_angle_matrix( return cos_angle, cos_linkage_matrix + def cluster_info(self, cluster_dict: dict) -> list: + """ + Generate list of cluster objects with associated statistics. + Args: + cluster_dict(dict): dictionary of clusters (generated from linkage_matrix_to_dict) + + Returns: + info(list): list of ClusterInfo objects to describe all clusters of a certain type (ie correlation or cos angle) + """ + + info = [] + for cluster_id, cluster in cluster_dict.items(): + uc_params = [flex.double() for i in range(6)] + for j in cluster["datasets"]: + uc_j = self.datasets[j - 1].unit_cell().parameters() + for i in range(6): + uc_params[i].append(uc_j[i]) + average_uc = [flex.mean(uc_params[i]) for i in range(6)] + intensities_cluster = [] + labels_cluster = [] + ids = [self._labels_all[id - 1] for id in cluster["datasets"]] + for idx, k in zip(self._labels_all, self.unmerged_datasets): + if idx in ids: + intensities_cluster.append(k) + labels_cluster.append(idx) + merged = None + for d in intensities_cluster: + if merged is None: + merged = copy.deepcopy(d) + else: + merged = merged.concatenate(d, assert_is_similar_symmetry=False) + merging = merged.merge_equivalents() + merged_intensities = merging.array() + multiplicities = merging.redundancies() + info.append( + ClusterInfo( + cluster_id, + labels_cluster, + flex.mean(multiplicities.data().as_double()), + merged_intensities.completeness(), + unit_cell=average_uc, + height=cluster.get("height"), + ) + ) + return info + def convert_to_html_json(self): """ Prepares the required dataset tables and converts analysis into the required format for HTML output. 
@@ -265,11 +340,13 @@ def convert_to_html_json(self): self.cc_json = to_plotly_json( self.correlation_matrix, self.cc_linkage_matrix, + labels=self.labels, matrix_type="correlation", ) self.cos_json = to_plotly_json( self.cos_angle, self.cos_linkage_matrix, + labels=self.labels, matrix_type="cos_angle", ) diff --git a/src/dials/algorithms/correlation/cluster.py b/src/dials/algorithms/correlation/cluster.py new file mode 100644 index 0000000000..ca271481ba --- /dev/null +++ b/src/dials/algorithms/correlation/cluster.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from libtbx.str_utils import wordwrap + + +class ClusterInfo: + def __init__( + self, cluster_id, labels, multiplicity, completeness, unit_cell, height=None + ): + self.cluster_id = cluster_id + self.labels = labels + self.multiplicity = multiplicity + self.completeness = completeness + self.unit_cell = unit_cell + self.height = height + + def __str__(self): + lines = [ + "Cluster %i" % self.cluster_id, + " Number of datasets: %i" % len(self.labels), + " Completeness: %.1f %%" % (self.completeness * 100), + " Multiplicity: %.2f" % self.multiplicity, + " Datasets:" + ",".join("%s" % s for s in self.labels), + ] + if self.height is not None: + lines.append(" height: %f" % self.height) + return "\n".join(lines) + + @staticmethod + def as_table(cluster_info): + headers = [ + "Cluster", + "No. datasets", + "Datasets", + "Height", + "Multiplicity", + "Completeness", + ] + rows = [] + for info in cluster_info: + rows.append( + [ + "%i" % info.cluster_id, + "%i" % len(info.labels), + wordwrap(" ".join("%s" % l for l in info.labels)), + "%.2g" % info.height, + "%.1f" % info.multiplicity, + "%.2f" % info.completeness, + ] + ) + + rows.insert(0, headers) + return rows diff --git a/src/dials/command_line/correlation_matrix.py b/src/dials/command_line/correlation_matrix.py index 1248a79230..d19c78c65e 100644 --- a/src/dials/command_line/correlation_matrix.py +++ b/src/dials/command_line/correlation_matrix.py @@ -132,7 +132,9 @@ def run(args=None): html = template.stream( page_title="DIALS Correlation Matrix", cc_cluster_json=matrices.cc_json, + cc_cluster_table=matrices.cc_table, cos_angle_cluster_json=matrices.cos_json, + cos_angle_cluster_table=matrices.cos_table, image_range_tables=[matrices.table_list], cosym_graphs=matrices.rij_graphs, ) diff --git a/src/dials/templates/clusters.html b/src/dials/templates/clusters.html index d3f0fd1352..92a8d0cdf9 100644 --- a/src/dials/templates/clusters.html +++ b/src/dials/templates/clusters.html @@ -41,6 +41,9 @@

+ {{ macros.table(cc_cluster_table, + has_column_header=true, + has_row_header=true) }} {{ macros.plotly_graph("cc_cluster", cc_cluster_json, style="dendrogram-plot") }}
@@ -57,6 +60,9 @@

+ {{ macros.table(cos_angle_cluster_table, + has_column_header=true, + has_row_header=true) }} {{ macros.plotly_graph("cos_angle_cluster", cos_angle_cluster_json, style="dendrogram-plot") }}
From dcfd0fac6fed25603f5cd5d49314020489f40435 Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Tue, 18 Jun 2024 14:39:10 +0100 Subject: [PATCH 29/32] MNT: Add libGLU to linux dependencies Although the conda-forge CDT takes care of the GL dependency, we were still relying on libGLU to be present/usable from the conda environment. If this wasn't, then gltbx would fall back to including system libraries, which caused failures due to conflicting versions of other libraries. --- .conda-envs/linux.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.conda-envs/linux.txt b/.conda-envs/linux.txt index aa9f3300a8..c4b5eeabbc 100644 --- a/.conda-envs/linux.txt +++ b/.conda-envs/linux.txt @@ -17,6 +17,7 @@ conda-forge::iota conda-forge::jinja2 conda-forge::libboost-devel conda-forge::libboost-python-devel +conda-forge::libglu conda-forge::matplotlib-base>=3.0.2 conda-forge::mesa-libgl-devel-cos7-x86_64 conda-forge::mrcfile From a513fee11dd4bb02c5d47241f93ec03d6dcb1266 Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Tue, 18 Jun 2024 15:31:08 +0100 Subject: [PATCH 30/32] Don't modify in place for flex.reflection_table.concat (#2679) --- newsfragments/2679.bugfix | 1 + src/dials/array_family/flex_ext.py | 8 ++++---- tests/array_family/test_reflection_table.py | 9 +++++---- 3 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 newsfragments/2679.bugfix diff --git a/newsfragments/2679.bugfix b/newsfragments/2679.bugfix new file mode 100644 index 0000000000..2d9a35c7b1 --- /dev/null +++ b/newsfragments/2679.bugfix @@ -0,0 +1 @@ +Fix flex.reflection_table.concat to not modify in-place. diff --git a/src/dials/array_family/flex_ext.py b/src/dials/array_family/flex_ext.py index cf40979054..164474a346 100644 --- a/src/dials/array_family/flex_ext.py +++ b/src/dials/array_family/flex_ext.py @@ -531,10 +531,10 @@ def concat( from dials.util.multi_dataset_handling import renumber_table_id_columns tables = renumber_table_id_columns(tables) - first = tables[0] - for table in tables[1:]: - first.extend(table) - return first + new = dials_array_family_flex_ext.reflection_table() + for table in tables: + new.extend(table) + return new def match_with_reference(self, other): """ diff --git a/tests/array_family/test_reflection_table.py b/tests/array_family/test_reflection_table.py index c63dc70801..1fbf2c9223 100644 --- a/tests/array_family/test_reflection_table.py +++ b/tests/array_family/test_reflection_table.py @@ -1602,11 +1602,12 @@ def test_concat(): ids2[0] = "c" ids2[1] = "d" - table1 = flex.reflection_table.concat([table1, table2]) + table3 = flex.reflection_table.concat([table1, table2]) + ids3 = dict(table3.experiment_identifiers()) - assert list(table1["id"]) == [0, 0, 1, 1, 2, 2, 3, 3] - assert list(ids1.keys()) == [0, 1, 2, 3] - assert list(ids1.values()) == ["a", "b", "c", "d"] + assert list(table3["id"]) == [0, 0, 1, 1, 2, 2, 3, 3] + assert list(ids3.keys()) == [0, 1, 2, 3] + assert list(ids3.values()) == ["a", "b", "c", "d"] # test empty tables table1 = flex.reflection_table() From a592df1169541030a4c8707e12d7695d1604e39b Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Tue, 18 Jun 2024 16:41:12 +0100 Subject: [PATCH 31/32] MNT: Disable broken 3d-threaded-integrator test --- tests/command_line/test_integrate.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/command_line/test_integrate.py b/tests/command_line/test_integrate.py index 200f8816bc..36147348c0 100644 --- 
a/tests/command_line/test_integrate.py +++ b/tests/command_line/test_integrate.py @@ -143,10 +143,7 @@ def test_basic_blocking_options(dials_data, tmp_path, block_size, block_units): assert not result.returncode and not result.stderr -@pytest.mark.skipif( - (datetime.date.today() < datetime.date(2024, 6, 5)), - reason="Temporary skip for test that started to fail on Azure pipelines", -) +@pytest.mark.skip(reason="3d threaded integrator") def test_basic_threaded_integrate(dials_data, tmp_path): """Test the threaded integrator on single imageset data.""" From 64e6b8e3498f0b6297a1fdeeef1417964ca6f814 Mon Sep 17 00:00:00 2001 From: Nicholas Devenish Date: Tue, 18 Jun 2024 22:24:41 +0100 Subject: [PATCH 32/32] MNT: Restrict wxPython build version on macOS This is non-ideal, but build_number=6 dropped support for older macOS systems (earlier than Big Sur). Pin this here, and work on dropping other platforms after this release. --- .conda-envs/macos.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.conda-envs/macos.txt b/.conda-envs/macos.txt index 3977f7ad20..108cf8d281 100644 --- a/.conda-envs/macos.txt +++ b/.conda-envs/macos.txt @@ -52,6 +52,6 @@ conda-forge::sqlite conda-forge::tabulate conda-forge::tqdm conda-forge::urllib3 -conda-forge::wxpython>=4.2.0 +conda-forge::wxpython>=4.2.0=*_5 conda-forge::xz conda-forge::zlib
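As a usage note for the concat change in #2679 above: flex.reflection_table.concat now assembles a brand-new reflection table rather than extending the first input in place. A minimal sketch of the resulting behaviour, with toy id columns and identifiers mirroring the updated test:

    from dials.array_family import flex

    table1 = flex.reflection_table()
    table1["id"] = flex.int([0, 0, 1, 1])
    ids1 = table1.experiment_identifiers()
    ids1[0] = "a"
    ids1[1] = "b"

    table2 = flex.reflection_table()
    table2["id"] = flex.int([0, 0, 1, 1])
    ids2 = table2.experiment_identifiers()
    ids2[0] = "c"
    ids2[1] = "d"

    table3 = flex.reflection_table.concat([table1, table2])
    ids3 = dict(table3.experiment_identifiers())
    assert list(table3["id"]) == [0, 0, 1, 1, 2, 2, 3, 3]
    assert list(ids3.values()) == ["a", "b", "c", "d"]
    assert len(table1) == 4  # the first input no longer accumulates the other tables' rows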