diff --git a/src/cell_type_mapper/anndata_iterator/anndata_iterator.py b/src/cell_type_mapper/anndata_iterator/anndata_iterator.py
index 38fc7a66..ad3552b3 100644
--- a/src/cell_type_mapper/anndata_iterator/anndata_iterator.py
+++ b/src/cell_type_mapper/anndata_iterator/anndata_iterator.py
@@ -19,7 +19,6 @@
     _load_disjoint_csr)
 
 from cell_type_mapper.utils.anndata_utils import (
-    read_df_from_h5ad,
     infer_attrs
 )
 
@@ -154,7 +153,7 @@ def __init__(
         else:
             raise RuntimeError(
                 "Do not know how to iterate over anndata "
-                f"file\n{h5ad_path}")
+                f"with attrs\n{attrs}")
 
     @property
     def layer(self):
@@ -226,7 +225,6 @@ def _initialize_as_csc(
             boolean indicating whether or not to leave the h5 handle
             open (should be false when using cuda)
         """
-        write_as_csr = True
         self.tmp_dir = tempfile.mkdtemp(
             dir=tmp_dir,
             prefix='anndata_iterator_')
@@ -239,14 +237,12 @@ def _initialize_as_csc(
         file_size_bytes = file_stats.st_size
         fudge_factor = 1.1  # just in case
 
-        obs = read_df_from_h5ad(h5ad_path, df_name='obs')
-        var = read_df_from_h5ad(h5ad_path, df_name='var')
-        array_shape = (len(obs), len(var))
+        array_shape = infer_attrs(
+            src_path=h5ad_path,
+            dataset=self.layer
+        )['shape']
 
         if free_bytes < fudge_factor*file_size_bytes:
-            write_as_csr = False
-
-        if not write_as_csr:
             raise RuntimeError(
                 "Cannot write data as CSR\n"
                 f"free_bytes {free_bytes}; file size {file_size_bytes}\n"
diff --git a/src/cell_type_mapper/test_utils/anndata_utils.py b/src/cell_type_mapper/test_utils/anndata_utils.py
index 6d7472ef..27b9a481 100644
--- a/src/cell_type_mapper/test_utils/anndata_utils.py
+++ b/src/cell_type_mapper/test_utils/anndata_utils.py
@@ -20,9 +20,13 @@ def create_h5ad_without_encoding_type(
     case where, for whatever reason, that metadata is missing
     from the h5ad file.
 
-    Note: this function will only copy over obs, var, and
-    the contents of X and layers/. It will ignore the other
-    data structures in src_path.
+    Note: this function will only copies
+        obs
+        var
+        X
+        layers/
+        raw/
+        uns
     """
     obs = read_df_from_h5ad(src_path, df_name='obs')
     var = read_df_from_h5ad(src_path, df_name='var')
diff --git a/src/cell_type_mapper/utils/anndata_utils.py b/src/cell_type_mapper/utils/anndata_utils.py
index 15e438d8..14cc38b0 100644
--- a/src/cell_type_mapper/utils/anndata_utils.py
+++ b/src/cell_type_mapper/utils/anndata_utils.py
@@ -186,6 +186,10 @@ def copy_layer_to_x(
             original_h5ad_path=original_h5ad_path,
             new_h5ad_path=new_h5ad_path,
             layer_key=layer_key)
+    else:
+        raise RuntimeError(
+            f"Unclear how to parse encoding-type {encoding_type}"
+        )
 
 
 def _copy_layer_to_x_dense(
@@ -313,6 +317,11 @@ def shuffle_csr_h5ad_rows(
         dataset='X'
     )
 
+    if attrs['encoding-type'] != 'csr_matrix':
+        raise RuntimeError(
+            f'{src_path} is not CSR encoded. Attrs for X are:\n'
+            f'{attrs}')
+
     obs = read_df_from_h5ad(
         h5ad_path=src_path, df_name='obs')
 
diff --git a/src/cell_type_mapper/validation/utils.py b/src/cell_type_mapper/validation/utils.py
index 4da4859c..12853903 100644
--- a/src/cell_type_mapper/validation/utils.py
+++ b/src/cell_type_mapper/validation/utils.py
@@ -1,5 +1,3 @@
-import anndata
-import gc
 import h5py
 import numpy as np
 import pandas as pd
@@ -127,6 +125,11 @@ def is_data_ge_zero(
         elif 'csr' in attrs['encoding-type'] \
                 or 'csc' in attrs['encoding-type']:
             dtype = in_file[f'{layer_key}/data'].dtype
+        else:
+            raise RuntimeError(
+                "Unclear what to make of encoding-typ in attrs:\n"
+                f"{attrs}"
+            )
 
         if np.issubdtype(dtype, np.integer):
             iinfo = np.iinfo(dtype)
@@ -234,41 +237,6 @@ def map_gene_ids_in_var(
     return new_var, mapping_output['n_unmapped']
 
 
-def _get_minmax_x_using_anndata(
-        h5ad_path,
-        rows_at_a_time=100000,
-        layer='X'):
-    """
-    If you cannot intuit how X is encoded in the h5ad file, just use
-    anndata's API
-
-    Returns
-    -------
-    (min_val, max_val)
-    """
-    if layer != 'X':
-        raise NotImplementedError(
-            "No efficient way to get minmax from layers; only X")
-
-    max_val = None
-    min_val = None
-    a_data = anndata.read_h5ad(h5ad_path, backed='r')
-    chunk_iterator = a_data.chunked_X(rows_at_a_time)
-    for chunk_package in chunk_iterator:
-        chunk = chunk_package[0]
-        this_max = chunk.max()
-        if max_val is None or this_max > max_val:
-            max_val = this_max
-        this_min = chunk.min()
-        if min_val is None or this_min < min_val:
-            min_val = this_min
-
-    del a_data
-    gc.collect()
-
-    return (min_val, max_val)
-
-
 def _get_minmax_from_dense(x_dataset):
     """
     Get the minimum and maximum values from the X array if it is dense