diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3b7e848..6b23803 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,7 +17,7 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.16.5 env: - CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8" # >=3.8 <3.12 + CIBW_PROJECT_REQUIRES_PYTHON: ">=3.10" # >=3.8 <3.12 CIBW_SKIP: "*-win32 *-manylinux_i686 pp* *musl* " # cp311-macosx* CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ARCHS_LINUX: "auto" diff --git a/README.rst b/README.rst index a19c837..d992af3 100644 --- a/README.rst +++ b/README.rst @@ -42,15 +42,16 @@ for calling structural variants using paired-end or long read sequencing data. S ⚙️ Installation --------------- -Dysgu requires Python >=3.8 - 3.10 plus htslib and has been tested on linux and MacOS. +Dysgu can be installed via pip using Python 3.10 - 3.12 and has been tested on linux and MacOS. The list of python packages needed can be found in requirements.txt. To install:: - conda install -c bioconda -c conda-forge dysgu + pip install dysgu -Or, fetch from PyPI:: +Or, from conda:: + + conda install -c bioconda -c conda-forge dysgu - pip install dysgu To build from source, run the install script: ``bash INSTALL.sh``. diff --git a/dysgu/call_component.pyx b/dysgu/call_component.pyx index a897f5e..ee482dc 100644 --- a/dysgu/call_component.pyx +++ b/dysgu/call_component.pyx @@ -36,7 +36,7 @@ ctypedef enum ReadEnum_t: BREAKEND = 4 -cdef n_aligned_bases(AlignedSegment aln): +cpdef n_aligned_bases(AlignedSegment aln): cdef int opp, l, aligned, large_gaps, n_small_gaps, i cdef uint32_t cigar_value cdef uint32_t cigar_l = aln._delegate.core.n_cigar @@ -123,7 +123,9 @@ cdef count_attributes2(reads1, reads2, spanning, float insert_ppf, generic_ins, MAPQsupp += a.mapq if a.has_tag("NM"): if a_bases: - NMsupp += float(a.get_tag("NM")) / a_bases + nm = float(a.get_tag("NM")) + NMsupp += nm / a_bases + NMbase += (nm - large_gaps) / a_bases if a.has_tag("AS"): this_as = a.get_tag("AS") if this_as > maxASsupp: @@ -173,7 +175,9 @@ cdef count_attributes2(reads1, reads2, spanning, float insert_ppf, generic_ins, MAPQsupp += a.mapq if a.has_tag("NM"): if a_bases: - NMsupp += float(a.get_tag("NM")) / a_bases + nm = float(a.get_tag("NM")) + NMsupp += nm / a_bases + NMbase += (nm - large_gaps) / a_bases if a.has_tag("AS"): this_as = a.get_tag("AS") if this_as > maxASsupp: @@ -202,7 +206,9 @@ cdef count_attributes2(reads1, reads2, spanning, float insert_ppf, generic_ins, clipped_base_quals += cbq clipped_bases += cb - cdef int tot = er.supp + total_pri + cdef float tot = er.plus + er.minus # er.supp + total_pri + if tot == 0: + return er.NMpri = (NMpri / total_pri) * 100 if total_pri > 0 else 0 er.NMsupp = (NMsupp / er.supp) * 100 if er.supp > 0 else 0 er.NMbase = (NMbase / tot) * 100 if tot > 0 else 0 @@ -222,7 +228,7 @@ cdef count_attributes2(reads1, reads2, spanning, float insert_ppf, generic_ins, cdef int within_read_end_position(event_pos, svtype, cigartuples, cigar_index): - cdef int i, end, idx, opp, length, cigar_skip, n_aligned_bases, target_bases + cdef int i, end if svtype == 1: # insertion (or other e.g. 
duplication/inversion within read) return event_pos + 1 else: # deletion type @@ -1607,7 +1613,7 @@ cdef get_reads(infile, nodes_info, buffered_reads, n2n, bint add_to_buffer, site if sites_index and int_node in sites_index: continue n = n2n[int_node] - if int_node in buffered_reads: + if buffered_reads and int_node in buffered_reads: aligns.append((n, buffered_reads[int_node])) continue fpos.append((n, int_node)) @@ -1664,59 +1670,58 @@ cdef list multi(data, bam, int insert_size, int insert_stdev, float insert_ppf, # Sometimes partitions are not linked, happens when there is not much support between partitions # Then need to decide whether to call from a single partition n2n = data.n2n - seen = set(range(len(data.parts))) + seen = set(range(len(data.parts))) if data.parts else {} out_counts = defaultdict(int) # The number of 'outward' links to other clusters cdef int buffered_reads = 0 cdef bint add_to_buffer = 1 cdef int int_node cdef unsigned long[:] partition events = [] - if info: - sites_info = list(info.values()) - else: - sites_info = [] - # u and v are the part ids, d[0] and d[1] are the lists of nodes for those parts - for (u, v), d in data.s_between: #.items(): - rd_u = get_reads(bam, d[0], data.reads, n2n, add_to_buffer, info) # [(Nodeinfo, alignment)..] - rd_v = get_reads(bam, d[1], data.reads, n2n, add_to_buffer, info) - total_reads = len(rd_u) + len(rd_v) - buffered_reads += total_reads - if add_to_buffer and buffered_reads > 50000: - add_to_buffer = 0 - out_counts[u] += len(rd_u) - out_counts[v] += len(rd_v) - if len(rd_u) == 0 or len(rd_v) == 0: - continue - if u in seen: - seen.remove(u) - if v in seen: - seen.remove(v) - - events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, - assemble_contigs, - sites_info, paired_end) - - # finds reads that should be a single partition - # u_reads, v_reads, u_single, v_single = filter_single_partitions(rd_u, rd_v) - # if len(u_reads) > 0 and len(v_reads) > 0: - # events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, assemble_contigs, - # sites_info, paired_end) - # if u_single: - # res = single(u_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, - # sites_info, paired_end, length_extend, divergence) - # if res: - # if isinstance(res, EventResult): - # events.append(res) - # else: - # events += res - # if v_single: - # res = single(v_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, - # sites_info, paired_end, length_extend, divergence) - # if res: - # if isinstance(res, EventResult): - # events.append(res) - # else: - # events += res + sites_info = list(info.values()) if info else [] + + if data.s_between: + # u and v are the part ids, d[0] and d[1] are the lists of nodes for those parts + for (u, v), d in data.s_between: #.items(): + rd_u = get_reads(bam, d[0], data.reads, n2n, add_to_buffer, info) # [(Nodeinfo, alignment)..] 
+ rd_v = get_reads(bam, d[1], data.reads, n2n, add_to_buffer, info) + total_reads = len(rd_u) + len(rd_v) + buffered_reads += total_reads + if add_to_buffer and buffered_reads > 50000: + add_to_buffer = 0 + out_counts[u] += len(rd_u) + out_counts[v] += len(rd_v) + if len(rd_u) == 0 or len(rd_v) == 0: + continue + if u in seen: + seen.remove(u) + if v in seen: + seen.remove(v) + + events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, + assemble_contigs, + sites_info, paired_end) + + # finds reads that should be a single partition + # u_reads, v_reads, u_single, v_single = filter_single_partitions(rd_u, rd_v) + # if len(u_reads) > 0 and len(v_reads) > 0: + # events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, assemble_contigs, + # sites_info, paired_end) + # if u_single: + # res = single(u_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, + # sites_info, paired_end, length_extend, divergence) + # if res: + # if isinstance(res, EventResult): + # events.append(res) + # else: + # events += res + # if v_single: + # res = single(v_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, + # sites_info, paired_end, length_extend, divergence) + # if res: + # if isinstance(res, EventResult): + # events.append(res) + # else: + # events += res # Process any singles / unconnected blocks if seen: @@ -1737,33 +1742,33 @@ cdef list multi(data, bam, int insert_size, int insert_stdev, float insert_ppf, events += res # Check for events within clustered nodes - for k, d in data.s_within: #.items(): - o_count = out_counts[k] - i_counts = len(d) - if i_counts > max_single_size: - continue - if o_count > 0 and i_counts > (2*min_support) and i_counts > o_count: - rds = get_reads(bam, d, data.reads, data.n2n, 0, info) - if len(rds) < lower_bound_support or (len(sites_info) != 0 and len(rds) == 0): - continue - res = single(rds, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, - sites_info, paired_end, length_extend, divergence) - if res: - if isinstance(res, EventResult): - events.append(res) - else: - events += res + if data.s_within: + for k, d in data.s_within: #.items(): + o_count = out_counts[k] + i_counts = len(d) + if i_counts > max_single_size: + continue + if o_count > 0 and i_counts > (2*min_support) and i_counts > o_count: + rds = get_reads(bam, d, data.reads, data.n2n, 0, info) + if len(rds) < lower_bound_support or (len(sites_info) != 0 and len(rds) == 0): + continue + res = single(rds, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, + sites_info, paired_end, length_extend, divergence) + if res: + if isinstance(res, EventResult): + events.append(res) + else: + events += res return events cpdef list call_from_block_model(bam, data, clip_length, insert_size, insert_stdev, insert_ppf, min_support, lower_bound_support, assemble_contigs, max_single_size, sites_index, bint paired_end, int length_extend, float divergence): - n_parts = len(data.parts) + n_parts = len(data.parts) if data.parts else 0 events = [] - if data.info: - info = data.info - else: - info = None + info = data.info + if data.reads is None: + data.reads = {} # next deal with info - need to filter these into the partitions, then deal with them in single / one_edge cdef EventResult_t e if n_parts >= 1: @@ -1773,10 +1778,7 @@ cpdef list call_from_block_model(bam, data, clip_length, insert_size, insert_std if 
len(data.n2n) > max_single_size: return [] rds = get_reads(bam, data.n2n.keys(), data.reads, data.n2n, 0, info) - if info: - sites_info = list(info.values()) - else: - sites_info = [] + sites_info = list(info.values()) if info else [] if len(rds) < lower_bound_support or (len(sites_info) != 0 and len(rds) == 0): return [] ev = single(rds, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, sites_info, paired_end, length_extend, divergence) diff --git a/dysgu/filter_normals.py b/dysgu/filter_normals.py index bafbf30..26f4ca6 100644 --- a/dysgu/filter_normals.py +++ b/dysgu/filter_normals.py @@ -9,6 +9,8 @@ import os from dysgu.map_set_utils import Py_BasicIntervalTree, is_overlapping from dysgu.graph import AlignmentsSA +from dysgu.extra_metrics import gap_size_upper_bound +from dysgu.call_component import n_aligned_bases import logging from collections import defaultdict from enum import Enum @@ -118,6 +120,7 @@ def output_vcf(args, template_vcf): input_command = ' '.join(argv) hdr.add_line('##FILTER=') hdr.add_line('##FILTER=') + hdr.add_line('##FILTER=') hdr.add_line(f'##command="{input_command}"') return pysam.VariantFile("-" if not args['svs_out'] else args['svs_out'], "w", header=hdr) @@ -395,23 +398,37 @@ def matching_supplementary(aln, infile, posA, posB): return False +def within_read_end_position(event_pos, svtype, cigartuples, cigar_index): + if svtype == 1: # insertion (or other e.g. duplication/inversion within read) + return event_pos + 1 + else: # deletion type + end = event_pos + cigartuples[cigar_index][1] + return end + def matching_gap(posA, posB, r, svtype, is_insertion, svlen, pos_threshold=100, span_threshold=0.1): ct = r.cigartuples pos = r.pos + pos_original = None min_length = svlen * 0.2 # 0.25 max_distance = svlen * 10 ins_like = svtype == "DUP" or svtype == "INV" debug = False + # debug = True + cigar_index = 0 for k, l in ct: if k == CHARD_CLIP or k == CSOFT_CLIP: + cigar_index += 1 continue if k == CMATCH or k == CDIFF: pos += l + cigar_index += 1 continue if is_insertion and k == CDEL: pos += l + cigar_index += 1 continue elif not is_insertion and k == CINS: + span_similarity = min(l, svlen) / max(l, svlen) if l > 250 and ins_like and span_similarity > 0.85 and abs(posA - pos) < max(l, svlen): if debug: echo("ret0", svlen, f"{posA}-{posA + svlen}", f"{pos}-{pos + l}", (k, l)) @@ -419,7 +436,24 @@ def matching_gap(posA, posB, r, svtype, is_insertion, svlen, pos_threshold=100, elif svlen * 2 < l and (abs(posA - pos) < max_distance or (abs(posB - pos + l) < max_distance)): if debug: echo("ret1") return True + + # if l > 100: + # pos_original = pos + # l, pos, _ = gap_size_upper_bound(r, cigar_index, pos, pos+1) + # span_similarity = min(l, svlen) / max(l, svlen) + # if l > 250 and ins_like and span_similarity > 0.85 and abs(posA - pos) < max(l, svlen): + # if debug: echo("ret0b", svlen, f"{posA}-{posA + svlen}", f"{pos}-{pos + l}", (k, l)) + # return True + # elif svlen * 2 < l and (abs(posA - pos) < max_distance or (abs(posB - pos + l) < max_distance)): + # if debug: echo("ret1b") + # return True + # if pos_original is not None: + # pos = pos_original + # pos_original = None + + cigar_index += 1 continue + end = pos + l if l > min_length and abs(posA - pos) < max_distance: if is_insertion: @@ -429,10 +463,23 @@ def matching_gap(posA, posB, r, svtype, is_insertion, svlen, pos_threshold=100, elif span_position_distance(posA, posB, pos, end, pos_threshold, span_threshold): if debug: echo("ret3") return True + + if l > 100 and ((k == 
CINS and is_insertion) or (k == CDEL and svtype == "DEL")): + this_l, this_pos, this_end = gap_size_upper_bound(r, cigar_index, pos, pos + 1) + if this_l > min_length and abs(posA - this_pos) < max_distance: + if is_insertion: + if span_position_distance(posA, posA + svlen, this_pos, this_end, pos_threshold, span_threshold): + if debug: echo("ret2b", f"{posA}-{posA + svlen}", f"{this_pos}-{this_end}") + return True + elif span_position_distance(posA, posB, this_pos, this_end, pos_threshold, span_threshold): + if debug: echo("ret3b") + return True + if k == CDEL: pos += l if pos > posB + svlen: break + cigar_index += 1 return False @@ -542,7 +589,7 @@ def cache_nearby_soft_clip(posA, posB, align, join_type, svtype, cached, distanc def any_nearby_soft_clip(posA, posB, align, join_type, svtype, distance=30, clip_length=3): ct = align.cigartuples - if svtype == "BND" or svtype == "INV" or svtype == "DUP": # respect join orientation + if svtype == "BND" or svtype == "DUP": # respect join orientation if join_type[0] == "3": if clip_position_matches(ct[-1], clip_length, posA, align.reference_end, distance): return True @@ -558,7 +605,7 @@ def any_nearby_soft_clip(posA, posB, align, join_type, svtype, distance=30, clip return True if clip_position_matches(ct[0], clip_length, posB, align.pos, distance): return True - elif svtype == "INS" or svtype == "TRA": # check all clips + elif svtype == "INS" or svtype == "TRA" or svtype == "INV": # check all clips if clip_position_matches(ct[-1], clip_length, posA, align.reference_end, distance): return True if clip_position_matches(ct[-1], clip_length, posB, align.reference_end, distance): @@ -680,15 +727,32 @@ def has_low_WR_support(r, sample, support_fraction, n_overlapping, n_mapq0): def too_many_clipped_reads(r, clipped_reads, support_fraction): sf = support_fraction / 2 - cov = r.samples[r.samples.keys()[0]]['COV'] - max_nearby_clipped_reads = round(3 + sf * cov) - if clipped_reads > max_nearby_clipped_reads: + d = r.samples[r.samples.keys()[0]] + # cov = r.samples[r.samples.keys()[0]]['COV'] + m = max(d['ICN'], d['OCN']) + if m == 0: + cov = d['COV'] + else: + cov = d['COV'] * (min(d['ICN'], d['OCN']) / m) + # max_nearby_clipped_reads = round(3 + sf * cov) + max_nearby_clipped_reads = round(1 + sf * cov) + # echo("max", clipped_reads, max_nearby_clipped_reads, support_fraction) + if clipped_reads >= max_nearby_clipped_reads: return True return False -def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, keep_all, sample, - support_fraction): +def poorly_aligned_region(sum_edit_distance, sum_n_small_gap, NMbase, n_aligns, fmt, max_divergence=0.08): + # Normal-reads error + if sum_edit_distance/n_aligns > max_divergence or sum_n_small_gap/n_aligns > max_divergence / 2: + return True + # Variant error rate > 4 times the normal-reads error rate + NMbase = (NMbase / n_aligns) * 100 + if "NMB" in fmt and fmt["NMB"] > 4 * NMbase: + return True + +def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, sample, + support_fraction, max_divergence): ct = r.info["CT"] chromA = r.chrom posA = r.pos @@ -701,47 +765,54 @@ def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, has_contig = 'CONTIG' in r.info or 'CONTIG2' in r.info or r.alts[0][0] != "<" is_paired_end = False good_step = good_step_translocation(r, sample) - debug = False #True + sum_edit_distance = 0 + sum_n_small_gap = 0 + n_aligns = 0 + NMbase = 0 + # debug = True + + debug = False + # debug = True for is_paired_end, aln in 
iterate_bams_single_region(bams, chromA, posA, pad, bam_is_paired_end): + if is_paired_end: distance = 50 clip_length = 3 if aln.flag & 12 or (aln.flag & 2 and not has_clip(aln)): continue if chromB == infile.get_reference_name(aln.rnext) and is_overlapping(next_start, next_end, aln.pnext - pad, aln.pnext + pad): - if keep_all: - r.filter.add("normal") - return False + return "normal" if matching_supplementary(aln, infile, posA, posB): - if keep_all: - r.filter.add("normal") - return False + return "normal" if has_contig: cache_nearby_soft_clip(posA, posB, aln, ct, "TRA", cached, distance, clip_length) else: + n_aligns += 1 + a_bases, large_gaps, n_small_gaps = n_aligned_bases(aln) + if a_bases > 0: + sum_n_small_gap += n_small_gaps / a_bases + if aln.has_tag("NM"): + if a_bases: + nm = float(aln.get_tag("NM")) + sum_edit_distance += nm / a_bases + NMbase += (nm - large_gaps) / a_bases distance = 500 clip_length = 50 if aln.flag & 4 or not has_clip(aln): continue if matching_supplementary(aln, infile, posA, posB): if debug: echo("tra0") - if keep_all: - r.filter.add("normal") - return False + return "normal" if not good_step and matching_ins_translocation(posA, aln): if debug: echo("tra1") - if keep_all: - r.filter.add("normal") - return False + return "normal" if has_contig: cache_nearby_soft_clip(posA, posB, aln, ct, "TRA", cached, distance, clip_length) if any_nearby_soft_clip(posA, posB, aln, ct, "TRA", 30, clip_length=50): nearby_soft_clips += 1 elif any_nearby_soft_clip(posA, posB, aln, ct, "TRA", 30, clip_length=250): if debug: echo("tra2") - if keep_all: - r.filter.add("lowSupport") - return False + return "lowSupport" for is_paired_end, aln in iterate_bams_single_region(bams, chromB, posB, pad, bam_is_paired_end): if is_paired_end: @@ -750,56 +821,55 @@ def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, if aln.flag & 12 or (aln.flag & 2 and not has_clip(aln)): continue if r.chrom == infile.get_reference_name(aln.rnext) and is_overlapping(current_start, current_end, aln.pnext - pad, aln.pnext + pad): - if keep_all: - r.filter.add("normal") - return False + return "normal" if matching_supplementary(aln, infile, posB, posA): - if keep_all: - r.filter.add("normal") - return False + return "normal" if has_contig: cache_nearby_soft_clip(r.pos, posB, aln, ct, "TRA", cached, distance, clip_length) else: + n_aligns += 1 + a_bases, large_gaps, n_small_gaps = n_aligned_bases(aln) + if a_bases > 0: + sum_n_small_gap += n_small_gaps / a_bases + if aln.has_tag("NM"): + if a_bases: + nm = float(aln.get_tag("NM")) + sum_edit_distance += nm / a_bases + NMbase += (nm - large_gaps) / a_bases distance = 500 clip_length = 50 if aln.flag & 4 or not has_clip(aln): continue if matching_supplementary(aln, infile, posB, posA): if debug: echo("tra3") - if keep_all: - r.filter.add("normal") - return False + return "normal" if not good_step and matching_ins_translocation(posB, aln): if debug: echo("tra4") - if keep_all: - r.filter.add("normal") - return False + return "normal" if has_contig: cache_nearby_soft_clip(posA, posB, aln, ct, "TRA", cached, distance, clip_length) if any_nearby_soft_clip(r.pos, posB, aln, ct, "TRA", 30, clip_length=50): nearby_soft_clips += 1 elif any_nearby_soft_clip(posA, posB, aln, ct, "TRA", 30, clip_length=250): if debug: echo("tra5") - if keep_all: - r.filter.add("lowSupport") - return False + return "lowSupport" if not is_paired_end and too_many_clipped_reads(r, nearby_soft_clips, support_fraction): if debug: echo("tra6") - if keep_all: - 
r.filter.add("lowSupport") - return False + return "lowSupport" if cached and matching_soft_clips(r, cached, is_paired_end): if debug: echo("tra7") - if keep_all: - r.filter.add("normal") - return False - return True + return "normal" + + if not is_paired_end and poorly_aligned_region(sum_edit_distance, sum_n_small_gap, NMbase, n_aligns, r.samples[sample], max_divergence): + return "highDivergence" + return None -def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pad, sample, keep_all): +def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pad, sample, max_divergence): svlen = r.info["SVLEN"] svtype = r.info["SVTYPE"] + any_contigs = ("CONTIGA" in r.info and r.info["CONTIGA"]) or ("CONTIGB" in r.info and r.info["CONTIGB"]) is_insertion = svtype == "INS" posA = r.pos ct = r.info["CT"] @@ -809,9 +879,24 @@ def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pa is_paired_end = False n_mapq_0 = 0 n_overlpapping = 0 - # debug = False if r.rid != "229738" else True - debug = False #False + n_aligns = 0 + sum_edit_distance = 0 + sum_n_small_gap = 0 + NMbase = 0 + + debug = False + # debug = True for is_paired_end, aln in iterate_bams(bams, r.chrom, posA, r.chrom, posB, pad, bam_is_paired_end): + n_aligns += 1 + a_bases, large_gaps, n_small_gaps = n_aligned_bases(aln) + if a_bases > 0: + sum_n_small_gap += n_small_gaps / a_bases + if aln.has_tag("NM"): + if a_bases: + nm = float(aln.get_tag("NM")) + sum_edit_distance += nm / a_bases + NMbase += (nm - large_gaps) / a_bases + if is_paired_end: a_posA = min(aln.pos, aln.pnext) a_posB = max(aln.pos, aln.pnext) @@ -820,9 +905,7 @@ def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pa if aln.rname != aln.rnext: if matching_supplementary(aln, infile, posA, posB): if debug: echo("1") - if keep_all: - r.filter.add("normal") - return False + return "normal" cache_nearby_soft_clip(posA, posB, aln, ct, svtype, cached) continue if abs(a_posA - posA) > pad or abs(a_posB - posB) > pad: @@ -830,32 +913,22 @@ def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pa if not is_insertion and not aln.flag & 10: # proper pair, mate unmapped if span_position_distance(posA, posB, a_posA, a_posB, pos_threshold=20, span_threshold=0.7): if debug: echo("2") - if keep_all: - r.filter.add("normal") - return False + return "normal" if not is_insertion and matching_supplementary(aln, infile, posA, posB): if debug: echo("3") - if keep_all: - r.filter.add("normal") - return False + return "normal" if svlen < 80 and matching_gap(posA, posB, aln, svtype, is_insertion, svlen): if debug: echo("4") - if keep_all: - r.filter.add("normal") - return False + return "normal" cache_nearby_soft_clip(posA, posB, aln, ct, svtype, cached, distance=50, clip_length=3) else: if matching_gap(posA, posB, aln, svtype, is_insertion, svlen, pos_threshold=30, span_threshold=0.7): if debug: echo("5", svlen, aln.qname) - if keep_all: - r.filter.add("normal") - return False + return "normal" if matching_supplementary(aln, infile, posA, posB): if debug: echo("6") - if keep_all: - r.filter.add("normal") - return False + return "normal" if is_overlapping(posA, posA + 1, aln.pos, aln.reference_end): n_overlpapping += 1 if aln.mapq == 0: @@ -865,29 +938,29 @@ def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pa covered += 1 if any_nearby_soft_clip(posA, posB, aln, ct, svtype, 30, clip_length=50): nearby_soft_clips += 1 - # cache_nearby_soft_clip(posA, posB, 
aln, ct, svtype, cached, distance=min(500, svlen * 0.5), clip_length=50) + cache_nearby_soft_clip(posA, posB, aln, ct, svtype, cached, distance=min(500, svlen * 0.5), clip_length=50) if not is_paired_end and not covered: if debug: echo("7") - if keep_all: - r.filter.add("lowSupport") - return False + return "lowSupport" if not is_paired_end and has_low_WR_support(r, sample, support_fraction, n_overlpapping, n_mapq_0): if debug: echo("8") - if keep_all: - r.filter.add("lowSupport") - return False - if not is_paired_end and too_many_clipped_reads(r, nearby_soft_clips, support_fraction): + return "lowSupport" + # echo(svtype) + # if svtype == "INV": + # support_fraction = support_fraction / 20 + # echo(support_fraction) + if (not is_paired_end or (svtype in "INVTRA" and not any_contigs)) and too_many_clipped_reads(r, nearby_soft_clips, support_fraction): + # if not is_paired_end and too_many_clipped_reads(r, nearby_soft_clips, support_fraction): if debug: echo("9") - if keep_all: - r.filter.add("lowSupport") - return False + return "lowSupport" if cached and matching_soft_clips(r, cached, is_paired_end): if debug: echo("10") - if keep_all: - r.filter.add("normal") - return False - return True + return "normal" + if not is_paired_end and poorly_aligned_region(sum_edit_distance, sum_n_small_gap, NMbase, n_aligns, r.samples[sample], max_divergence): + return "highDivergence" + + return None def update_filter_value(r, sample_name, old_filter_values, pass_prob, new_value=""): @@ -918,11 +991,14 @@ def run_filtering(args): min_prob = args['min_prob'] pass_prob = args['pass_prob'] support_fraction = args['support_fraction'] + max_divergence = args['max_divergence'] filter_results = defaultdict(int) written = 0 for idx, r in enumerate(vcf): - # if r.id != "165324": + # if r.id != "187669": # continue + # echo(dict(r.info)) + # echo(dict(r.samples[sample_name])) old_filter_value = list(r.filter.keys()) r.filter.clear() if min_prob != 0 and 'PROB' in r.samples[sample_name] and r.samples[sample_name]['PROB'] < min_prob: @@ -947,7 +1023,7 @@ def run_filtering(args): if chrom2_tid in intervals: posB_overlaps = set(intervals[chrom2_tid].allOverlappingIntervals(posB, posB + 1)) if posA_overlaps.intersection(posB_overlaps): - filter_results['dropped, normal SV overlap'] += 1 + filter_results['dropped, normal'] += 1 if keep_all: update_filter_value(r, sample_name, old_filter_value, pass_prob, new_value="normal") out_vcf.write(r) @@ -959,18 +1035,19 @@ def run_filtering(args): continue sv_type = get_sv_type(r, chrom_tid, chrom2_tid) if sv_type == "TRA" or sv_type == "BND": - good = process_translocation(r, r.info["CHR2"], posB, bams, infile, bam_is_paired_end, pad=pad, keep_all=keep_all, sample=sample_name, support_fraction=support_fraction) + reason = process_translocation(r, r.info["CHR2"], posB, bams, infile, bam_is_paired_end, pad=pad, sample=sample_name, support_fraction=support_fraction, max_divergence=max_divergence) else: - good = process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pad=pad, sample=sample_name, keep_all=keep_all) - if good: + reason = process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pad=pad, sample=sample_name, max_divergence=max_divergence) + if reason is None: update_filter_value(r, sample_name, old_filter_value, pass_prob) out_vcf.write(r) written += 1 else: if keep_all: + r.filter.add(reason) update_filter_value(r, sample_name, old_filter_value, pass_prob, new_value="normal") out_vcf.write(r) - filter_results['dropped, normal read 
support'] += 1 + filter_results[f'dropped, {reason}'] += 1 out_vcf.close() logging.info(f'Filter results: {dict(sorted(filter_results.items()))}') logging.info("dysgu filter {} complete, n={}, h:m:s, {}".format(args['input_vcf'], diff --git a/dysgu/graph.pyx b/dysgu/graph.pyx index 74d3f5a..a4674af 100644 --- a/dysgu/graph.pyx +++ b/dysgu/graph.pyx @@ -1358,9 +1358,9 @@ cdef tuple count_support_between(Py_SimpleGraph G, parts): cdef tuple t cdef unsigned long[:] p if len(parts) == 0: - return {}, {} + return None, None elif len(parts) == 1: - return {}, {0: parts[0]} + return None, {0: parts[0]} cdef Py_Int2IntMap p2i = map_set_utils.Py_Int2IntMap() for i, p in enumerate(parts): for node in p: @@ -1390,7 +1390,7 @@ cdef tuple count_support_between(Py_SimpleGraph G, parts): if t in seen_t: continue if t not in counts: - counts[t] = [set([]), set([])] + counts[t] = (set([]), set([])) if j < i: counts[t][0].add(child) counts[t][1].add(node) @@ -1406,7 +1406,7 @@ cdef tuple count_support_between(Py_SimpleGraph G, parts): self_counts[i].append(node) seen_t.update(current_t) # Only count edge once for t in current_t: # save memory by converting support_between to array - counts[t] = [np.fromiter(m, dtype="uint32", count=len(m)) for m in counts[t]] + counts[t] = tuple(np.fromiter(m, dtype="uint32", count=len(m)) for m in counts[t]) return counts, self_counts @@ -1519,18 +1519,19 @@ cpdef proc_component(node_to_name, component, read_buffer, infile, Py_SimpleGrap # Explore component for locally interacting nodes; create partitions using these partitions = get_partitions(G, component) support_between, support_within = count_support_between(G, partitions) - if len(support_between) == 0 and len(support_within) == 0: + + if not support_between and not support_within: if not paired_end: if len(n2n) >= min_support or len(reads) >= min_support or info: - return GraphComponent((), (), {}, (), n2n, info) + return GraphComponent(None, None, None, None, n2n, info) else: return else: # single paired end template can have 3 nodes e.g. 
two reads plus supplementary if min_support == 1 and (len(n2n) >= min_support or len(reads) >= min_support): - return GraphComponent((), (), {}, (), n2n, info) + return GraphComponent(None, None, None, None, n2n, info) elif len(reads) >= min_support or info: - return GraphComponent((), (), {}, (), n2n, info) + return GraphComponent(None, None, None, None, n2n, info) else: return # Debug: @@ -1552,4 +1553,11 @@ cpdef proc_component(node_to_name, component, read_buffer, infile, Py_SimpleGrap # echo("s_between", sb) # echo("s_within", support_within) - return GraphComponent(partitions, tuple(support_between.items()), reads, tuple(support_within.items()), n2n, info) + support_between = tuple(support_between.items()) if support_between else None + support_within = tuple(support_within.items()) if support_within else None + return GraphComponent(partitions, + support_between, + reads, + support_within, + n2n, + info) diff --git a/dysgu/io_funcs.pyx b/dysgu/io_funcs.pyx index baa3f7b..d486bfa 100644 --- a/dysgu/io_funcs.pyx +++ b/dysgu/io_funcs.pyx @@ -129,7 +129,7 @@ cpdef list col_names(small_output): if small_output: return ["chrA", "posA", "chrB", "posB", "sample", "event_id", "grp_id", "n_in_grp", "kind", "type", "svtype", "join_type", "cipos95A", "cipos95B", 'contigA', 'contigB', "svlen", "svlen_precise", "rep", "gc", - ["GT", "GQ", "MAPQpri", "su", "spanning", "pe", "supp", "sc", "bnd", + ["GT", "GQ", "MAPQpri", "su", "spanning", "pe", "supp", "sc", "bnd", "NMbase", "raw_reads_10kb", "neigh10kb", "plus", "minus", "remap_score", "remap_ed", "bad_clip_count", "fcc", "inner_cn", "outer_cn", "prob"] ] @@ -264,7 +264,7 @@ def make_main_record(r, dysgu_version, index, format_f, df_rows, add_kind, small info_extras += [f"MeanPROB={round(mean_prob, 3)}", f"MaxPROB={round(max_prob, 3)}"] if small_output: - fmt_keys = "GT:GQ:MAPQP:SU:WR:PE:SR:SC:BND:COV:NEIGH10:PS:MS:RMS:RED:BCC:FCC:ICN:OCN:PROB" + fmt_keys = "GT:GQ:MAPQP:SU:WR:PE:SR:SC:BND:NMB:COV:NEIGH10:PS:MS:RMS:RED:BCC:FCC:ICN:OCN:PROB" else: fmt_keys = "GT:GQ:NMP:NMS:NMB:MAPQP:MAPQS:NP:MAS:SU:WR:PE:SR:SC:BND:SQC:SCW:SQR:BE:COV:MCOV:LNK:NEIGH:NEIGH10:RB:PS:MS:SBT:NG:NSA:NXA:NMU:NDC:RMS:RED:BCC:FCC:STL:RAS:FAS:ICN:OCN:CMP:RR:JIT:PROB" @@ -309,7 +309,7 @@ def make_main_record(r, dysgu_version, index, format_f, df_rows, add_kind, small def get_fmt(r, small_output): if small_output: - v = [r["GT"], r["GQ"], r['MAPQpri'], r['su'], r['spanning'], r['pe'], r['supp'], r['sc'], r['bnd'], + v = [r["GT"], r["GQ"], r['MAPQpri'], r['su'], r['spanning'], r['pe'], r['supp'], r['sc'], r['bnd'], r['NMbase'], r['raw_reads_10kb'], r['neigh10kb'], r["plus"], r["minus"], r["remap_score"], r["remap_ed"], r["bad_clip_count"], round(r["fcc"], 3), round(r["inner_cn"], 3), round(r["outer_cn"], 3), r['prob'] ] diff --git a/dysgu/main.py b/dysgu/main.py index dd22cba..0a54d0c 100644 --- a/dysgu/main.py +++ b/dysgu/main.py @@ -430,6 +430,7 @@ def call_events(ctx, **kwargs): @click.option("-d", "--wd", help="Working directory to use/create when merging", type=click.Path(exists=False), required=False) @click.option("-c", "--clean", help="Remove working directory when finished", is_flag=True, flag_value=True, show_default=False, default=False) @click.option("--progress", help="Prints detailed progress information", is_flag=True, flag_value=True, show_default=False, default=False) +@click.option("--update-cohort", help="Updated this cohort file with new calls from input_files", required=False, type=click.Path(exists=True)) @click.option("--collapse-nearby", help="Merges more 
aggressively by collapsing nearby SV", default="True", type=click.Choice(["True", "False"]), show_default=True) @click.option("--merge-across", help="Merge records across input samples", default="True", @@ -470,8 +471,9 @@ def view_data(ctx, **kwargs): @click.option("--target-sample", help="If input_vcf if multi-sample, use target-sample as input", required=False, type=str, default="") @click.option("--keep-all", help="All SVs classified as normal will be kept in the output, labelled as filter=normal", is_flag=True, flag_value=True, show_default=False, default=False) @click.option("--ignore-read-groups", help="Ignore ReadGroup RG tags when parsing sample names. Filenames will be used instead", is_flag=True, flag_value=True, show_default=False, default=False) -@click.option("--min-prob", help="Remove SVs with PROB value < min-prob", default=0.1, type=float, show_default=True) -@click.option("--pass-prob", help="Re-label SVs as PASS if PROB value >= pass-prob", default=1.0, type=float, show_default=True) +@click.option("--max-divergence", help="Remove SV if normal_bam displays divergence > max-divergence at same location", default=0.08, type=float, show_default=True) +@click.option("--min-prob", help="Remove SV with PROB value < min-prob", default=0.1, type=float, show_default=True) +@click.option("--pass-prob", help="Re-label SV as PASS if PROB value >= pass-prob", default=1.0, type=float, show_default=True) @click.option("--interval-size", help="Interval size for searching normal-vcf/normal-bams", default=1000, type=int, show_default=True) @click.option("--random-bam-sample", help="Choose N random normal-bams to search. Use -1 to ignore", default=-1, type=int, show_default=True) @click.pass_context diff --git a/dysgu/sites_utils.py b/dysgu/sites_utils.py index 222e5c0..223c183 100644 --- a/dysgu/sites_utils.py +++ b/dysgu/sites_utils.py @@ -65,7 +65,7 @@ def vcf_reader(pth, infile, parse_probs, sample_name, ignore_sample, default_pro continue if ignore_sample and sample_name in r.samples and "GT" in r.samples[sample_name]: this_gt = r.samples[sample_name]['GT'] - if not (this_gt == "0/0" or this_gt == "0" or this_gt == "." or this_gt == "./." or this_gt == "0|0"): + if not (this_gt == (0, 0) or this_gt == "0/0" or this_gt == "0" or this_gt == "." or this_gt == "./." or this_gt == "0|0"): continue if "CHROM2" in r.info: chrom2_info = r.info["CHROM2"] @@ -149,7 +149,6 @@ def vcf_reader(pth, infile, parse_probs, sample_name, ignore_sample, default_pro raise ValueError("PROB or MeanPROB must be in a float in range 0-1, error at CHROM {}, POS {}".format(chrom, start)) recs[chrom].append(Site(chrom, start, chrom2, stop, svt, idx, r.id, svlen, pval)) - if not_parsed: logging.warning("Some records had incompatible SVTYPE: {}".format( str({k: v for k, v in Counter(not_parsed).items()}))) diff --git a/dysgu/view.py b/dysgu/view.py index b2db62b..1b17bc4 100644 --- a/dysgu/view.py +++ b/dysgu/view.py @@ -631,6 +631,54 @@ def shard_data(args, input_files, Global, show_progress): os.remove(item) +def update_cohort_only(args): + + cohort = pysam.VariantFile(args["update_cohort"]) + cohort_samples = set(cohort.header.samples) + # contigs = list(cohort.header.contigs) + samples = {} + # iters = {} + #current = {} + samps_counts = defaultdict(int) + for pth in args["input_files"]: + v = pysam.VariantFile(pth) + if v.index is None: + raise ValueError(f"Input files must be indexed. 
{pth} has not index") + samps = list(v.header.samples) + if len(samps) > 1: + raise ValueError(f"Only one sample supported per input file but {pth} has {list(samps)}") + samp = samps[0] + samps_counts[samp] += 1 + if samp in samples: + logging.warning(f"{samp} already in samples list, {samp} is assumed to be {samp}_{samps_counts[samp]} in cohort file") + samp = f"{samp}_{samps_counts[samp]}" + if samp not in cohort_samples: + raise ValueError(f"Input file has sample name {samp}, but this was not found in the cohort file {args['update_cohort']}") + samples[samp] = v + + for r in cohort.fetch(): + ch_svtype = r.info["SVTYPE"] + for k in samples: + ch_fmt = r.samples[k] + sample_items = [i for i in list(samples[k].fetch(r.chrom, r.pos, r.pos + 10)) if (ch_svtype == i.info["SVTYPE"] and abs(i.pos - r.pos) < 10)] + if not sample_items: + continue + index = 0 + if len(sample_items) > 1: + # choose row with any genotype in sample, or last row + for index, candidate in enumerate(sample_items): + candidate_gt = candidate.samples[k]["GT"] + if candidate_gt != (0, 0): + break + item = sample_items[index] + if item.samples[k]["GT"] == (0, 0): + continue + echo("samples items", len(sample_items)) + if ch_fmt["PROB"] != item.samples[k]["PROB"]: + echo(k, r.chrom, r.pos, item.chrom, item.pos, dict(ch_fmt)["PROB"], dict(item.samples[k])["PROB"]) + echo("---") + + def view_file(args): t0 = time.time() @@ -645,22 +693,29 @@ def view_file(args): if not all(os.path.splitext(i)[1] == ".csv" for i in args["input_files"]): raise ValueError("All input files must have .csv extension") - args["metrics"] = False # only option supported so far - args["contigs"] = False + if args["update_cohort"]: + for opt, v in (("out_format", "vcf"), ("merge_within", "False"), ("merge_across", "True"), ("add_kind", "False"), ("separate", "False")): + if args[opt] != v: + raise ValueError(f"{opt}={v} fot supported with --update-cohort") + update_cohort_only(args) - soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) - manager = multiprocessing.Manager() - Global = manager.Namespace() - Global.open_files = soft # we dont know how many file descriptors are already open by the user, assume all - Global.soft = soft - - if not args["wd"]: - seen_names, names_list = get_names_list(args["input_files"]) - process_file_list(args, args["input_files"], seen_names, names_list, log_messages=True) else: - shard_data(args, args["input_files"], Global=Global, show_progress=args['progress']) - if args['clean']: - os.rmdir(args['wd']) + args["metrics"] = False # only option supported so far + args["contigs"] = False + + soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + manager = multiprocessing.Manager() + Global = manager.Namespace() + Global.open_files = soft # we dont know how many file descriptors are already open by the user, assume all + Global.soft = soft + + if not args["wd"]: + seen_names, names_list = get_names_list(args["input_files"]) + process_file_list(args, args["input_files"], seen_names, names_list, log_messages=True) + else: + shard_data(args, args["input_files"], Global=Global, show_progress=args['progress']) + if args['clean']: + os.rmdir(args['wd']) logging.info("dysgu merge complete h:m:s, {}".format(str(datetime.timedelta(seconds=int(time.time() - t0))), time.time() - t0)) diff --git a/setup.py b/setup.py index 22940f5..484e2a2 100644 --- a/setup.py +++ b/setup.py @@ -144,7 +144,7 @@ def get_extra_args(): # Dysgu modules for item in ["sv2bam", "io_funcs", "graph", "coverage", "assembler", "call_component", - 
"map_set_utils", "cluster", "sv_category", "extra_metrics"]: # "post_call_metrics", + "map_set_utils", "cluster", "sv_category", "extra_metrics"]: ext_modules.append(Extension(f"dysgu.{item}", [f"dysgu/{item}.pyx"], @@ -166,7 +166,7 @@ def get_extra_args(): description="Structural variant calling", license="MIT", version='1.6.3', - python_requires='>=3.8', + python_requires='>=3.10', install_requires=[ # runtime requires 'setuptools>=63.0', 'cython', @@ -179,7 +179,6 @@ def get_extra_args(): 'scikit-learn>=0.22', 'sortedcontainers', 'lightgbm', - #'edlib', ], setup_requires=[ 'setuptools>=63.0', @@ -193,7 +192,6 @@ def get_extra_args(): 'scikit-learn>=0.22', 'sortedcontainers', 'lightgbm', - #'edlib' ], packages=["dysgu", "dysgu.tests", "dysgu.scikitbio", "dysgu.edlib"], ext_modules=cythonize(ext_modules),