diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a4621b2..6b23803 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -14,24 +14,23 @@ jobs:
     steps:
       - uses: actions/checkout@v4
-
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.16.0
+        uses: pypa/cibuildwheel@v2.16.5
         env:
-          CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8, <3.12"
-          CIBW_SKIP: "*-win32 *-manylinux_i686 pp* cp311-macosx* *musl* "
+          CIBW_PROJECT_REQUIRES_PYTHON: ">=3.10"  # >=3.8 <3.12
+          CIBW_SKIP: "*-win32 *-manylinux_i686 pp* *musl* "  # cp311-macosx*
           CIBW_ARCHS_MACOS: "x86_64 arm64"
           CIBW_ARCHS_LINUX: "auto"
           CIBW_BEFORE_ALL_MACOS: bash ci/osx-deps
           CIBW_BEFORE_ALL_LINUX: bash ci/manylinux-deps
-          CIBW_BEFORE_BUILD: pip install -r requirements.txt
-          CIBW_TEST_COMMAND: dysgu test
-
-      # ...
-      #   with:
-      #   package-dir: .
-      #   output-dir: wheelhouse
-      #   config-file: "{package}/pyproject.toml"
+          CIBW_BEFORE_BUILD_MACOS: |
+            ln -s /Library/Frameworks/Python.framework/Versions/3.11/include/python3.11/cpython/longintrepr.h /Library/Frameworks/Python.framework/Versions/3.11/include/python3.11
+            pip install -r requirements.txt
+          CIBW_BEFORE_BUILD_LINUX: pip install -r requirements.txt
+          CIBW_REPAIR_WHEEL_COMMAND_MACOS: delocate-wheel --require-archs x86_64 -w {dest_dir} -v {wheel}
+          CIBW_TEST_SKIP: "*-macosx_arm64"
+          CIBW_TEST_REQUIRES: cython click>=8.0 numpy scipy pandas pysam>=0.22.0 networkx>=2.4 scikit-learn>=0.22 sortedcontainers lightgbm
+          CIBW_TEST_COMMAND: dysgu test --verbose

       - uses: actions/upload-artifact@v3
         with:
diff --git a/.gitignore b/.gitignore
index 4046d24..0a8f8e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,4 @@ dysgu/.DS_Store
 docs/build/
 docs/source/_templates/
 dev
+/dysgu/edlib/edlib.cpp
diff --git a/README.rst b/README.rst
index a19c837..d992af3 100644
--- a/README.rst
+++ b/README.rst
@@ -42,15 +42,16 @@ for calling structural variants using paired-end or long read sequencing data. S

 ⚙️ Installation
 ---------------

-Dysgu requires Python >=3.8 - 3.10 plus htslib and has been tested on linux and MacOS.
+Dysgu can be installed via pip using Python 3.10 - 3.12 and has been tested on Linux and macOS.
 The list of python packages needed can be found in requirements.txt.
 To install::

-    conda install -c bioconda -c conda-forge dysgu
+    pip install dysgu

-Or, fetch from PyPI::
+Or, from conda::
+
+    conda install -c bioconda -c conda-forge dysgu

-    pip install dysgu

 To build from source, run the install script: ``bash INSTALL.sh``.
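Note: the ``CIBW_SKIP`` value above is a space-separated list of glob patterns matched against cibuildwheel's build identifiers. A minimal sketch of that selection logic (illustrative only; the identifiers below are examples, not the full build matrix)::

    from fnmatch import fnmatch

    skip = "*-win32 *-manylinux_i686 pp* *musl*".split()
    identifiers = [
        "cp310-manylinux_x86_64",   # built
        "cp311-musllinux_x86_64",   # skipped by *musl*
        "cp312-macosx_arm64",       # built
        "pp310-manylinux_x86_64",   # skipped by pp*
        "cp39-win32",               # skipped by *-win32
    ]
    for ident in identifiers:
        skipped = any(fnmatch(ident, pat) for pat in skip)
        print(ident, "skip" if skipped else "build")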
diff --git a/dysgu/call_component.pyx b/dysgu/call_component.pyx index e466ed2..ec9b4e0 100644 --- a/dysgu/call_component.pyx +++ b/dysgu/call_component.pyx @@ -1,6 +1,6 @@ #cython: language_level=3, boundscheck=True, c_string_type=unicode, c_string_encoding=utf8, infer_types=True from __future__ import absolute_import -from collections import Counter, defaultdict +from collections import Counter, defaultdict, namedtuple import logging import numpy as np cimport numpy as np @@ -36,7 +36,7 @@ ctypedef enum ReadEnum_t: BREAKEND = 4 -cdef n_aligned_bases(AlignedSegment aln): +cpdef n_aligned_bases(AlignedSegment aln): cdef int opp, l, aligned, large_gaps, n_small_gaps, i cdef uint32_t cigar_value cdef uint32_t cigar_l = aln._delegate.core.n_cigar @@ -123,7 +123,9 @@ cdef count_attributes2(reads1, reads2, spanning, float insert_ppf, generic_ins, MAPQsupp += a.mapq if a.has_tag("NM"): if a_bases: - NMsupp += float(a.get_tag("NM")) / a_bases + nm = float(a.get_tag("NM")) + NMsupp += nm / a_bases + NMbase += (nm - large_gaps) / a_bases if a.has_tag("AS"): this_as = a.get_tag("AS") if this_as > maxASsupp: @@ -173,7 +175,9 @@ cdef count_attributes2(reads1, reads2, spanning, float insert_ppf, generic_ins, MAPQsupp += a.mapq if a.has_tag("NM"): if a_bases: - NMsupp += float(a.get_tag("NM")) / a_bases + nm = float(a.get_tag("NM")) + NMsupp += nm / a_bases + NMbase += (nm - large_gaps) / a_bases if a.has_tag("AS"): this_as = a.get_tag("AS") if this_as > maxASsupp: @@ -202,7 +206,9 @@ cdef count_attributes2(reads1, reads2, spanning, float insert_ppf, generic_ins, clipped_base_quals += cbq clipped_bases += cb - cdef int tot = er.supp + total_pri + cdef float tot = er.plus + er.minus # er.supp + total_pri + if tot == 0: + return er.NMpri = (NMpri / total_pri) * 100 if total_pri > 0 else 0 er.NMsupp = (NMsupp / er.supp) * 100 if er.supp > 0 else 0 er.NMbase = (NMbase / tot) * 100 if tot > 0 else 0 @@ -222,7 +228,7 @@ cdef count_attributes2(reads1, reads2, spanning, float insert_ppf, generic_ins, cdef int within_read_end_position(event_pos, svtype, cigartuples, cigar_index): - cdef int i, end, idx, opp, length, cigar_skip, n_aligned_bases, target_bases + cdef int i, end if svtype == 1: # insertion (or other e.g. 
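Note on the new ``NMbase`` accumulator added above: the SAM ``NM`` tag counts substitutions plus inserted/deleted bases, so subtracting the bases held in large gaps before normalising leaves an estimate of substitution-level divergence per aligned base. A minimal sketch of the idea, with plain ints standing in for pysam's ``get_tag("NM")`` and the cigar-derived counts::

    def nm_base_rate(nm, large_gaps, aligned_bases):
        # NM includes indel bases; removing large-gap bases leaves
        # an approximate per-base mismatch (divergence) rate
        if aligned_bases == 0:
            return 0.0
        return (nm - large_gaps) / aligned_bases

    print(nm_base_rate(nm=25, large_gaps=20, aligned_bases=1000))  # 0.005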
duplication/inversion within read) return event_pos + 1 else: # deletion type @@ -684,6 +690,9 @@ cdef single(rds, int insert_size, int insert_stdev, float insert_ppf, int clip_l tmp = defaultdict(list) # group by template name small_tlen_outliers = 0 # for paired reads note any smaller than expected TLENs for cigar_info, align in rds: + # echo(cigar_info, cigar_info.read_enum) + # if not paired_end and cigar_info.read_enum < 2: # split read + # continue tmp[align.qname].append((cigar_info, align)) if align.flag & 1 and abs(align.tlen) < insert_ppf: small_tlen_outliers += 1 @@ -694,6 +703,7 @@ cdef single(rds, int insert_size, int insert_stdev, float insert_ppf, int clip_l l_align = list(alignments) if len(l_align) > 1: pair = guess_informative_pair(l_align) + # pair = informative_pair(alignments, alignments, paired_end=paired_end) if pair is not None: if len(pair) == 2: a, b = pair @@ -762,19 +772,19 @@ cdef single(rds, int insert_size, int insert_stdev, float insert_ppf, int clip_l svlen_adjusted = int(np.median([b[0] for b in size_pos_bounded])) posA_adjusted = int(np.median([b[1] for b in size_pos_bounded])) posB_adjusted = int(np.median([b[2] for b in size_pos_bounded])) - if svtype_m == 2: # del - posA = posA_adjusted - posB = posB_adjusted - svlen = svlen_adjusted - er.preciseA = True - er.preciseB = True - else: + # if svtype_m == 2: # del + # posA = posA_adjusted + # posB = posB_adjusted + # svlen = svlen_adjusted + # er.preciseA = True + # er.preciseB = True + # else: # to_assemble = False - er.preciseA = True - er.preciseB = True - posA = posA_adjusted - posB = posB_adjusted - svlen = svlen_adjusted + er.preciseA = True + er.preciseB = True + posA = posA_adjusted + posB = posB_adjusted + svlen = svlen_adjusted else: svlen = int(np.median([sp[4] for sp in spanning_alignments])) @@ -872,18 +882,35 @@ cdef single(rds, int insert_size, int insert_stdev, float insert_ppf, int clip_l cdef tuple informative_pair(u, v): pri_u = None pri_v = None - for i in u: + for i_info, i in u: ri_flag = i.flag & 64 if not i.flag & 2304: # Not pri, supplementary --> is primary - pri_u = i - for j in v: + pri_u = i_info, i + for j_info, j in v: if j.flag & 64 == ri_flag: # Same read # Same read, primary + supp, or supp + supp - return i, j + return (i_info, i), (j_info, j) if not j.flag & 2304: # Is primary - pri_v = j + pri_v = j_info, j if pri_u is not None and pri_v is not None: return pri_u, pri_v + return None + +# cdef tuple informative_pair(u, v, bint paired_end): +# # fetch either a split read or pair1 and pair2 +# for i in u: +# i_info = i[0] +# for j in v: +# j_info = j[0] +# if j_info.hash_name == i_info.hash_name: +# continue +# if not paired_end: +# if i_info.read_enum == 1 and j_info.read_enum == 1: +# return i, j +# elif i_info.read_enum == j_info.read_enum: +# return i, j +# return None + cdef tuple break_ops(positions, precise, int limit, float median_pos): @@ -903,7 +930,7 @@ cdef tuple break_ops(positions, precise, int limit, float median_pos): ops.append((i + v + 1, -1, 1)) else: ops.append((i + v, -1, 1)) - ops = sorted(ops, reverse=limit < 0) + ops.sort(reverse=limit < 0) cdef int cum_sum = 0 cdef int max_sum = 0 cdef int max_sum_i = 0 @@ -1286,63 +1313,66 @@ cdef void assemble_partitioned_reads(EventResult_t er, u_reads, v_reads, int blo er.ref_bases = rbases -cdef call_from_reads(u_reads, v_reads, int insert_size, int insert_stdev, float insert_ppf, int min_support, +cdef call_from_reads(u_reads_info, v_reads_info, int insert_size, int insert_stdev, float insert_ppf, int 
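Note: ``informative_pair`` above leans on two SAM flag masks: 2304 = 256 (secondary) + 2048 (supplementary), so ``not flag & 2304`` selects primary alignments, and bit 64 marks the first read of a template. A small sketch of those tests, with flag values taken from the SAM spec::

    def is_primary(flag):
        # neither secondary (256) nor supplementary (2048)
        return not flag & 2304

    def is_first_in_pair(flag):
        return bool(flag & 64)

    print(is_primary(99), is_primary(2147))              # True False
    print(is_first_in_pair(99), is_first_in_pair(147))   # True False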
min_support, int block_edge, int assemble, info, bint paired_end): grp_u = defaultdict(list) grp_v = defaultdict(list) - for r in u_reads: - grp_u[r.qname].append(r) - for r in v_reads: - grp_v[r.qname].append(r) + for uinfo, r in u_reads_info: + grp_u[r.qname].append((uinfo, r)) + for vinfo, r in v_reads_info: + grp_v[r.qname].append((vinfo, r)) informative = [] cdef AlignmentItem v_item, i cdef int left_clip_a, right_clip_a, left_clip_b, right_clip_b for qname in [k for k in grp_u if k in grp_v]: # Qname found on both sides u = grp_u[qname] v = grp_v[qname] - if len(u) == 1 and len(v) == 1: - pair = (u[0], v[0]) - else: - pair = informative_pair(u, v) - if pair: - a, b = pair - a_ct = a.cigartuples - b_ct = b.cigartuples - a_qstart, a_qend, b_qstart, b_qend, a_len, b_len = start_end_query_pair(a, b) - # Soft-clips for the chosen pair, plus template start of alignment - left_clip_a, right_clip_a, left_clip_b, right_clip_b = mask_soft_clips(a.flag, b.flag, a_ct, b_ct) - a_chrom, b_chrom = a.rname, b.rname - a_start, a_end = a.pos, a.reference_end - b_start, b_end = b.pos, b.reference_end - read_overlaps_mate = same_read_overlaps_mate(a_chrom, b_chrom, a_start, a_end, b_start, b_end, a, b) - v_item = AlignmentItem(a.rname, - b.rname, - int(not a.flag & 2304), # Is primary - int(not b.flag & 2304), - 1 if a.flag & 64 else 2, - 1 if b.flag & 64 else 2, - a.pos, a.reference_end, - b.pos, b.reference_end, - 3 if a.flag & 16 == 0 else 5, - 3 if b.flag & 16 == 0 else 5, - left_clip_a, right_clip_a, - left_clip_b, right_clip_b, - a_qstart, a_qend, b_qstart, b_qend, a_len, b_len, - read_overlaps_mate, - a, b - ) - if v_item.left_clipA and v_item.right_clipA: - if a_ct[0][1] >= a_ct[-1][1]: - v_item.right_clipA = 0 - else: - v_item.left_clipA = 0 - if v_item.left_clipB and v_item.right_clipB: - if b_ct[0][1] >= b_ct[-1][1]: - v_item.right_clipB = 0 - else: - v_item.left_clipB = 0 - classify_d(v_item) - informative.append(v_item) + pair = informative_pair(u, v) #, paired_end) + if not pair: + continue + # if len(u) == 1 and len(v) == 1: + # pair = (u[0], v[0]) + # else: + # pair = informative_pair(u, v, paired_end) + a_node_info, a, b_node_info, b, = pair[0][0], pair[0][1], pair[1][0], pair[1][1] + a_ct = a.cigartuples + b_ct = b.cigartuples + a_qstart, a_qend, b_qstart, b_qend, a_len, b_len = start_end_query_pair(a, b) + # Soft-clips for the chosen pair, plus template start of alignment + left_clip_a, right_clip_a, left_clip_b, right_clip_b = mask_soft_clips(a.flag, b.flag, a_ct, b_ct) + a_chrom, b_chrom = a.rname, b.rname + a_start, a_end = a.pos, a.reference_end + b_start, b_end = b.pos, b.reference_end + read_overlaps_mate = same_read_overlaps_mate(a_chrom, b_chrom, a_start, a_end, b_start, b_end, a, b) + v_item = AlignmentItem(a.rname, + b.rname, + int(not a.flag & 2304), # Is primary + int(not b.flag & 2304), + 1 if a.flag & 64 else 2, + 1 if b.flag & 64 else 2, + a.pos, a.reference_end, + b.pos, b.reference_end, + 3 if a.flag & 16 == 0 else 5, + 3 if b.flag & 16 == 0 else 5, + left_clip_a, right_clip_a, + left_clip_b, right_clip_b, + a_qstart, a_qend, b_qstart, b_qend, a_len, b_len, + read_overlaps_mate, + a, b, + a_node_info, b_node_info + ) + if v_item.left_clipA and v_item.right_clipA: + if a_ct[0][1] >= a_ct[-1][1]: + v_item.right_clipA = 0 + else: + v_item.left_clipA = 0 + if v_item.left_clipB and v_item.right_clipB: + if b_ct[0][1] >= b_ct[-1][1]: + v_item.right_clipB = 0 + else: + v_item.left_clipB = 0 + classify_d(v_item) + informative.append(v_item) if not informative: return [] @@ 
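Note on the clip disambiguation in ``call_from_reads`` above: when a read is soft-clipped at both ends, only the longer clip is kept as the informative side (``a_ct[0][1]`` vs ``a_ct[-1][1]`` are the terminal cigar-op lengths). The same rule in isolation::

    def dominant_clip(cigartuples, left_clip, right_clip):
        # cigar op 4 = soft clip; keep only the longer terminal clip
        if left_clip and right_clip:
            if cigartuples[0][1] >= cigartuples[-1][1]:
                right_clip = 0
            else:
                left_clip = 0
        return left_clip, right_clip

    # 20 bp left clip vs 5 bp right clip -> keep the left
    print(dominant_clip([(4, 20), (0, 80), (4, 5)], 1, 1))  # (1, 0)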
-1356,10 +1386,10 @@ cdef call_from_reads(u_reads, v_reads, int insert_size, int insert_stdev, float svtypes_counts[2].append(i) elif i.svtype == "TRA": svtypes_counts[3].append(i) - svtypes_res = sorted(svtypes_counts, key=sort_by_length, reverse=True) + svtypes_counts.sort(key=sort_by_length, reverse=True) results = [] cdef EventResult_t er - for sub_informative in svtypes_res: + for sub_informative in svtypes_counts: if len(sub_informative) >= min_support: svtype = sub_informative[0].svtype if svtype == "INV" or svtype == "TRA": @@ -1436,7 +1466,7 @@ cdef filter_single_partitions(u_reads, v_reads): cdef one_edge(u_reads_info, v_reads_info, int clip_length, int insert_size, int insert_stdev, float insert_ppf, - int min_support, int block_edge, int assemble, info, bint paired_end): + int min_support, int block_edge, int assemble, info, bint paired_end): spanning_alignments = [] u_reads = [] v_reads = [] @@ -1555,7 +1585,7 @@ cdef one_edge(u_reads_info, v_reads_info, int clip_length, int insert_size, int return [er] else: - results = call_from_reads(u_reads, v_reads, insert_size, insert_stdev, insert_ppf, min_support, block_edge, assemble, info, paired_end) + results = call_from_reads(u_reads_info, v_reads_info, insert_size, insert_stdev, insert_ppf, min_support, block_edge, assemble, info, paired_end) return results @@ -1571,15 +1601,24 @@ cdef get_reads(infile, nodes_info, buffered_reads, n2n, bint add_to_buffer, site aligns = [] fpos = [] site_info = [] + + # Sometimes seeking into the bam file doesnt find the read. This can be addressed by instead + # using the index and looping through the file, but this adds significant overhead + # todo see if missing reads are an issue for long reads + # regions = [] + # hash_names = {} + # has_index = infile.has_index() + for int_node in nodes_info: if sites_index and int_node in sites_index: continue n = n2n[int_node] - if int_node in buffered_reads: + if buffered_reads and int_node in buffered_reads: aligns.append((n, buffered_reads[int_node])) continue fpos.append((n, int_node)) - for node, int_node in sorted(fpos, key=fpos_srt): + fpos.sort(key=fpos_srt) + for node, int_node in fpos: infile.seek(node.tell) try: a = next(infile) @@ -1605,6 +1644,23 @@ cdef get_reads(infile, nodes_info, buffered_reads, n2n, bint add_to_buffer, site if add_to_buffer: buffered_reads[int_node] = a break + # else: + # if has_index: + # nn = n2n[int_node] + # hash_names[(nn.hash_name, nn.flag, nn.pos, nn.chrom)] = nn + # regions.append((infile.get_reference_name(nn.chrom), nn.pos, nn.pos + 200)) + # + # if regions: + # regions = merge_intervals(regions) + # for rgn in regions: + # for a in infile.fetch(*rgn): + # v = xxhasher(bam_get_qname(a._delegate), len(a.qname), 42) + # nm = (v, a.flag, a.pos, a.rname) + # if nm in hash_names: + # aligns.append((hash_names[nm], a)) + # + # if len(aligns) < len(nodes_info): + # echo(len(aligns), len(nodes_info)) return aligns @@ -1614,65 +1670,68 @@ cdef list multi(data, bam, int insert_size, int insert_stdev, float insert_ppf, # Sometimes partitions are not linked, happens when there is not much support between partitions # Then need to decide whether to call from a single partition - n2n = data["n2n"] - seen = set(range(len(data['parts']))) + n2n = data.n2n + seen = set(range(len(data.parts))) if data.parts else {} out_counts = defaultdict(int) # The number of 'outward' links to other clusters cdef int buffered_reads = 0 cdef bint add_to_buffer = 1 cdef int int_node cdef unsigned long[:] partition events = [] - if info: - 
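Note: ``get_reads`` below retrieves reads by virtual file offset rather than re-fetching by region; the offset stored with each node is passed to ``seek()`` and the read recovered with a single ``next()``. A minimal sketch of the pattern ("example.bam" is a placeholder path)::

    import pysam

    bam = pysam.AlignmentFile("example.bam", "rb")
    offsets = []
    while True:
        tell = bam.tell()            # virtual offset of the read about to be returned
        try:
            aln = next(bam)
        except StopIteration:
            break
        if aln.has_tag("SA"):        # e.g. remember split reads for later
            offsets.append(tell)

    for tell in offsets:             # later: jump straight back to each read
        bam.seek(tell)
        print(next(bam).query_name)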
sites_info = list(info.values()) - else: - sites_info = [] - # u and v are the part ids, d[0] and d[1] are the lists of nodes for those parts - for (u, v), d in data["s_between"].items(): - rd_u = get_reads(bam, d[0], data["reads"], data["n2n"], add_to_buffer, info) # [(Nodeinfo, alignment)..] - rd_v = get_reads(bam, d[1], data["reads"], data["n2n"], add_to_buffer, info) - total_reads = len(rd_u) + len(rd_v) - buffered_reads += total_reads - if add_to_buffer and buffered_reads > 50000: - add_to_buffer = 0 - out_counts[u] += len(rd_u) - out_counts[v] += len(rd_v) - if len(rd_u) == 0 or len(rd_v) == 0: - continue - if u in seen: - seen.remove(u) - if v in seen: - seen.remove(v) - - # finds reads that should be a single partition - u_reads, v_reads, u_single, v_single = filter_single_partitions(rd_u, rd_v) - if len(u_reads) > 0 and len(v_reads) > 0: - events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, assemble_contigs, + sites_info = list(info.values()) if info else [] + + if data.s_between: + # u and v are the part ids, d[0] and d[1] are the lists of nodes for those parts + for (u, v), d in data.s_between: #.items(): + rd_u = get_reads(bam, d[0], data.reads, n2n, add_to_buffer, info) # [(Nodeinfo, alignment)..] + rd_v = get_reads(bam, d[1], data.reads, n2n, add_to_buffer, info) + total_reads = len(rd_u) + len(rd_v) + buffered_reads += total_reads + if add_to_buffer and buffered_reads > 50000: + add_to_buffer = 0 + out_counts[u] += len(rd_u) + out_counts[v] += len(rd_v) + if len(rd_u) == 0 or len(rd_v) == 0: + continue + if u in seen: + seen.remove(u) + if v in seen: + seen.remove(v) + + events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, + assemble_contigs, sites_info, paired_end) - if u_single: - res = single(u_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, - sites_info, paired_end, length_extend, divergence) - if res: - if isinstance(res, EventResult): - events.append(res) - else: - events += res - if v_single: - res = single(v_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, - sites_info, paired_end, length_extend, divergence) - if res: - if isinstance(res, EventResult): - events.append(res) - else: - events += res + + # finds reads that should be a single partition + # u_reads, v_reads, u_single, v_single = filter_single_partitions(rd_u, rd_v) + # if len(u_reads) > 0 and len(v_reads) > 0: + # events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, assemble_contigs, + # sites_info, paired_end) + # if u_single: + # res = single(u_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, + # sites_info, paired_end, length_extend, divergence) + # if res: + # if isinstance(res, EventResult): + # events.append(res) + # else: + # events += res + # if v_single: + # res = single(v_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, + # sites_info, paired_end, length_extend, divergence) + # if res: + # if isinstance(res, EventResult): + # events.append(res) + # else: + # events += res # Process any singles / unconnected blocks if seen: for part_key in seen: - d = data["parts"][part_key] + d = data.parts[part_key] lb = lower_bound_support if len(sites_info) == 0 else 1 if max_single_size > len(d) >= lb: # Call single block, only collect local reads to the block - rds = get_reads(bam, d, data["reads"], 
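Note: throughout this hunk, ``data["n2n"]``-style dict lookups become attribute access (``data.n2n``, ``data.parts``, ``data.s_between``). A hypothetical stand-in showing the shape the code now assumes (the real object is presumably a cdef class; field names are taken from the diff)::

    from dataclasses import dataclass, field

    @dataclass
    class BlockModelData:
        n2n: dict
        parts: list
        s_between: list = field(default_factory=list)  # [((u, v), (nodes_u, nodes_v)), ...]
        s_within: list = field(default_factory=list)
        reads: dict | None = None
        info: dict | None = None

    data = BlockModelData(n2n={}, parts=[])
    print(len(data.parts) if data.parts else 0)   # mirrors the n_parts computation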
data["n2n"], 0, info) + rds = get_reads(bam, d, data.reads, data.n2n, 0, info) if len(rds) < lower_bound_support or (len(sites_info) != 0 and len(rds) == 0): continue res = single(rds, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, @@ -1684,46 +1743,43 @@ cdef list multi(data, bam, int insert_size, int insert_stdev, float insert_ppf, events += res # Check for events within clustered nodes - for k, d in data["s_within"].items(): - o_count = out_counts[k] - i_counts = len(d) - if i_counts > max_single_size: - continue - if o_count > 0 and i_counts > (2*min_support) and i_counts > o_count: - rds = get_reads(bam, d, data["reads"], data["n2n"], 0, info) - if len(rds) < lower_bound_support or (len(sites_info) != 0 and len(rds) == 0): - continue - res = single(rds, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, - sites_info, paired_end, length_extend, divergence) - if res: - if isinstance(res, EventResult): - events.append(res) - else: - events += res + if data.s_within: + for k, d in data.s_within: #.items(): + o_count = out_counts[k] + i_counts = len(d) + if i_counts > max_single_size: + continue + if o_count > 0 and i_counts > (2*min_support) and i_counts > o_count: + rds = get_reads(bam, d, data.reads, data.n2n, 0, info) + if len(rds) < lower_bound_support or (len(sites_info) != 0 and len(rds) == 0): + continue + res = single(rds, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, + sites_info, paired_end, length_extend, divergence) + if res: + if isinstance(res, EventResult): + events.append(res) + else: + events += res return events cpdef list call_from_block_model(bam, data, clip_length, insert_size, insert_stdev, insert_ppf, min_support, lower_bound_support, assemble_contigs, max_single_size, sites_index, bint paired_end, int length_extend, float divergence): - n_parts = len(data["parts"]) + n_parts = len(data.parts) if data.parts else 0 events = [] - if "info" in data: - info = data["info"] - else: - info = None + info = data.info + if data.reads is None: + data.reads = {} # next deal with info - need to filter these into the partitions, then deal with them in single / one_edge cdef EventResult_t e if n_parts >= 1: events += multi(data, bam, insert_size, insert_stdev, insert_ppf, clip_length, min_support, lower_bound_support, assemble_contigs, max_single_size, info, paired_end, length_extend, divergence) elif n_parts == 0: - if len(data["n2n"]) > max_single_size: + if len(data.n2n) > max_single_size: return [] - rds = get_reads(bam, data["n2n"].keys(), data["reads"], data["n2n"], 0, info) - if info: - sites_info = list(info.values()) - else: - sites_info = [] + rds = get_reads(bam, data.n2n.keys(), data.reads, data.n2n, 0, info) + sites_info = list(info.values()) if info else [] if len(rds) < lower_bound_support or (len(sites_info) != 0 and len(rds) == 0): return [] ev = single(rds, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, sites_info, paired_end, length_extend, divergence) diff --git a/dysgu/cluster.pyx b/dysgu/cluster.pyx index 33bc24a..55e8a80 100644 --- a/dysgu/cluster.pyx +++ b/dysgu/cluster.pyx @@ -52,24 +52,27 @@ def filter_potential(input_events, tree, regions_only): return potential -def compare_subset(potential, int max_dist): +def get_chrom_key(ei): + if ei.chrA != ei.chrB: + return min(ei.chrA, ei.chrB), max(ei.chrA, ei.chrB) + return ei.chrA + + +def compare_subset(potential, int max_dist, int max_comparisons): tmp_list = 
defaultdict(list) cdef int idx, jdx, dist1, ci_a for idx in range(len(potential)): ei = potential[idx] - tmp_list[ei.chrA].append((ei.posA - ei.cipos95A - max_dist, ei.posA + ei.cipos95A + max_dist, idx)) - if ei.chrA != ei.chrB or abs(ei.posB - ei.posA) > 5: - if ei.chrA != ei.chrB: - tmp_list[ei.chrB].append((ei.posB - ei.cipos95B - max_dist, ei.posB + ei.cipos95B + max_dist, idx)) - else: - dist1 = abs(ei.posB - ei.posA) - ci_a = max(ei.cipos95A, ei.cipos95A) - if dist1 + ci_a > max_dist: - tmp_list[ei.chrB].append((ei.posB - ei.cipos95B - max_dist, ei.posB + ei.cipos95B + max_dist, idx)) + tmp_list[get_chrom_key(ei)].append((ei.posA - ei.cipos95A - max_dist, ei.posA + ei.cipos95A + max_dist, idx)) + nc2 = {k: io_funcs.iitree(v, add_value=True) for k, v in tmp_list.items()} for idx in range(len(potential)): ei = potential[idx] - ols = nc2[ei.chrB].allOverlappingIntervals(ei.posB, ei.posB + 1) + ols = nc2[get_chrom_key(ei)].allOverlappingIntervals(ei.posA, ei.posA + 1) + ols.remove(idx) + if len(ols) > max_comparisons: + random.shuffle(ols) + ols = ols[:max_comparisons] for jdx in ols: ej = potential[jdx] yield ei, ej, idx, jdx @@ -143,14 +146,16 @@ cdef break_distances(int i_a, int i_b, int j_a, j_b, bint i_a_precise, bint i_b_ def enumerate_events(G, potential, max_dist, try_rev, tree, paired_end=False, rel_diffs=False, diffs=15, - same_sample=True, aggressive_ins_merge=False, debug=False): + same_sample=True, aggressive_ins_merge=False, debug=False, max_comparisons=20): if len(potential) < 50: event_iter = compare_all(potential) # N^2 compare all events to all others else: - event_iter = compare_subset(potential, max_dist) # Use iitree, generate overlap tree and perform intersections + event_iter = compare_subset(potential, max_dist, max_comparisons) # Use iitree, generate overlap tree and perform intersections + seen = set([]) pad = 100 disjoint_nodes = set([]) # if a component has more than one disjoint nodes it needs to be broken apart + node_counts = defaultdict(int) for ei, ej, idx, jdx in event_iter: i_id = ei.event_id j_id = ej.event_id @@ -279,7 +284,6 @@ def enumerate_events(G, potential, max_dist, try_rev, tree, paired_end=False, re v = (ci2, cj) else: continue - # echo(ei.remap_score == 0 or ej.remap_score == 0, ei.svlen, ej.svlen, v, assembler.check_contig_match(v[0], v[1], return_int=True)) # if not remapped and nearby insertions with opposing soft-clips --> merge # also long-reads will normally have remap_score == 0 if ei.remap_score == 0 or ej.remap_score == 0: @@ -290,6 +294,10 @@ def enumerate_events(G, potential, max_dist, try_rev, tree, paired_end=False, re G.add_edge(i_id, j_id, loci_same=True) # see if alt sequence can be found in other contig else: + # if ci_alt and cj_alt: + # if assembler.check_contig_match(ci_alt, cj_alt, return_int=True): + # G.add_edge(i_id, j_id, loci_same=True) + # continue if ci_alt and cj: if assembler.check_contig_match(ci_alt, cj, return_int=True): G.add_edge(i_id, j_id, loci_same=True) @@ -350,7 +358,7 @@ cpdef srt_func(c): def merge_events(potential, max_dist, tree, paired_end=False, try_rev=False, pick_best=False, add_partners=False, rel_diffs=False, diffs=15, same_sample=True, debug=False, min_size=0, aggressive_ins_merge=False, - skip_imprecise=False): + skip_imprecise=False, max_comparisons=100): """Try and merge similar events, use overlap of both breaks points """ max_dist = max_dist / 2 @@ -360,7 +368,7 @@ def merge_events(potential, max_dist, tree, paired_end=False, try_rev=False, pic G = nx.Graph() G, disjoint_nodes = 
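Note on the new ``max_comparisons`` guard above: it bounds the quadratic blow-up for events with many overlaps by removing the self index, then shuffling and truncating the partner list. The same capping in isolation::

    import random

    def capped(overlap_idxs, self_idx, max_comparisons):
        ols = [i for i in overlap_idxs if i != self_idx]
        if len(ols) > max_comparisons:
            random.shuffle(ols)
            ols = ols[:max_comparisons]
        return ols

    print(len(capped(list(range(1000)), 0, 20)))  # 20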
enumerate_events(G, potential, max_dist, try_rev, tree, paired_end, rel_diffs, diffs, same_sample, aggressive_ins_merge=aggressive_ins_merge, - debug=debug) + debug=debug, max_comparisons=max_comparisons) found = [] for item in potential: # Add singletons, non-merged if not G.has_node(item.event_id): @@ -371,8 +379,8 @@ def merge_events(potential, max_dist, tree, paired_end=False, try_rev=False, pic node_to_event = {i.event_id: i for i in potential} cdef int k for grp in components: - c = [node_to_event[n] for n in grp] - best = sorted(c, key=srt_func, reverse=True) + best = [node_to_event[n] for n in grp] + best.sort(key=srt_func, reverse=True) w0 = best[0] if not pick_best: weight = w0.pe + w0.supp + w0.spanning @@ -811,7 +819,10 @@ def pipe1(args, infile, kind, regions, ibam, ref_genome, sample_name, bam_iter=N trust_ins_len=args["trust_ins_len"] == "True", low_mem=low_mem, temp_dir=tdir, - find_n_aligned_bases=find_n_aligned_bases) + find_n_aligned_bases=find_n_aligned_bases, + position_distance_thresh=args['sd'], + max_search_depth=args['search_depth'] + ) sites_index = None if sites_adder: sites_index = sites_adder.sites_index @@ -937,7 +948,7 @@ def pipe1(args, infile, kind, regions, ibam, ref_genome, sample_name, bam_iter=N pickle.dump(res, completed_file) else: j_submitted, w_idx = heapq.heappop(minhq) - heapq.heappush(minhq, (j_submitted + len(res["n2n"]), w_idx)) + heapq.heappush(minhq, (j_submitted + len(res.n2n), w_idx)) msg_queues[w_idx][1].send(res) else: # most partitions processed here, dict returned, or None @@ -969,7 +980,7 @@ def pipe1(args, infile, kind, regions, ibam, ref_genome, sample_name, bam_iter=N pickle.dump(res, completed_file) else: j_submitted, w_idx = heapq.heappop(minhq) - heapq.heappush(minhq, (j_submitted + len(res["n2n"]), w_idx)) + heapq.heappush(minhq, (j_submitted + len(res.n2n), w_idx)) msg_queues[w_idx][1].send(res) if completed_file is not None: @@ -1022,7 +1033,8 @@ def pipe1(args, infile, kind, regions, ibam, ref_genome, sample_name, bam_iter=N # Merge across calls if args["merge_within"] == "True": merged = merge_events(block_edge_events, args["merge_dist"], regions, bool(paired_end), try_rev=False, pick_best=False, - debug=True, min_size=args["min_size"]) + debug=True, min_size=args["min_size"], + max_comparisons=args["max_comparisons"] if "max_comparisons" in args else 100) else: merged = block_edge_events logging.info("Number of candidate SVs merged: {}".format(len(block_edge_events) - len(merged))) diff --git a/dysgu/coverage.pyx b/dysgu/coverage.pyx index 6da4246..8e526fd 100644 --- a/dysgu/coverage.pyx +++ b/dysgu/coverage.pyx @@ -161,10 +161,10 @@ cdef class GenomeScanner: else: f_iter = self.input_bam.fetch(until_eof=True) for aln in f_iter: - # if aln.flag & 1284 or aln.mapq < mq_thresh or aln.cigartuples is None: - # continue + cigar_l = aln._delegate.core.n_cigar if cigar_l == 0 or aln.flag & 1284: + tell = 0 if self.no_tell else self.input_bam.tell() continue if aln.rname != self.current_tid: @@ -182,7 +182,6 @@ cdef class GenomeScanner: good_read = True elif not aln.flag & 2: good_read = True - # cigar_l = aln._delegate.core.n_cigar cigar_p = bam_get_cigar(aln._delegate) for i in range(cigar_l): cigar_value = cigar_p[i] @@ -199,12 +198,11 @@ cdef class GenomeScanner: elif opp == 0 or opp == 7 or opp == 8: self.cpp_cov_track.add(pos + index_start, pos + index_start + length) index_start += length - if aln.mapq < mq_thresh: - continue - if not good_read: - continue - if not self.cpp_cov_track.cov_val_good(self.current_tid, aln.rname, 
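Note: ``merge_events`` above collects candidate merges as edges of an undirected graph; each connected component then becomes one merged call, led by its best member under ``srt_func``. A toy sketch with made-up event ids and weights::

    import networkx as nx

    G = nx.Graph()
    G.add_edge("ev1", "ev2")   # ev1/ev2 judged to be the same SV
    G.add_edge("ev2", "ev3")
    weights = {"ev1": 5, "ev2": 9, "ev3": 2}

    for comp in nx.connected_components(G):
        best = sorted(comp, key=weights.get, reverse=True)
        print(best[0], "absorbs", best[1:])   # ev2 absorbs ['ev1', 'ev3']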
pos): + + if aln.mapq < mq_thresh or not good_read or not self.cpp_cov_track.cov_val_good(self.current_tid, aln.rname, pos): + tell = 0 if self.no_tell else self.input_bam.tell() continue + self._add_to_bin_buffer(aln, tell) tell = 0 if self.no_tell else self.input_bam.tell() while len(self.staged_reads) > 0: @@ -460,9 +458,9 @@ cdef class GenomeScanner: cdef float current_coverage if self.current_chrom != rname: self.current_chrom = rname - in_roi = False - if self.overlap_regions: - in_roi = intersecter(self.overlap_regions, a.rname, apos, apos+1) + # in_roi = False + # if self.overlap_regions: + # in_roi = intersecter(self.overlap_regions, a.rname, apos, apos+1) if rname == self.current_chrom and bin_pos == self.current_pos: self.current_bin.append((a, tell)) else: diff --git a/dysgu/edlib/__init__.py b/dysgu/edlib/__init__.py new file mode 100644 index 0000000..5dd9e64 --- /dev/null +++ b/dysgu/edlib/__init__.py @@ -0,0 +1,3 @@ + +from .edlib import (align) +__all__ = ['align'] \ No newline at end of file diff --git a/dysgu/edlib/edlib.pxd b/dysgu/edlib/edlib.pxd new file mode 100644 index 0000000..4eddbb8 --- /dev/null +++ b/dysgu/edlib/edlib.pxd @@ -0,0 +1,39 @@ +cdef extern from "src/edlib.h" nogil: + + ctypedef enum EdlibAlignMode: EDLIB_MODE_NW, EDLIB_MODE_SHW, EDLIB_MODE_HW + ctypedef enum EdlibAlignTask: EDLIB_TASK_DISTANCE, EDLIB_TASK_LOC, EDLIB_TASK_PATH + ctypedef enum EdlibCigarFormat: EDLIB_CIGAR_STANDARD, EDLIB_CIGAR_EXTENDED + + ctypedef struct EdlibEqualityPair: + char first + char second + + ctypedef struct EdlibAlignConfig: + int k + EdlibAlignMode mode + EdlibAlignTask task + const EdlibEqualityPair* additionalEqualities + int additionalEqualitiesLength + + EdlibAlignConfig edlibNewAlignConfig(int k, EdlibAlignMode mode, EdlibAlignTask task, + const EdlibEqualityPair* additionalEqualities, + int additionalEqualitiesLength) + EdlibAlignConfig edlibDefaultAlignConfig() + + ctypedef struct EdlibAlignResult: + int status + int editDistance + int* endLocations + int* startLocations + int numLocations + unsigned char* alignment + int alignmentLength + int alphabetLength + + void edlibFreeAlignResult(EdlibAlignResult result) + + EdlibAlignResult edlibAlign(const char* query, int queryLength, + const char* target, int targetLength, + const EdlibAlignConfig config) + + char* edlibAlignmentToCigar(const unsigned char* alignment, int alignmentLength, EdlibCigarFormat cigarFormat) \ No newline at end of file diff --git a/dysgu/edlib/edlib.pyx b/dysgu/edlib/edlib.pyx new file mode 100644 index 0000000..ea12c0a --- /dev/null +++ b/dysgu/edlib/edlib.pyx @@ -0,0 +1,240 @@ +# cython: language_level=2 +cimport cython +from cpython.mem cimport PyMem_Malloc, PyMem_Free +import re +# +# cimport dysgu.cedlib + +class NeedsAlphabetMapping(Exception): + pass + + +def _map_ascii_string(s): + """ Helper func to handle the bytes mapping in the case of ASCII strings.""" + if isinstance(s, bytes): + return s + elif isinstance(s, str): + s_bytes = s.encode('utf-8') + if len(s_bytes) == len(s): + return s_bytes + raise NeedsAlphabetMapping() + + +def _map_to_bytes(query, target, additional_equalities): + """ Map hashable input values to single byte values. 
+ + Example: + In: query={12, 'ä'}, target='ööö', additional_equalities={'ä': 'ö'} + Out: b'\x00\x01', b'\x02\x02\x02', additional_equalities={b'\x01': b'\x02'} + """ + cdef bytes query_bytes + cdef bytes target_bytes + try: + query_bytes = _map_ascii_string(query) + target_bytes = _map_ascii_string(target) + except NeedsAlphabetMapping: + # Map elements of alphabet to chars from 0 up to 255, so that Edlib can work with them, + # since C++ Edlib needs chars. + alphabet = set(query).union(set(target)) + if len(alphabet) > 256: + raise ValueError( + "query and target combined have more than 256 unique values, " + "this is not supported.") + alphabet_to_byte_mapping = { + c: idx.to_bytes(1, byteorder='big') for idx, c in enumerate(alphabet) + } + map_seq = lambda seq: b''.join(alphabet_to_byte_mapping[c] for c in seq) + query_bytes = map_seq(query) + target_bytes = map_seq(target) + if additional_equalities is not None: + additional_equalities = [ + (alphabet_to_byte_mapping[a].decode('utf-8'), alphabet_to_byte_mapping[b].decode('utf-8')) + for a, b in additional_equalities + if a in alphabet_to_byte_mapping and b in alphabet_to_byte_mapping] + return query_bytes, target_bytes, additional_equalities + + +def align(query, target, mode="NW", task="distance", k=-1, additionalEqualities=None): + """ Align query with target using edit distance. + @param {str or bytes or iterable of hashable objects} query, combined with target must have no more + than 256 unique values + @param {str or bytes or iterable of hashable objects} target, combined with query must have no more + than 256 unique values + @param {string} mode Optional. Alignment method do be used. Possible values are: + - 'NW' for global (default) + - 'HW' for infix + - 'SHW' for prefix. + @param {string} task Optional. Tells edlib what to calculate. The less there is to calculate, + the faster it is. Possible value are (from fastest to slowest): + - 'distance' - find edit distance and end locations in target. Default. + - 'locations' - find edit distance, end locations and start locations. + - 'path' - find edit distance, start and end locations and alignment path. + @param {int} k Optional. Max edit distance to search for - the lower this value, + the faster is calculation. Set to -1 (default) to have no limit on edit distance. + @param {list} additionalEqualities Optional. + List of pairs of characters or hashable objects, where each pair defines two values as equal. + This way you can extend edlib's definition of equality (which is that each character is equal only + to itself). + This can be useful e.g. when you want edlib to be case insensitive, or if you want certain + characters to act as a wildcards. + Set to None (default) if you do not want to extend edlib's default equality definition. + @return Dictionary with following fields: + {int} editDistance Integer, -1 if it is larger than k. + {int} alphabetLength Integer, length of unique characters in 'query' and 'target' + {[(int, int)]} locations List of locations, in format [(start, end)]. + {string} cigar Cigar is a standard format for alignment path. + Here we are using extended cigar format, which uses following symbols: + Match: '=', Insertion to target: 'I', Deletion from target: 'D', Mismatch: 'X'. + e.g. cigar of "5=1X1=1I" means "5 matches, 1 mismatch, 1 match, 1 insertion (to target)". + """ + # Transform python sequences of hashables into c strings. 
+ cdef bytes query_bytes + cdef bytes target_bytes + query_bytes, target_bytes, additionalEqualities = _map_to_bytes( + query, target, additionalEqualities) + cdef char* cquery = query_bytes; + cdef char* ctarget = target_bytes; + + # Build an edlib config object based on given parameters. + cconfig = edlibDefaultAlignConfig() + + if k is not None: cconfig.k = k + + if mode == 'NW': cconfig.mode = EDLIB_MODE_NW + if mode == 'HW': cconfig.mode = EDLIB_MODE_HW + if mode == 'SHW': cconfig.mode = EDLIB_MODE_SHW + + if task == 'distance': cconfig.task = EDLIB_TASK_DISTANCE + if task == 'locations': cconfig.task = EDLIB_TASK_LOC + if task == 'path': cconfig.task = EDLIB_TASK_PATH + + cdef bytes tmp_bytes; + cdef char* tmp_cstring; + cdef EdlibEqualityPair* c_additionalEqualities = NULL + if additionalEqualities is None: + cconfig.additionalEqualities = NULL + cconfig.additionalEqualitiesLength = 0 + else: + c_additionalEqualities = PyMem_Malloc(len(additionalEqualities) + * cython.sizeof(EdlibEqualityPair)) + for i in range(len(additionalEqualities)): + c_additionalEqualities[i].first = bytearray(additionalEqualities[i][0].encode('utf-8'))[0] + c_additionalEqualities[i].second = bytearray(additionalEqualities[i][1].encode('utf-8'))[0] + cconfig.additionalEqualities = c_additionalEqualities + cconfig.additionalEqualitiesLength = len(additionalEqualities) + + # Run alignment -- need to get len before disabling the GIL + query_len = len(query) + target_len = len(target) + with nogil: + cresult = edlibAlign(cquery, query_len, ctarget, target_len, cconfig) + if c_additionalEqualities != NULL: PyMem_Free(c_additionalEqualities) + + if cresult.status == 1: + raise Exception("There was an error.") + + # Build python dictionary with results from result object that edlib returned. + locations = [] + if cresult.numLocations >= 0: + for i in range(cresult.numLocations): + locations.append((cresult.startLocations[i] if cresult.startLocations else None, + cresult.endLocations[i] if cresult.endLocations else None)) + cigar = None + if cresult.alignment: + ccigar = edlibAlignmentToCigar(cresult.alignment, cresult.alignmentLength, + EDLIB_CIGAR_EXTENDED) + cigar = ccigar + cigar = cigar.decode('UTF-8') + result = { + 'editDistance': cresult.editDistance, + 'alphabetLength': cresult.alphabetLength, + 'locations': locations, + 'cigar': cigar + } + edlibFreeAlignResult(cresult) + + return result + + +def getNiceAlignment(alignResult, query, target, gapSymbol="-"): + """ Output alignments from align() in NICE format + @param {dictionary} alignResult, output of the method align() + NOTE: The method align() requires the argument task="path" + @param {string} query, the exact query used for alignResult + @param {string} target, the exact target used for alignResult + @param {string} gapSymbol, default "-" + String used to represent gaps in the alignment between query and target + @return Alignment in NICE format, which is human-readable visual representation of how the query and target align to each other. + e.g., for "telephone" and "elephant", it would look like: + telephone + |||||.|. + -elephant + It is represented as dictionary with following fields: + - {string} query_aligned + - {string} matched_aligned ('|' for match, '.' for mismatch, ' ' for insertion/deletion) + - {string} target_aligned + Normally you will want to print these three in order above joined with newline character. + """ + if type(alignResult) is not dict: + raise Exception("The object alignResult is expected to be a python dictionary. 
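A usage sketch for the ``align()`` wrapper defined above (import path per the new ``dysgu/edlib`` package; the edit distance for this pair is 3)::

    from dysgu.edlib import align

    res = align("telephone", "elephant", mode="NW", task="path")
    print(res["editDistance"])   # 3
    print(res["cigar"])          # extended cigar over '=', 'X', 'I', 'D'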
Please check the input alignResult.") + + if 'locations' not in alignResult.keys(): + raise Exception("The object alignResult is expected to contain a field 'locations'. Please check the input alignResult.") + + target_pos = alignResult["locations"][0][0] + if target_pos == None: + target_pos = 0 + query_pos = 0 ## 0-indexed + target_aln = match_aln = query_aln = "" + + if 'cigar' not in alignResult.keys(): + raise Exception("The object alignResult is expected to contain a CIGAR string. Please check the input alignResult.") + + cigar = alignResult["cigar"] + + if alignResult["cigar"] == '' or alignResult["cigar"] == None: + raise Exception("The object alignResult contains an empty CIGAR string. Users must run align() with task='path'. Please check the input alignResult.") + + ### + ## cigar parsing, motivated by yech1990: https://github.com/Martinsos/edlib/issues/127 + ## + ## 'num_occurences' == Number of occurrences of the alignment operation + ## 'alignment_operation' == Cigar symbol/code that represent an alignment operation + ## + for num_occurrences, alignment_operation in re.findall("(\d+)(\D)", cigar): + num_occurrences = int(num_occurrences) + if alignment_operation == "=": + target_aln += target[target_pos : target_pos + num_occurrences] + target_pos += num_occurrences + query_aln += query[query_pos : query_pos + num_occurrences] + query_pos += num_occurrences + match_aln += "|" * num_occurrences + elif alignment_operation == "X": + target_aln += target[target_pos : target_pos + num_occurrences] + target_pos += num_occurrences + query_aln += query[query_pos : query_pos + num_occurrences] + query_pos += num_occurrences + match_aln += "." * num_occurrences + elif alignment_operation == "D": + target_aln += target[target_pos : target_pos + num_occurrences] + target_pos += num_occurrences + query_aln += gapSymbol * num_occurrences + query_pos += 0 + match_aln +=gapSymbol * num_occurrences + elif alignment_operation == "I": + target_aln += gapSymbol * num_occurrences + target_pos += 0 + query_aln += query[query_pos : query_pos + num_occurrences] + query_pos += num_occurrences + match_aln += gapSymbol * num_occurrences + else: + raise Exception("The CIGAR string from alignResult contains a symbol not '=', 'X', 'D', 'I'. Please check the validity of alignResult and alignResult.cigar") + + alignments = { + 'query_aligned': query_aln, + 'matched_aligned': match_aln, + 'target_aligned': target_aln + } + + return alignments + diff --git a/dysgu/edlib/src/edlib.cpp b/dysgu/edlib/src/edlib.cpp new file mode 100644 index 0000000..46cbd17 --- /dev/null +++ b/dysgu/edlib/src/edlib.cpp @@ -0,0 +1,1495 @@ +#include "edlib.h" + +#include +#include +#include +#include +#include +#include + +using namespace std; + +typedef uint64_t Word; +static const int WORD_SIZE = sizeof(Word) * 8; // Size of Word in bits +static const Word WORD_1 = static_cast(1); +static const Word HIGH_BIT_MASK = WORD_1 << (WORD_SIZE - 1); // 100..00 +static const int MAX_UCHAR = 255; + +// Data needed to find alignment. +struct AlignmentData { + Word* Ps; + Word* Ms; + int* scores; + int* firstBlocks; + int* lastBlocks; + + AlignmentData(int maxNumBlocks, int targetLength) { + // We build a complete table and mark first and last block for each column + // (because algorithm is banded so only part of each columns is used). + // TODO: do not build a whole table, but just enough blocks for each column. 
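And a companion sketch for ``getNiceAlignment()`` above, which requires ``align()`` to be run with ``task="path"`` (imported from the module directly, since the package ``__init__`` only re-exports ``align``)::

    from dysgu.edlib.edlib import align, getNiceAlignment

    res = align("telephone", "elephant", task="path")
    nice = getNiceAlignment(res, "telephone", "elephant")
    print(nice["query_aligned"])
    print(nice["matched_aligned"])   # '|' match, '.' mismatch, '-' gap
    print(nice["target_aligned"])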
+ Ps = new Word[maxNumBlocks * targetLength]; + Ms = new Word[maxNumBlocks * targetLength]; + scores = new int[maxNumBlocks * targetLength]; + firstBlocks = new int[targetLength]; + lastBlocks = new int[targetLength]; + } + + ~AlignmentData() { + delete[] Ps; + delete[] Ms; + delete[] scores; + delete[] firstBlocks; + delete[] lastBlocks; + } +}; + +struct Block { + Word P; // Pvin + Word M; // Mvin + int score; // score of last cell in block; + + Block() {} + Block(Word p, Word m, int s) :P(p), M(m), score(s) {} +}; + + +/** + * Defines equality relation on alphabet characters. + * By default each character is always equal only to itself, but you can also provide additional equalities. + */ +class EqualityDefinition { +private: + bool matrix[MAX_UCHAR + 1][MAX_UCHAR + 1]; +public: + EqualityDefinition(const string& alphabet, + const EdlibEqualityPair* additionalEqualities = NULL, + const int additionalEqualitiesLength = 0) { + for (int i = 0; i < static_cast(alphabet.size()); i++) { + for (int j = 0; j < static_cast(alphabet.size()); j++) { + matrix[i][j] = (i == j); + } + } + if (additionalEqualities != NULL) { + for (int i = 0; i < additionalEqualitiesLength; i++) { + size_t firstTransformed = alphabet.find(additionalEqualities[i].first); + size_t secondTransformed = alphabet.find(additionalEqualities[i].second); + if (firstTransformed != string::npos && secondTransformed != string::npos) { + matrix[firstTransformed][secondTransformed] = matrix[secondTransformed][firstTransformed] = true; + } + } + } + } + + /** + * @param a Element from transformed sequence. + * @param b Element from transformed sequence. + * @return True if a and b are defined as equal, false otherwise. + */ + bool areEqual(unsigned char a, unsigned char b) const { + return matrix[a][b]; + } +}; + +static int myersCalcEditDistanceSemiGlobal(const Word* Peq, int W, int maxNumBlocks, + int queryLength, + const unsigned char* target, int targetLength, + int k, EdlibAlignMode mode, + int* bestScore_, int** positions_, int* numPositions_); + +static int myersCalcEditDistanceNW(const Word* Peq, int W, int maxNumBlocks, + int queryLength, + const unsigned char* target, int targetLength, + int k, int* bestScore_, + int* position_, bool findAlignment, + AlignmentData** alignData, int targetStopPosition); + + +static int obtainAlignment( + const unsigned char* query, const unsigned char* rQuery, int queryLength, + const unsigned char* target, const unsigned char* rTarget, int targetLength, + const EqualityDefinition& equalityDefinition, int alphabetLength, int bestScore, + unsigned char** alignment, int* alignmentLength); + +static int obtainAlignmentHirschberg( + const unsigned char* query, const unsigned char* rQuery, int queryLength, + const unsigned char* target, const unsigned char* rTarget, int targetLength, + const EqualityDefinition& equalityDefinition, int alphabetLength, int bestScore, + unsigned char** alignment, int* alignmentLength); + +static int obtainAlignmentTraceback(int queryLength, int targetLength, + int bestScore, const AlignmentData* alignData, + unsigned char** alignment, int* alignmentLength); + +static string transformSequences(const char* queryOriginal, int queryLength, + const char* targetOriginal, int targetLength, + unsigned char** queryTransformed, + unsigned char** targetTransformed); + +static inline int ceilDiv(int x, int y); + +static inline unsigned char* createReverseCopy(const unsigned char* seq, int length); + +static inline Word* buildPeq(const int alphabetLength, + const unsigned char* 
query, + const int queryLength, + const EqualityDefinition& equalityDefinition); + + +/** + * Main edlib method. + */ +extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const int queryLength, + const char* const targetOriginal, const int targetLength, + const EdlibAlignConfig config) { + EdlibAlignResult result; + result.status = EDLIB_STATUS_OK; + result.editDistance = -1; + result.endLocations = result.startLocations = NULL; + result.numLocations = 0; + result.alignment = NULL; + result.alignmentLength = 0; + result.alphabetLength = 0; + + /*------------ TRANSFORM SEQUENCES AND RECOGNIZE ALPHABET -----------*/ + unsigned char* query, * target; + string alphabet = transformSequences(queryOriginal, queryLength, targetOriginal, targetLength, + &query, &target); + result.alphabetLength = static_cast(alphabet.size()); + /*-------------------------------------------------------*/ + + // Handle special situation when at least one of the sequences has length 0. + if (queryLength == 0 || targetLength == 0) { + if (config.mode == EDLIB_MODE_NW) { + result.editDistance = std::max(queryLength, targetLength); + result.endLocations = static_cast(malloc(sizeof(int) * 1)); + result.endLocations[0] = targetLength - 1; + result.numLocations = 1; + } else if (config.mode == EDLIB_MODE_SHW || config.mode == EDLIB_MODE_HW) { + result.editDistance = queryLength; + result.endLocations = static_cast(malloc(sizeof(int) * 1)); + result.endLocations[0] = -1; + result.numLocations = 1; + } else { + result.status = EDLIB_STATUS_ERROR; + } + + free(query); + free(target); + return result; + } + + /*--------------------- INITIALIZATION ------------------*/ + int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); // bmax in Myers + int W = maxNumBlocks * WORD_SIZE - queryLength; // number of redundant cells in last level blocks + EqualityDefinition equalityDefinition(alphabet, config.additionalEqualities, config.additionalEqualitiesLength); + Word* Peq = buildPeq(static_cast(alphabet.size()), query, queryLength, equalityDefinition); + /*-------------------------------------------------------*/ + + /*------------------ MAIN CALCULATION -------------------*/ + // TODO: Store alignment data only after k is determined? That could make things faster. + int positionNW; // Used only when mode is NW. + AlignmentData* alignData = NULL; + bool dynamicK = false; + int k = config.k; + if (k < 0) { // If valid k is not given, auto-adjust k until solution is found. + dynamicK = true; + k = WORD_SIZE; // Gives better results than smaller k. + } + + do { + if (config.mode == EDLIB_MODE_HW || config.mode == EDLIB_MODE_SHW) { + myersCalcEditDistanceSemiGlobal(Peq, W, maxNumBlocks, + queryLength, target, targetLength, + k, config.mode, &(result.editDistance), + &(result.endLocations), &(result.numLocations)); + } else { // mode == EDLIB_MODE_NW + myersCalcEditDistanceNW(Peq, W, maxNumBlocks, + queryLength, target, targetLength, + k, &(result.editDistance), &positionNW, + false, &alignData, -1); + } + k *= 2; + } while(dynamicK && result.editDistance == -1); + + if (result.editDistance >= 0) { // If there is solution. + // If NW mode, set end location explicitly. + if (config.mode == EDLIB_MODE_NW) { + result.endLocations = static_cast(malloc(sizeof(int) * 1)); + result.endLocations[0] = targetLength - 1; + result.numLocations = 1; + } + + // Find starting locations. 
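Note on the ``k *= 2`` loop above: this is edlib's auto-k strategy, run a distance computation bounded by k and double k until a solution falls inside the band. A pure-Python stand-in, with a simple banded DP in place of the bit-vector core::

    def bounded_distance(q, t, k):
        # banded Levenshtein DP; returns -1 if the distance exceeds k
        if abs(len(q) - len(t)) > k:
            return -1
        prev = list(range(len(t) + 1))
        for i, qc in enumerate(q, 1):
            cur = [i] + [k + 1] * len(t)
            for j in range(max(1, i - k), min(len(t), i + k) + 1):
                cur[j] = min(prev[j - 1] + (qc != t[j - 1]), prev[j] + 1, cur[j - 1] + 1)
            prev = cur
        return prev[-1] if prev[-1] <= k else -1

    def edit_distance(q, t, k=64):
        d = bounded_distance(q, t, k)
        while d == -1:            # mirrors: k *= 2; retry while editDistance == -1
            k *= 2
            d = bounded_distance(q, t, k)
        return d

    print(edit_distance("telephone", "elephant"))  # 3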
+ if (config.task == EDLIB_TASK_LOC || config.task == EDLIB_TASK_PATH) { + result.startLocations = static_cast(malloc(result.numLocations * sizeof(int))); + if (config.mode == EDLIB_MODE_HW) { // If HW, I need to calculate start locations. + const unsigned char* rTarget = createReverseCopy(target, targetLength); + const unsigned char* rQuery = createReverseCopy(query, queryLength); + // Peq for reversed query. + Word* rPeq = buildPeq(static_cast(alphabet.size()), rQuery, queryLength, equalityDefinition); + for (int i = 0; i < result.numLocations; i++) { + int endLocation = result.endLocations[i]; + if (endLocation == -1) { + // NOTE: Sometimes one of optimal solutions is that query starts before target, like this: + // AAGG <- target + // CCTT <- query + // It will never be only optimal solution and it does not happen often, however it is + // possible and in that case end location will be -1. What should we do with that? + // Should we just skip reporting such end location, although it is a solution? + // If we do report it, what is the start location? -4? -1? Nothing? + // TODO: Figure this out. This has to do in general with how we think about start + // and end locations. + // Also, we have alignment later relying on this locations to limit the space of it's + // search -> how can it do it right if these locations are negative or incorrect? + result.startLocations[i] = 0; // I put 0 for now, but it does not make much sense. + } else { + int bestScoreSHW, numPositionsSHW; + int* positionsSHW; + myersCalcEditDistanceSemiGlobal( + rPeq, W, maxNumBlocks, + queryLength, rTarget + targetLength - endLocation - 1, endLocation + 1, + result.editDistance, EDLIB_MODE_SHW, + &bestScoreSHW, &positionsSHW, &numPositionsSHW); + // Taking last location as start ensures that alignment will not start with insertions + // if it can start with mismatches instead. + result.startLocations[i] = endLocation - positionsSHW[numPositionsSHW - 1]; + free(positionsSHW); + } + } + delete[] rTarget; + delete[] rQuery; + delete[] rPeq; + } else { // If mode is SHW or NW + for (int i = 0; i < result.numLocations; i++) { + result.startLocations[i] = 0; + } + } + } + + // Find alignment -> all comes down to finding alignment for NW. + // Currently we return alignment only for first pair of locations. + if (config.task == EDLIB_TASK_PATH) { + int alnStartLocation = result.startLocations[0]; + int alnEndLocation = result.endLocations[0]; + const unsigned char* alnTarget = target + alnStartLocation; + const int alnTargetLength = alnEndLocation - alnStartLocation + 1; + const unsigned char* rAlnTarget = createReverseCopy(alnTarget, alnTargetLength); + const unsigned char* rQuery = createReverseCopy(query, queryLength); + obtainAlignment(query, rQuery, queryLength, + alnTarget, rAlnTarget, alnTargetLength, + equalityDefinition, static_cast(alphabet.size()), result.editDistance, + &(result.alignment), &(result.alignmentLength)); + delete[] rAlnTarget; + delete[] rQuery; + } + } + /*-------------------------------------------------------*/ + + //--- Free memory ---// + delete[] Peq; + free(query); + free(target); + if (alignData) delete alignData; + //-------------------// + + return result; +} + +extern "C" char* edlibAlignmentToCigar(const unsigned char* const alignment, const int alignmentLength, + const EdlibCigarFormat cigarFormat) { + if (cigarFormat != EDLIB_CIGAR_EXTENDED && cigarFormat != EDLIB_CIGAR_STANDARD) { + return 0; + } + + // Maps move code from alignment to char in cigar. 
+ // 0 1 2 3 + char moveCodeToChar[] = {'=', 'I', 'D', 'X'}; + if (cigarFormat == EDLIB_CIGAR_STANDARD) { + moveCodeToChar[0] = moveCodeToChar[3] = 'M'; + } + + vector* cigar = new vector(); + char lastMove = 0; // Char of last move. 0 if there was no previous move. + int numOfSameMoves = 0; + for (int i = 0; i <= alignmentLength; i++) { + // if new sequence of same moves started + if (i == alignmentLength || (moveCodeToChar[alignment[i]] != lastMove && lastMove != 0)) { + // Write number of moves to cigar string. + int numDigits = 0; + for (; numOfSameMoves; numOfSameMoves /= 10) { + cigar->push_back('0' + numOfSameMoves % 10); + numDigits++; + } + reverse(cigar->end() - numDigits, cigar->end()); + // Write code of move to cigar string. + cigar->push_back(lastMove); + // If not at the end, start new sequence of moves. + if (i < alignmentLength) { + // Check if alignment has valid values. + if (alignment[i] > 3) { + delete cigar; + return 0; + } + numOfSameMoves = 0; + } + } + if (i < alignmentLength) { + lastMove = moveCodeToChar[alignment[i]]; + numOfSameMoves++; + } + } + cigar->push_back(0); // Null character termination. + char* cigar_ = static_cast(malloc(cigar->size() * sizeof(char))); + memcpy(cigar_, &(*cigar)[0], cigar->size() * sizeof(char)); + delete cigar; + + return cigar_; +} + +/** + * Build Peq table for given query and alphabet. + * Peq is table of dimensions alphabetLength+1 x maxNumBlocks. + * Bit i of Peq[s * maxNumBlocks + b] is 1 if i-th symbol from block b of query equals symbol s, otherwise it is 0. + * NOTICE: free returned array with delete[]! + */ +static inline Word* buildPeq(const int alphabetLength, + const unsigned char* const query, + const int queryLength, + const EqualityDefinition& equalityDefinition) { + int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); + // table of dimensions alphabetLength+1 x maxNumBlocks. Last symbol is wildcard. + Word* Peq = new Word[(alphabetLength + 1) * maxNumBlocks]; + + // Build Peq (1 is match, 0 is mismatch). NOTE: last column is wildcard(symbol that matches anything) with just 1s + for (int symbol = 0; symbol <= alphabetLength; symbol++) { + for (int b = 0; b < maxNumBlocks; b++) { + if (symbol < alphabetLength) { + Peq[symbol * maxNumBlocks + b] = 0; + for (int r = (b+1) * WORD_SIZE - 1; r >= b * WORD_SIZE; r--) { + Peq[symbol * maxNumBlocks + b] <<= 1; + // NOTE: We pretend like query is padded at the end with W wildcard symbols + if (r >= queryLength || equalityDefinition.areEqual(query[r], symbol)) + Peq[symbol * maxNumBlocks + b] += 1; + } + } else { // Last symbol is wildcard, so it is all 1s + Peq[symbol * maxNumBlocks + b] = static_cast(-1); + } + } + } + + return Peq; +} + + +/** + * Returns new sequence that is reverse of given sequence. + * Free returned array with delete[]. + */ +static inline unsigned char* createReverseCopy(const unsigned char* const seq, const int length) { + unsigned char* rSeq = new unsigned char[length]; + for (int i = 0; i < length; i++) { + rSeq[i] = seq[length - i - 1]; + } + return rSeq; +} + +/** + * Corresponds to Advance_Block function from Myers. + * Calculates one word(block), which is part of a column. + * Highest bit of word (one most to the left) is most bottom cell of block from column. + * Pv[i] and Mv[i] define vin of cell[i]: vin = cell[i] - cell[i-1]. + * @param [in] Pv Bitset, Pv[i] == 1 if vin is +1, otherwise Pv[i] == 0. + * @param [in] Mv Bitset, Mv[i] == 1 if vin is -1, otherwise Mv[i] == 0. + * @param [in] Eq Bitset, Eq[i] == 1 if match, 0 if mismatch. 
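Note: ``edlibAlignmentToCigar`` above run-length encodes an array of move codes (0 '=', 1 'I', 2 'D', 3 'X'; '=' and 'X' collapse to 'M' in standard format). The same encoding in Python::

    from itertools import groupby

    def moves_to_cigar(moves, extended=True):
        chars = ('=', 'I', 'D', 'X') if extended else ('M', 'I', 'D', 'M')
        runs = ((ch, sum(1 for _ in grp)) for ch, grp in groupby(chars[m] for m in moves))
        return "".join(f"{n}{ch}" for ch, n in runs)

    print(moves_to_cigar([0, 0, 0, 3, 0, 1]))         # 3=1X1=1I
    print(moves_to_cigar([0, 0, 0, 3, 0, 1], False))  # 5M1I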
+ * @param [in] Phin  Bitset, 00..01 when hin == +1.
+ * @param [in] Mhin  Bitset, 00..01 when hin == -1.
+ * @param [out] PvOut  Bitset, PvOut[i] == 1 if vout is +1, otherwise PvOut[i] == 0.
+ * @param [out] MvOut  Bitset, MvOut[i] == 1 if vout is -1, otherwise MvOut[i] == 0.
+ * @param [out] Phout  Bitset, 00..01 when hout == +1.
+ * @param [out] Mhout  Bitset, 00..01 when hout == -1.
+ */
+static inline pair<Word, Word> calculateBlock(Word Pv, Word Mv, Word Eq, const Word Phin, const Word Mhin,
+                                              Word &PvOut, Word &MvOut) {
+    // hin can be 1, -1 or 0.
+    //  1 -> 00...01
+    //  0 -> 00...00
+    // -1 -> 11...11 (2-complement)
+
+    Word Xv = Eq | Mv;
+    // This is the instruction below written using 'if': if (hin < 0) Eq |= (Word)1;
+    Eq |= Mhin;
+    Word Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq;
+
+    Word Ph = Mv | ~(Xh | Pv);
+    Word Mh = Pv & Xh;
+
+    // This is the instruction below written using 'if': if (Ph & HIGH_BIT_MASK) hout = 1;
+    Word Phout = Ph >> (WORD_SIZE - 1);
+    // This is the instruction below written using 'if': if (Mh & HIGH_BIT_MASK) hout = -1;
+    Word Mhout = Mh >> (WORD_SIZE - 1);
+
+    Ph <<= 1;
+    Mh <<= 1;
+
+    // This is the instruction below written using 'if': if (hin < 0) Mh |= (Word)1;
+    Mh |= Mhin;
+    // This is the instruction below written using 'if': if (hin > 0) Ph |= (Word)1;
+    Ph |= Phin;
+
+    PvOut = Mh | ~(Xv | Ph);
+    MvOut = Ph & Xv;
+
+    return {Phout, Mhout};
+}
+
+/**
+ * Does ceiling division x / y.
+ * Note: x and y must be non-negative and x + y must not overflow.
+ */
+static inline int ceilDiv(const int x, const int y) {
+    return x % y ? x / y + 1 : x / y;
+}
+
+static inline int min(const int x, const int y) {
+    return x < y ? x : y;
+}
+
+static inline int max(const int x, const int y) {
+    return x > y ? x : y;
+}
+
+
+/**
+ * @param [in] block
+ * @return Values of cells in block, starting with the bottom cell in the block.
+ */
+static inline vector<int> getBlockCellValues(const Block block) {
+    vector<int> scores(WORD_SIZE);
+    int score = block.score;
+    Word mask = HIGH_BIT_MASK;
+    for (int i = 0; i < WORD_SIZE - 1; i++) {
+        scores[i] = score;
+        if (block.P & mask) score--;
+        if (block.M & mask) score++;
+        mask >>= 1;
+    }
+    scores[WORD_SIZE - 1] = score;
+    return scores;
+}
+
+/**
+ * Writes values of cells in block into the given array, starting with the first/top cell.
+ * @param [in] block
+ * @param [out] dest  Array into which cell values are written. Must have size of at least WORD_SIZE.
+ */
+static inline void readBlock(const Block block, int* const dest) {
+    int score = block.score;
+    Word mask = HIGH_BIT_MASK;
+    for (int i = 0; i < WORD_SIZE - 1; i++) {
+        dest[WORD_SIZE - 1 - i] = score;
+        if (block.P & mask) score--;
+        if (block.M & mask) score++;
+        mask >>= 1;
+    }
+    dest[0] = score;
+}
+
+/**
+ * Writes values of cells in block into the given array, starting with the last/bottom cell.
+ * @param [in] block
+ * @param [out] dest  Array into which cell values are written. Must have size of at least WORD_SIZE.
+ */
+static inline void readBlockReverse(const Block block, int* const dest) {
+    int score = block.score;
+    Word mask = HIGH_BIT_MASK;
+    for (int i = 0; i < WORD_SIZE - 1; i++) {
+        dest[i] = score;
+        if (block.P & mask) score--;
+        if (block.M & mask) score++;
+        mask >>= 1;
+    }
+    dest[WORD_SIZE - 1] = score;
+}
+
+/**
+ * @param [in] block
+ * @param [in] k
+ * @return True if all cells in block have value larger than k, otherwise false.
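+ * Used by the Ukkonen band-reduction passes: a block whose cells all exceed k cannot be part
+ * of any solution with score <= k, so it can safely be dropped from the band.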
+ */
+static inline bool allBlockCellsLarger(const Block block, const int k) {
+    vector<int> scores = getBlockCellValues(block);
+    for (int i = 0; i < WORD_SIZE; i++) {
+        if (scores[i] <= k) return false;
+    }
+    return true;
+}
+
+
+/**
+ * Uses Myers' bit-vector algorithm to find the edit distance for one of the semi-global alignment methods.
+ * @param [in] Peq  Query profile.
+ * @param [in] W  Size of padding in last block.
+ *                TODO: Calculate this directly from query, instead of passing it.
+ * @param [in] maxNumBlocks  Number of blocks needed to cover the whole query.
+ *                           TODO: Calculate this directly from query, instead of passing it.
+ * @param [in] queryLength
+ * @param [in] target
+ * @param [in] targetLength
+ * @param [in] k
+ * @param [in] mode  EDLIB_MODE_HW or EDLIB_MODE_SHW
+ * @param [out] bestScore_  Edit distance.
+ * @param [out] positions_  Array of 0-indexed positions in target at which the best score was found.
+ *                          Make sure to free this array with free().
+ * @param [out] numPositions_  Number of positions in the positions_ array.
+ * @return Status.
+ */
+static int myersCalcEditDistanceSemiGlobal(
+        const Word* const Peq, const int W, const int maxNumBlocks,
+        const int queryLength,
+        const unsigned char* const target, const int targetLength,
+        int k, const EdlibAlignMode mode,
+        int* const bestScore_, int** const positions_, int* const numPositions_) {
+    *positions_ = NULL;
+    *numPositions_ = 0;
+
+    // firstBlock is the 0-based index of the first block in the Ukkonen band.
+    // lastBlock is the 0-based index of the last block in the Ukkonen band.
+    int firstBlock = 0;
+    int lastBlock = min(ceilDiv(k + 1, WORD_SIZE), maxNumBlocks) - 1;  // y in Myers
+    Block *bl;  // Current block
+
+    Block* blocks = new Block[maxNumBlocks];
+
+    // For HW, the solution will never be larger than queryLength.
+    if (mode == EDLIB_MODE_HW) {
+        k = min(queryLength, k);
+    }
+
+    // Every STRONG_REDUCE_NUM columns, the band is reduced in a more expensive way.
+    // This gives a speed-up of about 2 times for small k.
+    const int STRONG_REDUCE_NUM = 2048;
+
+    // Initialize P, M and score
+    bl = blocks;
+    for (int b = 0; b <= lastBlock; b++) {
+        bl->score = (b + 1) * WORD_SIZE;
+        bl->P = static_cast<Word>(-1);  // All 1s
+        bl->M = static_cast<Word>(0);
+        bl++;
+    }
+
+    int bestScore = -1;
+    vector<int> positions;  // TODO: Maybe put this on the heap?
+    const Word PstartHout = mode == EDLIB_MODE_HW ? 0 : 1;  // If 0 then the gap before the query is not penalized;
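+    // (For HW the horizontal input at the top boundary is 0 in every column, so the boundary
+    //  row stays 0 and gaps before the query are free; for SHW/NW it is +1, so the boundary
+    //  row grows by one per column and such gaps are penalized.)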
+    const unsigned char* targetChar = target;
+    for (int c = 0; c < targetLength; c++) {  // for each column
+        const Word* Peq_c = Peq + (*targetChar) * maxNumBlocks;
+
+        //----------------------- Calculate column -------------------------//
+        Word Phout = PstartHout;
+        Word Mhout = 0;
+        int hout = 0;
+        bl = blocks + firstBlock;
+        Peq_c += firstBlock;
+        for (int b = firstBlock; b <= lastBlock; b++) {
+            pair<Word, Word> PMhout = calculateBlock(bl->P, bl->M, *Peq_c, Phout, Mhout, bl->P, bl->M);
+            Phout = PMhout.first;
+            Mhout = PMhout.second;
+            hout = Phout - Mhout;
+            bl->score += hout;
+            bl++; Peq_c++;
+        }
+        bl--; Peq_c--;
+        //------------------------------------------------------------------//
+
+        //---------- Adjust number of blocks according to Ukkonen ----------//
+        if ((lastBlock < maxNumBlocks - 1) && (bl->score - hout <= k)  // bl is pointing to last block
+            && ((*(Peq_c + 1) & WORD_1) || hout < 0)) {  // Peq_c is pointing to last block
+            // If the score of the left block is not too big, calculate one more block
+            lastBlock++; bl++; Peq_c++;
+            bl->P = static_cast<Word>(-1);  // All 1s
+            bl->M = static_cast<Word>(0);
+            pair<Word, Word> PMhout = calculateBlock(bl->P, bl->M, *Peq_c, Phout, Mhout, bl->P, bl->M);
+            bl->score = (bl - 1)->score - hout + WORD_SIZE + PMhout.first - PMhout.second;
+        } else {
+            while (lastBlock >= firstBlock && bl->score >= k + WORD_SIZE) {
+                lastBlock--; bl--; Peq_c--;
+            }
+        }
+
+        // Every STRONG_REDUCE_NUM columns, do a more expensive but also more effective block reduction.
+        // This is important!
+        //
+        // Reduce the band by decreasing the last block if possible.
+        if (c % STRONG_REDUCE_NUM == 0) {
+            while (lastBlock >= 0 && lastBlock >= firstBlock && allBlockCellsLarger(*bl, k)) {
+                lastBlock--; bl--; Peq_c--;
+            }
+        }
+        // For HW, even if all cells are > k, there may still be a solution in the next
+        // column because the starting conditions at the upper boundary are 0.
+        // That means that the first block is always a candidate for a solution,
+        // and we can never end the calculation before the last column.
+        if (mode == EDLIB_MODE_HW && lastBlock == -1) {
+            lastBlock++; bl++; Peq_c++;
+        }
+
+        // Reduce the band by increasing the first block if possible. Not applicable to HW.
+        if (mode != EDLIB_MODE_HW) {
+            while (firstBlock <= lastBlock && blocks[firstBlock].score >= k + WORD_SIZE) {
+                firstBlock++;
+            }
+            if (c % STRONG_REDUCE_NUM == 0) {  // Do strong reduction every STRONG_REDUCE_NUM columns
+                while (firstBlock <= lastBlock && allBlockCellsLarger(blocks[firstBlock], k)) {
+                    firstBlock++;
+                }
+            }
+        }
+
+        // If the band stops existing, finish.
+        if (lastBlock < firstBlock) {
+            *bestScore_ = bestScore;
+            if (bestScore != -1) {
+                *positions_ = static_cast<int*>(malloc(sizeof(int) * static_cast<size_t>(positions.size())));
+                *numPositions_ = static_cast<int>(positions.size());
+                copy(positions.begin(), positions.end(), *positions_);
+            }
+            delete[] blocks;
+            return EDLIB_STATUS_OK;
+        }
+        //------------------------------------------------------------------//
+
+        //------------------------- Update best score ----------------------//
+        if (lastBlock == maxNumBlocks - 1) {
+            int colScore = bl->score;
+            if (colScore <= k) {  // Scores > k don't have correct values (so we cannot use them), but are certainly > k.
+                // NOTE: The score that I find in column c is actually the score from column c - W.
+                if (bestScore == -1 || colScore <= bestScore) {
+                    if (colScore != bestScore) {
+                        positions.clear();
+                        bestScore = colScore;
+                        // Change k so we will look only for equal or better
+                        // scores than the best found so far.
+                        k = bestScore;
+                    }
+                    positions.push_back(c - W);
+                }
+            }
+        }
+        //------------------------------------------------------------------//
+
+        targetChar++;
+    }
+
+
+    // Obtain results for the last W columns from the last column.
+    if (lastBlock == maxNumBlocks - 1) {
+        vector<int> blockScores = getBlockCellValues(*bl);
+        for (int i = 0; i < W; i++) {
+            int colScore = blockScores[i + 1];
+            if (colScore <= k && (bestScore == -1 || colScore <= bestScore)) {
+                if (colScore != bestScore) {
+                    positions.clear();
+                    k = bestScore = colScore;
+                }
+                positions.push_back(targetLength - W + i);
+            }
+        }
+    }
+
+    *bestScore_ = bestScore;
+    if (bestScore != -1) {
+        *positions_ = static_cast<int*>(malloc(sizeof(int) * static_cast<size_t>(positions.size())));
+        *numPositions_ = static_cast<int>(positions.size());
+        copy(positions.begin(), positions.end(), *positions_);
+    }
+
+    delete[] blocks;
+    return EDLIB_STATUS_OK;
+}
+
+
+/**
+ * Uses Myers' bit-vector algorithm to find the edit distance for the global (NW) alignment method.
+ * @param [in] Peq  Query profile.
+ * @param [in] W  Size of padding in last block.
+ *                TODO: Calculate this directly from query, instead of passing it.
+ * @param [in] maxNumBlocks  Number of blocks needed to cover the whole query.
+ *                           TODO: Calculate this directly from query, instead of passing it.
+ * @param [in] queryLength
+ * @param [in] target
+ * @param [in] targetLength
+ * @param [in] k
+ * @param [out] bestScore_  Edit distance.
+ * @param [out] position_  0-indexed position in target at which the best score was found.
+ * @param [in] findAlignment  If true, the whole matrix is remembered and alignment data is returned.
+ *                            A quadratic amount of memory is consumed.
+ * @param [out] alignData  Data needed for alignment traceback (for reconstruction of alignment).
+ *                         Set only if findAlignment is set to true, otherwise it is NULL.
+ *                         Make sure to free this array using delete[].
+ * @param [out] targetStopPosition  If set to -1, the whole calculation is performed normally, as expected.
+ *                                  If set to p, the calculation is performed up to position p in target (inclusive)
+ *                                  and column p is returned as the only column in alignData.
+ * @return Status.
+ */
+static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int maxNumBlocks,
+                                   const int queryLength,
+                                   const unsigned char* const target, const int targetLength,
+                                   int k, int* const bestScore_,
+                                   int* const position_, const bool findAlignment,
+                                   AlignmentData** const alignData, const int targetStopPosition) {
+    if (targetStopPosition > -1 && findAlignment) {
+        // They cannot both be set at the same time!
+        return EDLIB_STATUS_ERROR;
+    }
+
+    // Every STRONG_REDUCE_NUM columns, the band is reduced in a more expensive way.
+    const int STRONG_REDUCE_NUM = 2048;  // TODO: Choose this number dynamically (based on query and target lengths?), so it does not affect the speed of computation
+
+    if (k < abs(targetLength - queryLength)) {
+        *bestScore_ = *position_ = -1;
+        return EDLIB_STATUS_OK;
+    }
+
+    k = min(k, max(queryLength, targetLength));  // Upper bound for k
+
+    // firstBlock is the 0-based index of the first block in the Ukkonen band.
+    // lastBlock is the 0-based index of the last block in the Ukkonen band.
+    int firstBlock = 0;
+    // This is optimal now, by my formula.
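+    // (A sketch of why: reaching row i of column 0 costs at least i, and completing the
+    //  alignment from there costs at least i + targetLength - queryLength more, so a useful
+    //  cell must satisfy 2*i + targetLength - queryLength <= k, i.e.
+    //  i <= (k + queryLength - targetLength) / 2 -- hence the formula below.)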
+    int lastBlock = min(maxNumBlocks, ceilDiv(min(k, (k + queryLength - targetLength) / 2) + 1, WORD_SIZE)) - 1;
+    Block* bl;  // Current block
+
+    Block* blocks = new Block[maxNumBlocks];
+
+    // Initialize P, M and score
+    bl = blocks;
+    for (int b = 0; b <= lastBlock; b++) {
+        bl->score = (b + 1) * WORD_SIZE;
+        bl->P = static_cast<Word>(-1);  // All 1s
+        bl->M = static_cast<Word>(0);
+        bl++;
+    }
+
+    // If we want to find the alignment, we have to store the needed data.
+    if (findAlignment)
+        *alignData = new AlignmentData(maxNumBlocks, targetLength);
+    else if (targetStopPosition > -1)
+        *alignData = new AlignmentData(maxNumBlocks, 1);
+    else
+        *alignData = NULL;
+
+    const unsigned char* targetChar = target;
+    for (int c = 0; c < targetLength; c++) {  // for each column
+        const Word* Peq_c = Peq + *targetChar * maxNumBlocks;
+
+        //----------------------- Calculate column -------------------------//
+        int hout = 1;
+        Word Phout = 1;
+        Word Mhout = 0;
+        bl = blocks + firstBlock;
+        for (int b = firstBlock; b <= lastBlock; b++) {
+            pair<Word, Word> PMhout = calculateBlock(bl->P, bl->M, Peq_c[b], Phout, Mhout, bl->P, bl->M);
+            Phout = PMhout.first;
+            Mhout = PMhout.second;
+            hout = Phout - Mhout;
+            bl->score += hout;
+            bl++;
+        }
+        bl--;
+        //------------------------------------------------------------------//
+        // bl now points to the last block
+
+        // Update k. I do it only at the end of the column because it would slow the calculation too much otherwise.
+        // NOTICE: I add W when in the last block because it is actually the result from W cells to the left and W cells up.
+        k = min(k, bl->score
+                + max(targetLength - c - 1, queryLength - ((1 + lastBlock) * WORD_SIZE - 1) - 1)
+                + (lastBlock == maxNumBlocks - 1 ? W : 0));
+
+        //---------- Adjust number of blocks according to Ukkonen ----------//
+        //--- Adjust last block ---//
+        // If the block is not beneath the band, calculate the next block. Only the next, because others are certainly beneath the band.
+        if (lastBlock + 1 < maxNumBlocks
+            && !(//score[lastBlock] >= k + WORD_SIZE || // NOTICE: this condition could be satisfied by the block above as well!
+                 ((lastBlock + 1) * WORD_SIZE - 1
+                  > k - bl->score + 2 * WORD_SIZE - 2 - targetLength + c + queryLength))) {
+            lastBlock++; bl++;
+            bl->P = static_cast<Word>(-1);  // All 1s
+            bl->M = static_cast<Word>(0);
+            pair<Word, Word> PMhout = calculateBlock(bl->P, bl->M, Peq_c[lastBlock], Phout, Mhout, bl->P, bl->M);
+            Phout = PMhout.first;
+            Mhout = PMhout.second;
+            int newHout = Phout - Mhout;
+            bl->score = (bl - 1)->score - hout + WORD_SIZE + newHout;
+            hout = newHout;
+        }
+
+        // While the block is out of the band, move one block up.
+        // NOTE: The condition used here is looser than the one from the article, since I simplified the max() part of it.
+        // I could consider adding that max part, for optimal performance.
+        while (lastBlock >= firstBlock
+               && (bl->score >= k + WORD_SIZE
+                   || ((lastBlock + 1) * WORD_SIZE - 1 >
+                       // TODO: Does not work if I do not put +1! Why???
+                       k - bl->score + 2 * WORD_SIZE - 2 - targetLength + c + queryLength + 1))) {
+            lastBlock--; bl--;
+        }
+        //-------------------------//
+
+        //--- Adjust first block ---//
+        // While outside of the band, advance the block
+        while (firstBlock <= lastBlock
+               && (blocks[firstBlock].score >= k + WORD_SIZE
+                   || ((firstBlock + 1) * WORD_SIZE - 1 <
+                       blocks[firstBlock].score - k - targetLength + queryLength + c))) {
+            firstBlock++;
+        }
+        //--------------------------/
+
+
+        // TODO: consider if this part is useful, it does not seem to help much
+        if (c % STRONG_REDUCE_NUM == 0) {  // Every STRONG_REDUCE_NUM columns do a more expensive but more effective reduction
+            while (lastBlock >= firstBlock) {
+                // If all cells are outside of the band, remove the block
+                vector<int> scores = getBlockCellValues(*bl);
+                int numCells = lastBlock == maxNumBlocks - 1 ? WORD_SIZE - W : WORD_SIZE;
+                int r = lastBlock * WORD_SIZE + numCells - 1;
+                bool reduce = true;
+                for (int i = WORD_SIZE - numCells; i < WORD_SIZE; i++) {
+                    // TODO: Does not work if I do not put +1! Why???
+                    if (scores[i] <= k && r <= k - scores[i] - targetLength + c + queryLength + 1) {
+                        reduce = false;
+                        break;
+                    }
+                    r--;
+                }
+                if (!reduce) break;
+                lastBlock--; bl--;
+            }
+
+            while (firstBlock <= lastBlock) {
+                // If all cells are outside of the band, remove the block
+                vector<int> scores = getBlockCellValues(blocks[firstBlock]);
+                int numCells = firstBlock == maxNumBlocks - 1 ? WORD_SIZE - W : WORD_SIZE;
+                int r = firstBlock * WORD_SIZE + numCells - 1;
+                bool reduce = true;
+                for (int i = WORD_SIZE - numCells; i < WORD_SIZE; i++) {
+                    if (scores[i] <= k && r >= scores[i] - k - targetLength + c + queryLength) {
+                        reduce = false;
+                        break;
+                    }
+                    r--;
+                }
+                if (!reduce) break;
+                firstBlock++;
+            }
+        }
+
+
+        // If the band stops existing, finish.
+        if (lastBlock < firstBlock) {
+            *bestScore_ = *position_ = -1;
+            delete[] blocks;
+            return EDLIB_STATUS_OK;
+        }
+        //------------------------------------------------------------------//
+
+
+        //---- Save column so it can be used for reconstruction ----//
+        if (findAlignment && c < targetLength) {
+            bl = blocks + firstBlock;
+            for (int b = firstBlock; b <= lastBlock; b++) {
+                (*alignData)->Ps[maxNumBlocks * c + b] = bl->P;
+                (*alignData)->Ms[maxNumBlocks * c + b] = bl->M;
+                (*alignData)->scores[maxNumBlocks * c + b] = bl->score;
+                (*alignData)->firstBlocks[c] = firstBlock;
+                (*alignData)->lastBlocks[c] = lastBlock;
+                bl++;
+            }
+        }
+        //----------------------------------------------------------//
+        //---- If this is the stop column, save it and finish ----//
+        if (c == targetStopPosition) {
+            for (int b = firstBlock; b <= lastBlock; b++) {
+                (*alignData)->Ps[b] = (blocks + b)->P;
+                (*alignData)->Ms[b] = (blocks + b)->M;
+                (*alignData)->scores[b] = (blocks + b)->score;
+                (*alignData)->firstBlocks[0] = firstBlock;
+                (*alignData)->lastBlocks[0] = lastBlock;
+            }
+            *bestScore_ = -1;
+            *position_ = targetStopPosition;
+            delete[] blocks;
+            return EDLIB_STATUS_OK;
+        }
+        //----------------------------------------------------//
+
+        targetChar++;
+    }
+
+    if (lastBlock == maxNumBlocks - 1) {  // If the last block of the last column was calculated
+        // Obtain the best score from the block -> it is complicated because the query is padded with W cells
+        int bestScore = getBlockCellValues(blocks[lastBlock])[W];
+        if (bestScore <= k) {
+            *bestScore_ = bestScore;
+            *position_ = targetLength - 1;
+            delete[] blocks;
+            return EDLIB_STATUS_OK;
+        }
+    }
+
+    *bestScore_ = *position_ = -1;
+    delete[] blocks;
+    return EDLIB_STATUS_OK;
+}
+
+
+/**
+ * Finds one possible alignment that gives the optimal score by moving back through
+ * the dynamic programming matrix that is stored in alignData.
+ * Consumes a large amount of memory: O(queryLength * targetLength).
+ * @param [in] queryLength  Normal length, without W.
+ * @param [in] targetLength  Normal length, without W.
+ * @param [in] bestScore  Best score.
+ * @param [in] alignData  Data obtained while finding the best score that is useful for finding the alignment.
+ * @param [out] alignment  Alignment.
+ * @param [out] alignmentLength  Length of alignment.
+ * @return Status code.
+ */
+static int obtainAlignmentTraceback(const int queryLength, const int targetLength,
+                                    const int bestScore, const AlignmentData* const alignData,
+                                    unsigned char** const alignment, int* const alignmentLength) {
+    const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
+    const int W = maxNumBlocks * WORD_SIZE - queryLength;
+
+    *alignment = static_cast<unsigned char*>(malloc((queryLength + targetLength - 1) * sizeof(unsigned char)));
+    *alignmentLength = 0;
+    int c = targetLength - 1;  // index of column
+    int b = maxNumBlocks - 1;  // index of block in column
+    int currScore = bestScore;  // Score of current cell
+    int lScore = -1;  // Score of left cell
+    int uScore = -1;  // Score of upper cell
+    int ulScore = -1;  // Score of upper left cell
+    Word currP = alignData->Ps[c * maxNumBlocks + b];  // P of current block
+    Word currM = alignData->Ms[c * maxNumBlocks + b];  // M of current block
+    // True if the block to the left exists and is in the band
+    bool thereIsLeftBlock = c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1];
+    // We set the initial values of lP and lM to 0 only to avoid compiler warnings; they should not affect the
+    // calculation as both lP and lM should be initialized at some later moment (but the compiler cannot
+    // detect it since this initialization is guaranteed by "business" logic).
+    Word lP = 0, lM = 0;
+    if (thereIsLeftBlock) {
+        lP = alignData->Ps[(c - 1) * maxNumBlocks + b];  // P of block to the left
+        lM = alignData->Ms[(c - 1) * maxNumBlocks + b];  // M of block to the left
+    }
+    currP <<= W;
+    currM <<= W;
+    int blockPos = WORD_SIZE - W - 1;  // 0-based index of current cell in block
+
+    // TODO(martin): refactor this whole piece of code. There are too many if-else statements,
+    // it is too easy for a bug to hide and too hard to effectively cover all the edge cases.
+    // We need better separation of logic and responsibilities.
+    while (true) {
+        if (c == 0) {
+            thereIsLeftBlock = true;
+            lScore = b * WORD_SIZE + blockPos + 1;
+            ulScore = lScore - 1;
+        }
+
+        // TODO: improvement: calculate only those cells that are needed,
+        // for example if I calculate the upper cell and can move up,
+        // there is no need to calculate the left and upper left cell
+        //---------- Calculate scores ---------//
+        if (lScore == -1 && thereIsLeftBlock) {
+            lScore = alignData->scores[(c - 1) * maxNumBlocks + b];  // score of block to the left
+            for (int i = 0; i < WORD_SIZE - blockPos - 1; i++) {
+                if (lP & HIGH_BIT_MASK) lScore--;
+                if (lM & HIGH_BIT_MASK) lScore++;
+                lP <<= 1;
+                lM <<= 1;
+            }
+        }
+        if (ulScore == -1) {
+            if (lScore != -1) {
+                ulScore = lScore;
+                if (lP & HIGH_BIT_MASK) ulScore--;
+                if (lM & HIGH_BIT_MASK) ulScore++;
+            }
+            else if (c > 0 && b-1 >= alignData->firstBlocks[c-1] && b-1 <= alignData->lastBlocks[c-1]) {
+                // This is the case when the upper left cell is the last cell in the block,
+                // and the block to the left is not in the band, so lScore is -1.
+                ulScore = alignData->scores[(c - 1) * maxNumBlocks + b - 1];
+            }
+        }
+        if (uScore == -1) {
+            uScore = currScore;
+            if (currP & HIGH_BIT_MASK) uScore--;
+            if (currM & HIGH_BIT_MASK) uScore++;
+            currP <<= 1;
+            currM <<= 1;
+        }
+        //-------------------------------------//
+
+        // TODO: should I check if there is an upper block?
+
+        //-------------- Move --------------//
+        // Move up - insertion to target - deletion from query
+        if (uScore != -1 && uScore + 1 == currScore) {
+            currScore = uScore;
+            lScore = ulScore;
+            uScore = ulScore = -1;
+            if (blockPos == 0) {  // If entering new (upper) block
+                if (b == 0) {  // If there are no cells above (only boundary cells)
+                    (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;  // Move up
+                    for (int i = 0; i < c + 1; i++)  // Move left until the end
+                        (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
+                    break;
+                } else {
+                    blockPos = WORD_SIZE - 1;
+                    b--;
+                    currP = alignData->Ps[c * maxNumBlocks + b];
+                    currM = alignData->Ms[c * maxNumBlocks + b];
+                    if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
+                        thereIsLeftBlock = true;
+                        lP = alignData->Ps[(c - 1) * maxNumBlocks + b];  // TODO: improve this, too many operations
+                        lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
+                    } else {
+                        thereIsLeftBlock = false;
+                        // TODO(martin): There may not be a left block, but there can be a left boundary - do we
+                        // handle this correctly then? Are l and ul score set correctly? I should check that / refactor this.
+                    }
+                }
+            } else {
+                blockPos--;
+                lP <<= 1;
+                lM <<= 1;
+            }
+            // Mark move
+            (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
+        }
+        // Move left - deletion from target - insertion to query
+        else if (lScore != -1 && lScore + 1 == currScore) {
+            currScore = lScore;
+            uScore = ulScore;
+            lScore = ulScore = -1;
+            c--;
+            if (c == -1) {  // If there are no cells to the left (only boundary cells)
+                (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;  // Move left
+                int numUp = b * WORD_SIZE + blockPos + 1;
+                for (int i = 0; i < numUp; i++)  // Move up until the end
+                    (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
+                break;
+            }
+            currP = lP;
+            currM = lM;
+            if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
+                thereIsLeftBlock = true;
+                lP = alignData->Ps[(c - 1) * maxNumBlocks + b];
+                lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
+            } else {
+                if (c == 0) {  // If there are no cells to the left (only boundary cells)
+                    thereIsLeftBlock = true;
+                    lScore = b * WORD_SIZE + blockPos + 1;
+                    ulScore = lScore - 1;
+                } else {
+                    thereIsLeftBlock = false;
+                }
+            }
+            // Mark move
+            (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
+        }
+        // Move up left - (mis)match
+        else if (ulScore != -1) {
+            unsigned char moveCode = ulScore == currScore ? EDLIB_EDOP_MATCH : EDLIB_EDOP_MISMATCH;
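+            // (A diagonal move costs 0 for a match and 1 for a mismatch, so an unchanged
+            //  score relative to the upper-left cell identifies a match.)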
+            currScore = ulScore;
+            uScore = lScore = ulScore = -1;
+            c--;
+            if (c == -1) {  // If there are no cells to the left (only boundary cells)
+                (*alignment)[(*alignmentLength)++] = moveCode;  // Move up left
+                int numUp = b * WORD_SIZE + blockPos;
+                for (int i = 0; i < numUp; i++)  // Move up until the end
+                    (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
+                break;
+            }
+            if (blockPos == 0) {  // If entering upper left block
+                if (b == 0) {  // If there are no more cells above (only boundary cells)
+                    (*alignment)[(*alignmentLength)++] = moveCode;  // Move up left
+                    for (int i = 0; i < c + 1; i++)  // Move left until the end
+                        (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
+                    break;
+                }
+                blockPos = WORD_SIZE - 1;
+                b--;
+                currP = alignData->Ps[c * maxNumBlocks + b];
+                currM = alignData->Ms[c * maxNumBlocks + b];
+            } else {  // If entering left block
+                blockPos--;
+                currP = lP;
+                currM = lM;
+                currP <<= 1;
+                currM <<= 1;
+            }
+            // Set new left block
+            if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
+                thereIsLeftBlock = true;
+                lP = alignData->Ps[(c - 1) * maxNumBlocks + b];
+                lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
+            } else {
+                if (c == 0) {  // If there are no cells to the left (only boundary cells)
+                    thereIsLeftBlock = true;
+                    lScore = b * WORD_SIZE + blockPos + 1;
+                    ulScore = lScore - 1;
+                } else {
+                    thereIsLeftBlock = false;
+                }
+            }
+            // Mark move
+            (*alignment)[(*alignmentLength)++] = moveCode;
+        } else {
+            // Reached the end - finished!
+            break;
+        }
+        //----------------------------------//
+    }
+
+    *alignment = static_cast<unsigned char*>(realloc(*alignment, (*alignmentLength) * sizeof(unsigned char)));
+    reverse(*alignment, *alignment + (*alignmentLength));
+    return EDLIB_STATUS_OK;
+}
+
+
+/**
+ * Finds one possible alignment that gives the optimal score (bestScore).
+ * It splits the problem into smaller problems using Hirschberg's algorithm and, when they are small enough,
+ * solves them using the traceback algorithm.
+ * @param [in] query
+ * @param [in] rQuery  Reversed query.
+ * @param [in] queryLength
+ * @param [in] target
+ * @param [in] rTarget  Reversed target.
+ * @param [in] targetLength
+ * @param [in] equalityDefinition
+ * @param [in] alphabetLength
+ * @param [in] bestScore  Best (optimal) score.
+ * @param [out] alignment  Sequence of edit operations that make target equal to query.
+ * @param [out] alignmentLength  Length of alignment.
+ * @return Status code.
+ */
+static int obtainAlignment(
+        const unsigned char* const query, const unsigned char* const rQuery, const int queryLength,
+        const unsigned char* const target, const unsigned char* const rTarget, const int targetLength,
+        const EqualityDefinition& equalityDefinition, const int alphabetLength, const int bestScore,
+        unsigned char** const alignment, int* const alignmentLength) {
+
+    // Handle the special case when one of the sequences has length 0.
+    if (queryLength == 0 || targetLength == 0) {
+        *alignmentLength = targetLength + queryLength;
+        *alignment = static_cast<unsigned char*>(malloc((*alignmentLength) * sizeof(unsigned char)));
+        for (int i = 0; i < *alignmentLength; i++) {
+            (*alignment)[i] = queryLength == 0 ? EDLIB_EDOP_DELETE : EDLIB_EDOP_INSERT;
+        }
+        return EDLIB_STATUS_OK;
+    }
+
+    const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
+    const int W = maxNumBlocks * WORD_SIZE - queryLength;
+    int statusCode;
+
+    // TODO: think about reducing the number of memory allocations in the alignment functions, probably
+    // by sharing some memory that is allocated only once. That refers to: Peq, columns in Hirschberg,
+    // and it could also be done for alignments - we could have one big array for the alignment that would be
+    // sparsely populated by each of the steps in the recursion, and at the end we would just consolidate those results.
+
+    // If the estimated memory consumption for the traceback algorithm is smaller than 1MB, use it;
+    // otherwise use Hirschberg's algorithm. By running a few tests I chose a boundary of 1MB as optimal.
+    long long alignmentDataSize = (2ll * sizeof(Word) + sizeof(int)) * maxNumBlocks * targetLength
+                                  + 2ll * sizeof(int) * targetLength;
+    if (alignmentDataSize < 1024 * 1024) {
+        int score_, endLocation_;  // Used only to call the function.
+        AlignmentData* alignData = NULL;
+        Word* Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition);
+        myersCalcEditDistanceNW(Peq, W, maxNumBlocks,
+                                queryLength,
+                                target, targetLength,
+                                bestScore,
+                                &score_, &endLocation_, true, &alignData, -1);
+        //assert(score_ == bestScore);
+        //assert(endLocation_ == targetLength - 1);
+
+        statusCode = obtainAlignmentTraceback(queryLength, targetLength,
+                                              bestScore, alignData, alignment, alignmentLength);
+        delete alignData;
+        delete[] Peq;
+    } else {
+        statusCode = obtainAlignmentHirschberg(query, rQuery, queryLength,
+                                               target, rTarget, targetLength,
+                                               equalityDefinition, alphabetLength, bestScore,
+                                               alignment, alignmentLength);
+    }
+    return statusCode;
+}
+
+
+/**
+ * Finds one possible alignment that gives the optimal score (bestScore).
+ * Uses Hirschberg's algorithm to split the problem into two sub-problems, solve them, and combine them together.
+ * @param [in] query
+ * @param [in] rQuery  Reversed query.
+ * @param [in] queryLength
+ * @param [in] target
+ * @param [in] rTarget  Reversed target.
+ * @param [in] targetLength
+ * @param [in] alphabetLength
+ * @param [in] bestScore  Best (optimal) score.
+ * @param [out] alignment  Sequence of edit operations that make target equal to query.
+ * @param [out] alignmentLength  Length of alignment.
+ * @return Status code.
+ */
+static int obtainAlignmentHirschberg(
+        const unsigned char* const query, const unsigned char* const rQuery, const int queryLength,
+        const unsigned char* const target, const unsigned char* const rTarget, const int targetLength,
+        const EqualityDefinition& equalityDefinition, const int alphabetLength, const int bestScore,
+        unsigned char** const alignment, int* const alignmentLength) {
+
+    const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
+    const int W = maxNumBlocks * WORD_SIZE - queryLength;
+
+    Word* Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition);
+    Word* rPeq = buildPeq(alphabetLength, rQuery, queryLength, equalityDefinition);
+
+    // Used only to call functions.
+    int score_, endLocation_;
+
+    // Divide the dynamic matrix into two halves, left and right.
+    const int leftHalfWidth = targetLength / 2;
+    const int rightHalfWidth = targetLength - leftHalfWidth;
+
+    // Calculate left half.
+    AlignmentData* alignDataLeftHalf = NULL;
+    int leftHalfCalcStatus = myersCalcEditDistanceNW(
+        Peq, W, maxNumBlocks, queryLength, target, targetLength, bestScore,
+        &score_, &endLocation_, false, &alignDataLeftHalf, leftHalfWidth - 1);
+
+    // Calculate right half.
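+    // (The right half is computed on the reversed query and target, so its single saved column
+    //  holds, for each query row, the cost of aligning the remaining query suffix against the
+    //  remaining target suffix; readBlockReverse() restores forward row order when unwrapping.)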
+    AlignmentData* alignDataRightHalf = NULL;
+    int rightHalfCalcStatus = myersCalcEditDistanceNW(
+        rPeq, W, maxNumBlocks, queryLength, rTarget, targetLength, bestScore,
+        &score_, &endLocation_, false, &alignDataRightHalf, rightHalfWidth - 1);
+
+    delete[] Peq;
+    delete[] rPeq;
+
+    if (leftHalfCalcStatus == EDLIB_STATUS_ERROR || rightHalfCalcStatus == EDLIB_STATUS_ERROR) {
+        if (alignDataLeftHalf) delete alignDataLeftHalf;
+        if (alignDataRightHalf) delete alignDataRightHalf;
+        return EDLIB_STATUS_ERROR;
+    }
+
+    // Unwrap the left half.
+    int firstBlockIdxLeft = alignDataLeftHalf->firstBlocks[0];
+    int lastBlockIdxLeft = alignDataLeftHalf->lastBlocks[0];
+    // TODO: avoid this allocation by using some shared array?
+    // scoresLeft contains scores from the left column, starting with the scoresLeftStartIdx row (query index)
+    // and ending with the scoresLeftEndIdx row (0-indexed).
+    int scoresLeftLength = (lastBlockIdxLeft - firstBlockIdxLeft + 1) * WORD_SIZE;
+    int* scoresLeft = new int[scoresLeftLength];
+    for (int blockIdx = firstBlockIdxLeft; blockIdx <= lastBlockIdxLeft; blockIdx++) {
+        Block block(alignDataLeftHalf->Ps[blockIdx], alignDataLeftHalf->Ms[blockIdx],
+                    alignDataLeftHalf->scores[blockIdx]);
+        readBlock(block, scoresLeft + (blockIdx - firstBlockIdxLeft) * WORD_SIZE);
+    }
+    int scoresLeftStartIdx = firstBlockIdxLeft * WORD_SIZE;
+    // If the last block contains padding, shorten the length of scores by the length of the padding.
+    if (lastBlockIdxLeft == maxNumBlocks - 1) {
+        scoresLeftLength -= W;
+    }
+
+    // Unwrap the right half (it is also reversed while unwrapping).
+    int firstBlockIdxRight = alignDataRightHalf->firstBlocks[0];
+    int lastBlockIdxRight = alignDataRightHalf->lastBlocks[0];
+    int scoresRightLength = (lastBlockIdxRight - firstBlockIdxRight + 1) * WORD_SIZE;
+    int* scoresRight = new int[scoresRightLength];
+    int* scoresRightOriginalStart = scoresRight;
+    for (int blockIdx = firstBlockIdxRight; blockIdx <= lastBlockIdxRight; blockIdx++) {
+        Block block(alignDataRightHalf->Ps[blockIdx], alignDataRightHalf->Ms[blockIdx],
+                    alignDataRightHalf->scores[blockIdx]);
+        readBlockReverse(block, scoresRight + (lastBlockIdxRight - blockIdx) * WORD_SIZE);
+    }
+    int scoresRightStartIdx = queryLength - (lastBlockIdxRight + 1) * WORD_SIZE;
+    // If there is padding at the beginning of scoresRight (which can happen because of the reversing that we do),
+    // move the pointer forward to remove the padding (that is why we remember originalStart).
+    if (scoresRightStartIdx < 0) {
+        //assert(scoresRightStartIdx == -1 * W);
+        scoresRight += W;
+        scoresRightStartIdx += W;
+        scoresRightLength -= W;
+    }
+
+    delete alignDataLeftHalf;
+    delete alignDataRightHalf;
+
+    //--------------------- Find the best move ----------------//
+    // Find the query/row index of the cell in the left column which, together with its lower right neighbour
+    // from the right column, gives the best score (when summed). We also have to consider boundary cells
+    // (those cells at -1 indexes).
+    //  x|
+    //  -+-
+    //   |x
+    int queryIdxLeftStart = max(scoresLeftStartIdx, scoresRightStartIdx - 1);
+    int queryIdxLeftEnd = min(scoresLeftStartIdx + scoresLeftLength - 1,
+                              scoresRightStartIdx + scoresRightLength - 2);
+    int leftScore = -1, rightScore = -1;
+    int queryIdxLeftAlignment = -1;  // Query/row index of cell in left column where alignment is passing through.
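+    // (Any optimal path must cross from the left half into the right half at some row: the row
+    //  where the prefix cost from scoresLeft plus the suffix cost from scoresRight, offset by one
+    //  row for the crossing step, sums to bestScore. The loop below scans the overlap of the two
+    //  score ranges for such a row.)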
+    bool queryIdxLeftAlignmentFound = false;
+    for (int queryIdx = queryIdxLeftStart; queryIdx <= queryIdxLeftEnd; queryIdx++) {
+        leftScore = scoresLeft[queryIdx - scoresLeftStartIdx];
+        rightScore = scoresRight[queryIdx + 1 - scoresRightStartIdx];
+        if (leftScore + rightScore == bestScore) {
+            queryIdxLeftAlignment = queryIdx;
+            queryIdxLeftAlignmentFound = true;
+            break;
+        }
+    }
+    // Check boundary cells.
+    if (!queryIdxLeftAlignmentFound && scoresLeftStartIdx == 0 && scoresRightStartIdx == 0) {
+        leftScore = leftHalfWidth;
+        rightScore = scoresRight[0];
+        if (leftScore + rightScore == bestScore) {
+            queryIdxLeftAlignment = -1;
+            queryIdxLeftAlignmentFound = true;
+        }
+    }
+    if (!queryIdxLeftAlignmentFound && scoresLeftStartIdx + scoresLeftLength == queryLength
+        && scoresRightStartIdx + scoresRightLength == queryLength) {
+        leftScore = scoresLeft[scoresLeftLength - 1];
+        rightScore = rightHalfWidth;
+        if (leftScore + rightScore == bestScore) {
+            queryIdxLeftAlignment = queryLength - 1;
+            queryIdxLeftAlignmentFound = true;
+        }
+    }
+
+    delete[] scoresLeft;
+    delete[] scoresRightOriginalStart;
+
+    if (queryIdxLeftAlignmentFound == false) {
+        // If there was no move that is part of an optimal alignment, then there is no such alignment,
+        // or the given bestScore is not correct!
+        return EDLIB_STATUS_ERROR;
+    }
+    //----------------------------------------------------------//
+
+    // Calculate alignments for the upper half of the left half (upper left - ul)
+    // and the lower half of the right half (lower right - lr).
+    const int ulHeight = queryIdxLeftAlignment + 1;
+    const int lrHeight = queryLength - ulHeight;
+    const int ulWidth = leftHalfWidth;
+    const int lrWidth = rightHalfWidth;
+    unsigned char* ulAlignment = NULL; int ulAlignmentLength;
+    int ulStatusCode = obtainAlignment(query, rQuery + lrHeight, ulHeight,
+                                       target, rTarget + lrWidth, ulWidth,
+                                       equalityDefinition, alphabetLength, leftScore,
+                                       &ulAlignment, &ulAlignmentLength);
+    unsigned char* lrAlignment = NULL; int lrAlignmentLength;
+    int lrStatusCode = obtainAlignment(query + ulHeight, rQuery, lrHeight,
+                                       target + ulWidth, rTarget, lrWidth,
+                                       equalityDefinition, alphabetLength, rightScore,
+                                       &lrAlignment, &lrAlignmentLength);
+    if (ulStatusCode == EDLIB_STATUS_ERROR || lrStatusCode == EDLIB_STATUS_ERROR) {
+        if (ulAlignment) free(ulAlignment);
+        if (lrAlignment) free(lrAlignment);
+        return EDLIB_STATUS_ERROR;
+    }
+
+    // Build the alignment by concatenating the upper left alignment with the lower right alignment.
+    *alignmentLength = ulAlignmentLength + lrAlignmentLength;
+    *alignment = static_cast<unsigned char*>(malloc((*alignmentLength) * sizeof(unsigned char)));
+    memcpy(*alignment, ulAlignment, ulAlignmentLength);
+    memcpy(*alignment + ulAlignmentLength, lrAlignment, lrAlignmentLength);
+
+    free(ulAlignment);
+    free(lrAlignment);
+    return EDLIB_STATUS_OK;
+}
+
+
+/**
+ * Takes a char query and char target, recognizes the alphabet, and transforms them into unsigned char sequences
+ * in which the elements are no longer letters of the alphabet but their indices in the alphabet.
+ * Most of the internal edlib functions expect such transformed sequences.
+ * This function will allocate queryTransformed and targetTransformed, so make sure to free them when done.
+ * Example:
+ *   Original sequences: "ACT" and "CGT".
+ *   Alphabet would be recognized as "ACTG". Alphabet length = 4.
+ *   Transformed sequences: [0, 1, 2] and [1, 3, 2].
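+ * (The alphabet is built in order of first appearance, scanning the query first and then the
+ * target, which is why G receives index 3 in this example.)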
+ * @param [in] queryOriginal
+ * @param [in] queryLength
+ * @param [in] targetOriginal
+ * @param [in] targetLength
+ * @param [out] queryTransformed  It will contain values in range [0, alphabet length - 1].
+ * @param [out] targetTransformed  It will contain values in range [0, alphabet length - 1].
+ * @return Alphabet as a string of unique characters, where the index of each character is its value in the
+ *         transformed sequences.
+ */
+static string transformSequences(const char* const queryOriginal, const int queryLength,
+                                 const char* const targetOriginal, const int targetLength,
+                                 unsigned char** const queryTransformed,
+                                 unsigned char** const targetTransformed) {
+    // The alphabet is constructed from the letters that are present in the sequences.
+    // Each letter is assigned an ordinal number, starting from 0 up to alphabetLength - 1,
+    // and a new query and target are created in which letters are replaced with their ordinal numbers.
+    // This query and target are used in all the calculations later.
+    *queryTransformed = static_cast<unsigned char*>(malloc(sizeof(unsigned char) * queryLength));
+    *targetTransformed = static_cast<unsigned char*>(malloc(sizeof(unsigned char) * targetLength));
+
+    string alphabet = "";
+
+    // Alphabet information; it is constructed on the fly while transforming the sequences.
+    // letterIdx[c] is the index of letter c in the alphabet.
+    unsigned char letterIdx[MAX_UCHAR + 1];
+    bool inAlphabet[MAX_UCHAR + 1];  // inAlphabet[c] is true if c is in the alphabet
+    for (int i = 0; i < MAX_UCHAR + 1; i++) inAlphabet[i] = false;
+
+    for (int i = 0; i < queryLength; i++) {
+        unsigned char c = static_cast<unsigned char>(queryOriginal[i]);
+        if (!inAlphabet[c]) {
+            inAlphabet[c] = true;
+            letterIdx[c] = static_cast<unsigned char>(alphabet.size());
+            alphabet += queryOriginal[i];
+        }
+        (*queryTransformed)[i] = letterIdx[c];
+    }
+    for (int i = 0; i < targetLength; i++) {
+        unsigned char c = static_cast<unsigned char>(targetOriginal[i]);
+        if (!inAlphabet[c]) {
+            inAlphabet[c] = true;
+            letterIdx[c] = static_cast<unsigned char>(alphabet.size());
+            alphabet += targetOriginal[i];
+        }
+        (*targetTransformed)[i] = letterIdx[c];
+    }
+
+    return alphabet;
+}
+
+
+extern "C" EdlibAlignConfig edlibNewAlignConfig(int k, EdlibAlignMode mode, EdlibAlignTask task,
+                                                const EdlibEqualityPair* additionalEqualities,
+                                                int additionalEqualitiesLength) {
+    EdlibAlignConfig config;
+    config.k = k;
+    config.mode = mode;
+    config.task = task;
+    config.additionalEqualities = additionalEqualities;
+    config.additionalEqualitiesLength = additionalEqualitiesLength;
+    return config;
+}
+
+extern "C" EdlibAlignConfig edlibDefaultAlignConfig(void) {
+    return edlibNewAlignConfig(-1, EDLIB_MODE_NW, EDLIB_TASK_DISTANCE, NULL, 0);
+}
+
+extern "C" void edlibFreeAlignResult(EdlibAlignResult result) {
+    if (result.endLocations) free(result.endLocations);
+    if (result.startLocations) free(result.startLocations);
+    if (result.alignment) free(result.alignment);
+}
\ No newline at end of file
diff --git a/dysgu/edlib/src/edlib.h b/dysgu/edlib/src/edlib.h
new file mode 100644
index 0000000..284331b
--- /dev/null
+++ b/dysgu/edlib/src/edlib.h
@@ -0,0 +1,277 @@
+#ifndef EDLIB_H
+#define EDLIB_H
+
+/**
+ * @file
+ * @author  Martin Sosic
+ * @brief Main header file, containing all public functions and structures.
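+ *
+ * Typical usage is a three-step sequence (a minimal sketch; the sequences below are illustrative):
+ *
+ *     EdlibAlignResult result = edlibAlign("ACT", 3, "CGACTGAC", 8,
+ *                                          edlibNewAlignConfig(-1, EDLIB_MODE_HW, EDLIB_TASK_PATH, NULL, 0));
+ *     if (result.status == EDLIB_STATUS_OK) {
+ *         char* cigar = edlibAlignmentToCigar(result.alignment, result.alignmentLength, EDLIB_CIGAR_EXTENDED);
+ *         // ... use result.editDistance, result.startLocations, result.endLocations, cigar ...
+ *         free(cigar);
+ *     }
+ *     edlibFreeAlignResult(result);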
+ */
+
+// Define EDLIB_API macro to properly export symbols
+#ifdef EDLIB_SHARED
+#    ifdef _WIN32
+#        ifdef EDLIB_BUILD
+#            define EDLIB_API __declspec(dllexport)
+#        else
+#            define EDLIB_API __declspec(dllimport)
+#        endif
+#    else
+#        define EDLIB_API __attribute__ ((visibility ("default")))
+#    endif
+#else
+#    define EDLIB_API
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Status codes
+#define EDLIB_STATUS_OK 0
+#define EDLIB_STATUS_ERROR 1
+
+    /**
+     * Alignment methods - how should Edlib treat gaps before and after the query?
+     */
+    typedef enum {
+        /**
+         * Global method. This is the standard method.
+         * Useful when you want to find out how similar the first sequence is to the second sequence.
+         */
+        EDLIB_MODE_NW,
+        /**
+         * Prefix method. Similar to the global method, but with a small twist - the gap at the query end is not
+         * penalized. What that means is that deleting elements from the end of the second sequence is "free"!
+         * For example, if we had "AACT" and "AACTGGC", the edit distance would be 0, because removing "GGC" from
+         * the end of the second sequence is "free" and does not count towards the total edit distance. This method
+         * is appropriate when you want to find out how well the first sequence fits at the beginning of the second
+         * sequence.
+         */
+        EDLIB_MODE_SHW,
+        /**
+         * Infix method. Similar to the prefix method, but with one more twist - gaps at both the query end and
+         * start are not penalized. What that means is that deleting elements from the start and end of the second
+         * sequence is "free"!
+         * For example, if we had ACT and CGACTGAC, the edit distance would be 0, because removing CG from the start
+         * and GAC from the end of the second sequence is "free" and does not count towards the total edit distance.
+         * This method is appropriate when you want to find out how well the first sequence fits at any part of
+         * the second sequence.
+         * For example, if your second sequence was a long text and your first sequence was a sentence from that text,
+         * but slightly scrambled, you could use this method to discover how scrambled it is and where it fits in
+         * that text. In bioinformatics, this method is appropriate for aligning a read to a sequence.
+         */
+        EDLIB_MODE_HW
+    } EdlibAlignMode;
+
+    /**
+     * Alignment tasks - what do you want Edlib to do?
+     */
+    typedef enum {
+        EDLIB_TASK_DISTANCE,  //!< Find edit distance and end locations.
+        EDLIB_TASK_LOC,       //!< Find edit distance, end locations and start locations.
+        EDLIB_TASK_PATH       //!< Find edit distance, end locations and start locations and alignment path.
+    } EdlibAlignTask;
+
+    /**
+     * Describes cigar format.
+     * @see http://samtools.github.io/hts-specs/SAMv1.pdf
+     * @see http://drive5.com/usearch/manual/cigar.html
+     */
+    typedef enum {
+        EDLIB_CIGAR_STANDARD,  //!< Match: 'M', Insertion: 'I', Deletion: 'D', Mismatch: 'M'.
+        EDLIB_CIGAR_EXTENDED   //!< Match: '=', Insertion: 'I', Deletion: 'D', Mismatch: 'X'.
+    } EdlibCigarFormat;
+
+// Edit operations.
+#define EDLIB_EDOP_MATCH 0     //!< Match.
+#define EDLIB_EDOP_INSERT 1    //!< Insertion to target = deletion from query.
+#define EDLIB_EDOP_DELETE 2    //!< Deletion from target = insertion to query.
+#define EDLIB_EDOP_MISMATCH 3  //!< Mismatch.
+
+    /**
+     * @brief Defines two given characters as equal.
+     */
+    typedef struct {
+        char first;
+        char second;
+    } EdlibEqualityPair;
+
+    /**
+     * @brief Configuration object for the edlibAlign() function.
+     */
+    typedef struct {
+        /**
+         * Set k to a non-negative value to tell edlib that the edit distance is not larger than k.
+         * A smaller k can significantly improve the speed of computation.
+         * If the edit distance is larger than k, edlib will set the edit distance to -1.
+         * Set k to a negative value and edlib will internally auto-adjust k until the score is found.
+         */
+        int k;
+
+        /**
+         * Alignment method.
+         * EDLIB_MODE_NW: global (Needleman-Wunsch)
+         * EDLIB_MODE_SHW: prefix. Gap after query is not penalized.
+         * EDLIB_MODE_HW: infix. Gaps before and after query are not penalized.
+         */
+        EdlibAlignMode mode;
+
+        /**
+         * Alignment task - tells Edlib what to calculate. The less there is to calculate, the faster it is.
+         * EDLIB_TASK_DISTANCE - find edit distance and end locations of optimal alignment paths in target.
+         * EDLIB_TASK_LOC - find edit distance and start and end locations of optimal alignment paths in target.
+         * EDLIB_TASK_PATH - find edit distance, alignment path (and start and end locations of it in target).
+         */
+        EdlibAlignTask task;
+
+        /**
+         * List of pairs of characters, where each pair defines two characters as equal.
+         * This way you can extend edlib's definition of equality (which is that each character is equal only
+         * to itself).
+         * This can be useful if you have some wildcard characters that should match multiple other characters,
+         * or e.g. if you want edlib to be case insensitive.
+         * Can be set to NULL if there are none.
+         */
+        const EdlibEqualityPair* additionalEqualities;
+
+        /**
+         * Number of additional equalities, which is a non-negative number.
+         * 0 if there are none.
+         */
+        int additionalEqualitiesLength;
+    } EdlibAlignConfig;
+
+    /**
+     * Helper method for easy construction of the configuration object.
+     * @return Configuration object filled with the given parameters.
+     */
+    EDLIB_API EdlibAlignConfig edlibNewAlignConfig(
+        int k, EdlibAlignMode mode, EdlibAlignTask task,
+        const EdlibEqualityPair* additionalEqualities,
+        int additionalEqualitiesLength
+    );
+
+    /**
+     * @return Default configuration object, with the following defaults:
+     *         k = -1, mode = EDLIB_MODE_NW, task = EDLIB_TASK_DISTANCE, no additional equalities.
+     */
+    EDLIB_API EdlibAlignConfig edlibDefaultAlignConfig(void);
+
+
+    /**
+     * Container for results of alignment done by the edlibAlign() function.
+     */
+    typedef struct {
+        /**
+         * EDLIB_STATUS_OK or EDLIB_STATUS_ERROR. If error, all other fields will have undefined values.
+         */
+        int status;
+
+        /**
+         * -1 if k is non-negative and the edit distance is larger than k.
+         */
+        int editDistance;
+
+        /**
+         * Array of zero-based positions in target where optimal alignment paths end.
+         * If the gap after the query is penalized, the gap counts as part of the query (NW), otherwise not.
+         * Set to NULL if the edit distance is larger than k.
+         * If you do not free the whole result object using edlibFreeAlignResult(), do not forget to use free().
+         */
+        int* endLocations;
+
+        /**
+         * Array of zero-based positions in target where optimal alignment paths start;
+         * they correspond to endLocations.
+         * If the gap before the query is penalized, the gap counts as part of the query (NW), otherwise not.
+         * Set to NULL if not calculated or if the edit distance is larger than k.
+         * If you do not free the whole result object using edlibFreeAlignResult(), do not forget to use free().
+         */
+        int* startLocations;
+
+        /**
+         * Number of end (and start) locations.
+         */
+        int numLocations;
+
+        /**
+         * Alignment is found for the first pair of start and end locations.
+         * Set to NULL if not calculated.
+         * Alignment is a sequence of numbers: 0, 1, 2, 3.
+         * 0 stands for match.
+         * 1 stands for insertion to target.
+         * 2 stands for insertion to query.
+         * 3 stands for mismatch.
+         * Alignment aligns the query to the target from the beginning of the query till the end of the query.
+         * If gaps are not penalized, they are not in the alignment.
+         * If you do not free the whole result object using edlibFreeAlignResult(), do not forget to use free().
+         */
+        unsigned char* alignment;
+
+        /**
+         * Length of alignment.
+         */
+        int alignmentLength;
+
+        /**
+         * Number of different characters in query and target together.
+         */
+        int alphabetLength;
+    } EdlibAlignResult;
+
+    /**
+     * Frees memory in EdlibAlignResult that was allocated by edlib.
+     * If you do not use it, make sure to free the needed members manually using free().
+     */
+    EDLIB_API void edlibFreeAlignResult(EdlibAlignResult result);
+
+
+    /**
+     * Aligns two sequences (query and target) using edit distance (Levenshtein distance).
+     * Through the config parameter, this function supports different alignment methods (global, prefix, infix),
+     * as well as different modes of search (tasks).
+     * It always returns the edit distance and end locations of the optimal alignment in target.
+     * It optionally returns the start locations of the optimal alignment in target and the alignment path,
+     * if you choose the appropriate tasks.
+     * @param [in] query  First sequence.
+     * @param [in] queryLength  Number of characters in first sequence.
+     * @param [in] target  Second sequence.
+     * @param [in] targetLength  Number of characters in second sequence.
+     * @param [in] config  Additional alignment parameters, like alignment method and wanted results.
+     * @return Result of alignment, which can contain edit distance, start and end locations and alignment path.
+     *         Make sure to clean up the object using edlibFreeAlignResult() or by manually freeing needed members.
+     */
+    EDLIB_API EdlibAlignResult edlibAlign(
+        const char* query, int queryLength,
+        const char* target, int targetLength,
+        const EdlibAlignConfig config
+    );
+
+
+    /**
+     * Builds a cigar string from the given alignment sequence.
+     * @param [in] alignment  Alignment sequence.
+     *     0 stands for match.
+     *     1 stands for insertion to target.
+     *     2 stands for insertion to query.
+     *     3 stands for mismatch.
+     * @param [in] alignmentLength
+     * @param [in] cigarFormat  Cigar will be returned in the specified format.
+     * @return Cigar string.
+     *     I stands for insertion.
+     *     D stands for deletion.
+     *     X stands for mismatch. (used only in extended format)
+     *     = stands for match. (used only in extended format)
+     *     M stands for (mis)match. (used only in standard format)
+     *     The string is null terminated.
+     *     The needed memory is allocated and the given pointer is set to it.
+     *     Do not forget to free it later using free()!
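+     *     For example, the alignment {0, 0, 1, 3} yields "2=1I1X" in extended format
+     *     and "2M1I1M" in standard format.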
+     */
+    EDLIB_API char* edlibAlignmentToCigar(
+        const unsigned char* alignment, int alignmentLength,
+        EdlibCigarFormat cigarFormat
+    );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // EDLIB_H
\ No newline at end of file
diff --git a/dysgu/extra_metrics.pyx b/dysgu/extra_metrics.pyx
index 5e5a2c6..b5c44c6 100644
--- a/dysgu/extra_metrics.pyx
+++ b/dysgu/extra_metrics.pyx
@@ -262,7 +262,7 @@ cpdef filter_poorly_aligned_ends(spanning_alignments, float divergence=0.02):
     return spanning, 1 - (len(spanning) / len(spanning_alignments))
 
 
-cpdef gap_size_upper_bound(AlignedSegment alignment, int cigarindex, int pos_input, int end_input, int length_extend=15, float divergence=0.02):
+cpdef gap_size_upper_bound(AlignedSegment alignment, int cigarindex, int pos_input, int end_input, int length_extend=15, float divergence=0.02, float len_t=3):
     # expand indel using cigar, merges nearby gaps into the SV event
     cdef int pos, end, l, opp, extent_left, extent_right, candidate_type, candidate_len, len_input, i, dist, dist_thresh, middle, last_seen_size
     cdef uint32_t cigar_value
@@ -276,7 +276,7 @@ cpdef gap_size_upper_bound(AlignedSegment alignment, int cigarindex, int pos_inp
     candidate_type = cigar_value & BAM_CIGAR_MASK
     candidate_len = cigar_value >> BAM_CIGAR_SHIFT
     len_input = candidate_len
-    dist_thresh = min(len_input * 3, ( log2_32(1 + candidate_len) / divergence))
+    dist_thresh = min( (len_input * len_t), ( log2_32(1 + candidate_len) / divergence))
     i = cigarindex + 1
     dist = 0
     last_seen_size = candidate_len
@@ -308,7 +308,7 @@ cpdef gap_size_upper_bound(AlignedSegment alignment, int cigarindex, int pos_inp
             else:
                 candidate_len += l
                 extent_right = end
-            dist_thresh = min(l * 3, ( log2_32(1 + candidate_len) / divergence))
+            dist_thresh = min( (l * len_t), ( log2_32(1 + candidate_len) / divergence))
             dist = 0
         i += 1
 
@@ -342,7 +342,7 @@ cpdef gap_size_upper_bound(AlignedSegment alignment, int cigarindex, int pos_inp
             else:
                 candidate_len += l
                 extent_left = pos
-            dist_thresh = min(l * 3, ( log2_32(1 + candidate_len) / divergence) )
+            dist_thresh = min( (l * len_t), ( log2_32(1 + candidate_len) / divergence) )
             dist = 0
         i -= 1
 
diff --git a/dysgu/filter_normals.py b/dysgu/filter_normals.py
index 3bc9afe..7857808 100644
--- a/dysgu/filter_normals.py
+++ b/dysgu/filter_normals.py
@@ -1,5 +1,7 @@
 import random
 from sys import stderr, argv
+
+import numpy as np
 import pysam
 from pysam import CSOFT_CLIP, CHARD_CLIP, CDEL, CINS, CDIFF, CMATCH
 import time
@@ -7,13 +9,16 @@ import os
 from dysgu.map_set_utils import Py_BasicIntervalTree, is_overlapping
 from dysgu.graph import AlignmentsSA
+from dysgu.extra_metrics import gap_size_upper_bound
+from dysgu.call_component import n_aligned_bases
 import logging
 from collections import defaultdict
 from enum import Enum
 from dysgu.scikitbio._ssw_wrapper import StripedSmithWaterman
 import zlib
-import edlib
+from dysgu.edlib import edlib
 import glob
+from dysgu.map_set_utils import echo
 
 random.seed(42)
 
@@ -113,8 +118,11 @@ def load_samples(args, pths):
 def output_vcf(args, template_vcf):
     hdr = template_vcf.header
     input_command = ' '.join(argv)
-    hdr.add_line('##FILTER=')
+    hdr.add_line('##FILTER=')
+    hdr.add_line('##FILTER=')
     hdr.add_line('##FILTER=')
+    hdr.add_line('##FILTER=')
+    hdr.add_line('##FILTER=')
     hdr.add_line(f'##command="{input_command}"')
     return pysam.VariantFile("-" if not args['svs_out'] else args['svs_out'], "w", header=hdr)
 
@@ -181,7 +189,7 @@ def get_sv_type(r, chrom, chrom2):
 def make_interval_tree(args, infile, sample_name, normal_vcfs):
     if not normal_vcfs:
         return None
-    intervals = defaultdict(lambda: Py_BasicIntervalTree())
+    intervals = defaultdict(lambda: defaultdict(lambda: Py_BasicIntervalTree()))  # chrom : SVTYPE : interval
     ignored = defaultdict(int)
     added = 0
     for normal_vcf in normal_vcfs:
@@ -211,18 +219,20 @@ def make_interval_tree(args, infile, sample_name, normal_vcfs):
             continue
         start = r.start  # note pysam converts to zero-based index like bam
         stop = r.stop
+        svlen = r.info["SVLEN"] if "SVLEN" in r.info else 1000000
         if chrom == chrom2 and stop < start:
             start = r.stop - 1
             stop = r.start - 1
         if chrom == chrom2 and abs(stop - start) <= distance:
-            intervals[chrom].add(min(start, stop) - distance, max(start, stop) + distance, idx)
+            intervals[chrom][r.info["SVTYPE"]].add(min(start, stop) - distance, max(start, stop) + distance, svlen)
         else:
             posB = get_posB(r)
-            intervals[chrom].add(start - distance, start + distance, idx)
-            intervals[chrom2].add(posB - distance, posB + distance, idx)
+            intervals[chrom][r.info["SVTYPE"]].add(start - distance, start + distance, svlen)
+            intervals[chrom2][r.info["SVTYPE"]].add(posB - distance, posB + distance, svlen)
         added += 1
-    for tree in intervals.values():
-        tree.index()
+    for tree_v in intervals.values():
+        for tree in tree_v.values():
+            tree.index()
     if ignored:
         logging.warning(f'Ignored SV types: {ignored}')
     if added == 0:
@@ -392,38 +402,90 @@ def matching_supplementary(aln, infile, posA, posB):
     return False
 
 
-def matching_gap(posA, posB, r, svtype, is_insertion, svlen, pos_threshold=100, span_threshold=0.1):
+def within_read_end_position(event_pos, svtype, cigartuples, cigar_index):
+    if svtype == 1:  # insertion (or other e.g. duplication/inversion within read)
+        return event_pos + 1
+    else:  # deletion type
+        end = event_pos + cigartuples[cigar_index][1]
+        return end
+
+
+def matching_gap(posA, posB, r, svtype, is_insertion, svlen, pos_threshold=100, span_threshold=0.1, paired_end=True, cipos95=0):
     ct = r.cigartuples
     pos = r.pos
     min_length = svlen * 0.2  # 0.25
     max_distance = svlen * 10
    ins_like = svtype == "DUP" or svtype == "INV"
+    debug = False
+    # debug = True
+    cigar_index = 0
+    # echo(span_threshold)
     for k, l in ct:
         if k == CHARD_CLIP or k == CSOFT_CLIP:
+            cigar_index += 1
             continue
         if k == CMATCH or k == CDIFF:
             pos += l
+            cigar_index += 1
             continue
         if is_insertion and k == CDEL:
             pos += l
+            cigar_index += 1
             continue
         elif not is_insertion and k == CINS:
-            if l > 250 and ins_like:
+            span_similarity = min(l, svlen) / max(l, svlen)
+            if l > 250 and ins_like and span_similarity > 0.85 and abs(posA - pos) < max(l, svlen):
+                if debug: echo("ret0", svlen, f"{posA}-{posA + svlen}", f"{pos}-{pos + l}", (k, l))
                 return True
             elif svlen * 2 < l and (abs(posA - pos) < max_distance or (abs(posB - pos + l) < max_distance)):
+                if debug: echo("ret1")
                 return True
+
+            # if l > 100:
+            #     pos_original = pos
+            #     l, pos, _ = gap_size_upper_bound(r, cigar_index, pos, pos+1)
+            #     span_similarity = min(l, svlen) / max(l, svlen)
+            #     if l > 250 and ins_like and span_similarity > 0.85 and abs(posA - pos) < max(l, svlen):
+            #         if debug: echo("ret0b", svlen, f"{posA}-{posA + svlen}", f"{pos}-{pos + l}", (k, l))
+            #         return True
+            #     elif svlen * 2 < l and (abs(posA - pos) < max_distance or (abs(posB - pos + l) < max_distance)):
+            #         if debug: echo("ret1b")
+            #         return True
+            #     if pos_original is not None:
+            #         pos = pos_original
+            #         pos_original = None
+
+            cigar_index += 1
             continue
+
         end = pos + l
         if l > min_length and abs(posA - pos) < max_distance:
             if is_insertion:
-                if span_position_distance(posA, posA + svlen, pos, end, pos_threshold, span_threshold):
span_threshold): + if min(svlen, l) / max(svlen, l) > span_threshold: + if debug: echo("ret2", f"{posA}-{posA + svlen}", f"{pos}-{end}") return True elif span_position_distance(posA, posB, pos, end, pos_threshold, span_threshold): + if debug: echo("ret3") return True + + if not paired_end and l > 20 and ((k == CINS and is_insertion) or (k == CDEL and svtype == "DEL")): + this_l, this_pos, this_end = gap_size_upper_bound(r, cigar_index, pos, pos + 1) + if this_l > min_length and abs(posA - this_pos) < max_distance: + if is_insertion: + if min(svlen, this_l) / max(svlen, this_l) > span_threshold: + if debug: echo("ret2b", f"{posA}-{posA + svlen}", f"{this_pos}-{this_end}") + return True + elif cipos95 and min(svlen + cipos95, this_l) / max(svlen + cipos95, this_l) > span_threshold: + return True + elif span_position_distance(posA, posB, this_pos, this_end, pos_threshold, span_threshold): + if debug: echo("ret3b") + return True + if k == CDEL: pos += l if pos > posB + svlen: break + cigar_index += 1 return False @@ -533,7 +595,7 @@ def cache_nearby_soft_clip(posA, posB, align, join_type, svtype, cached, distanc def any_nearby_soft_clip(posA, posB, align, join_type, svtype, distance=30, clip_length=3): ct = align.cigartuples - if svtype == "BND" or svtype == "INV" or svtype == "DUP": # respect join orientation + if svtype == "BND" or svtype == "DUP": # respect join orientation if join_type[0] == "3": if clip_position_matches(ct[-1], clip_length, posA, align.reference_end, distance): return True @@ -549,7 +611,7 @@ def any_nearby_soft_clip(posA, posB, align, join_type, svtype, distance=30, clip return True if clip_position_matches(ct[0], clip_length, posB, align.pos, distance): return True - elif svtype == "INS" or svtype == "TRA": # check all clips + elif svtype == "INS" or svtype == "TRA" or svtype == "INV": # check all clips if clip_position_matches(ct[-1], clip_length, posA, align.reference_end, distance): return True if clip_position_matches(ct[-1], clip_length, posB, align.reference_end, distance): @@ -567,13 +629,16 @@ def has_clip(r): return False -def clip_align_matches(seq1, seq2, clip_side, paired_end): +def clip_align_matches(seq1, seq2, clip_side, paired_end, is_alt_seq=False): if not seq1 or not seq2: return False - min_seq = min(len(seq1), len(seq2)) + if not is_alt_seq: + min_seq = min(len(seq1), len(seq2)) + else: + min_seq = len(seq2) if min_seq < 8: - if min_seq == len(seq2): - return False + # if min_seq == len(seq2): + # return False if clip_side == SeqType.RIGHT_CLIP: seq2_clipped = seq2[int(len(seq2) / 2) + (1 if len(seq2) % 2 == 0 else 0):] if seq2_clipped.startswith(seq1): @@ -588,13 +653,15 @@ def clip_align_matches(seq1, seq2, clip_side, paired_end): a = aln(seq2) score = a.optimal_alignment_score pct = score / (min_seq * 2) + # echo(pct, "1") if pct > 0.7: return True else: el = edlib.align(seq1, seq2, mode="HW", task="locations") if el and el['locations']: - pct = el['locations'][0][1] - el['locations'][0][0] / min_seq - if pct > 0.7: + pct = 1 - (el['editDistance'] / min_seq) + # echo(pct, "2", min(len(seq1), len(seq2)), el['editDistance']) + if pct > 0.7 and (el['locations'][0][1] - el['locations'][0][0]) / min_seq > 0.7: return True if paired_end: seq1_e = seq1.encode("ascii") @@ -611,36 +678,41 @@ def matching_soft_clips(r, reads_with_nearby_soft_clips, pe): if not break_seqs.any_seqs: return False for clip_side, align, target_side in reads_with_nearby_soft_clips: - checked = False ct = align.cigartuples + if break_seqs.alt_seq: + if 
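The rewritten identity test above derives percent identity from the edit distance and additionally requires the located alignment span to cover most of the shorter sequence. A sketch that exercises the same edlib call (clip_matches is a hypothetical helper; the 0.7 cutoffs mirror the patch, and min_seq is simplified to the shorter length):

    import edlib  # pip install edlib

    def clip_matches(seq1, seq2, min_identity=0.7):
        # Identity from edit distance, plus a requirement that the aligned
        # span covers most of the shorter sequence (sketch of the new check)
        min_seq = min(len(seq1), len(seq2))
        el = edlib.align(seq1, seq2, mode="HW", task="locations")  # infix alignment
        if not el or not el["locations"]:
            return False
        pct = 1 - (el["editDistance"] / min_seq)
        loc_start, loc_end = el["locations"][0]
        return pct > min_identity and (loc_end - loc_start) / min_seq > min_identity

    print(clip_matches("ACGTACGTAC", "TTACGTACGTACTT"))  # True: exact infix match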
clip_align_matches(get_left_clip(align), break_seqs.alt_seq, SeqType.LEFT_CLIP, pe, True): + return True + if clip_align_matches(get_right_clip(align), break_seqs.alt_seq, SeqType.RIGHT_CLIP, pe, True): + return True if target_side == "A": if clip_side == SeqType.LEFT_CLIP and break_seqs.left_clip_A: - checked = True if clip_align_matches(get_left_clip(align), break_seqs.left_clip_A, SeqType.LEFT_CLIP, pe) or (pe and ct[0][0] == CHARD_CLIP): return True elif clip_side == SeqType.RIGHT_CLIP and break_seqs.right_clip_A: - checked = True if clip_align_matches(get_right_clip(align), break_seqs.right_clip_A, SeqType.RIGHT_CLIP, pe) or (pe and ct[-1][0] == CHARD_CLIP): return True else: if clip_side == SeqType.LEFT_CLIP and break_seqs.left_clip_B: - checked = True if clip_align_matches(get_left_clip(align), break_seqs.left_clip_B, SeqType.LEFT_CLIP, pe) or (pe and ct[0][0] == CHARD_CLIP): return True elif clip_side == SeqType.RIGHT_CLIP and break_seqs.right_clip_B: - checked = True if clip_align_matches(get_right_clip(align), break_seqs.right_clip_B, SeqType.RIGHT_CLIP, pe) or (pe and ct[-1][0] == CHARD_CLIP): return True - if not checked and break_seqs.alt_seq: - if clip_align_matches(get_left_clip(align), break_seqs.alt_seq, SeqType.LEFT_CLIP, pe): - return True - if clip_align_matches(get_right_clip(align), break_seqs.alt_seq, SeqType.RIGHT_CLIP, pe): - return True + return False def has_low_support(r, sample, support_fraction): - cov = r.samples[sample]['COV'] + d = r.samples[sample] + if "SVLEN" in r.info and "RPOLY" in r.info and r.info['RPOLY'] > r.info["SVLEN"]: + support_fraction *= 2 + m = max(d['ICN'], d['OCN']) + if m == 0: + cov = d['COV'] + if cov == 0: + return False + else: + cov = d['COV'] * ((min(d['ICN'], d['OCN']) / m)) min_support = round(1.5 + support_fraction * cov) if r.info['SU'] < min_support: return True @@ -649,7 +721,14 @@ def has_low_support(r, sample, support_fraction): def has_low_WR_support(r, sample, support_fraction, n_overlapping, n_mapq0): sf = support_fraction / 2 - cov = r.samples[sample]['COV'] + d = r.samples[sample] + m = max(d['ICN'], d['OCN']) + if m == 0: + cov = d['COV'] + if cov == 0: + return False + else: + cov = d['COV'] * ((min(d['ICN'], d['OCN']) / m)) if n_mapq0 / n_overlapping > 0.3: cov = max(cov, n_overlapping) min_support = min(4, round(1.5 + sf * cov)) @@ -658,17 +737,40 @@ def has_low_WR_support(r, sample, support_fraction, n_overlapping, n_mapq0): return False +def has_low_covering_support(covering_reads, n_mapq0): + if covering_reads - n_mapq0 <= 2: + return True + + def too_many_clipped_reads(r, clipped_reads, support_fraction): sf = support_fraction / 2 - cov = r.samples[r.samples.keys()[0]]['COV'] - max_nearby_clipped_reads = round(3 + sf * cov) - if clipped_reads > max_nearby_clipped_reads: + d = r.samples[r.samples.keys()[0]] + # cov = r.samples[r.samples.keys()[0]]['COV'] + m = max(d['ICN'], d['OCN']) + if m == 0: + cov = d['COV'] + if cov == 0: + return False + else: + cov = d['COV'] * ((min(d['ICN'], d['OCN']) / m)) + # max_nearby_clipped_reads = round(3 + sf * cov) + max_nearby_clipped_reads = round(1 + sf * cov) + if clipped_reads >= max_nearby_clipped_reads: return True return False -def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, keep_all, sample, - support_fraction): +def poorly_aligned_region(sum_edit_distance, sum_n_small_gap, NMbase, n_aligns, fmt, max_divergence=0.08): + # Normal-reads error + if sum_edit_distance/n_aligns > max_divergence or sum_n_small_gap/n_aligns > max_divergence / 
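has_low_support now scales coverage by the inner/outer copy-number ratio before deriving the minimum support count, so a confident copy-number change does not inflate the expected read depth. Roughly, assuming the COV/ICN/OCN FORMAT fields shown above (the support_fraction value is illustrative):

    def min_support_threshold(cov, icn, ocn, support_fraction=0.05):
        # Sketch of the patched logic: scale coverage by the smaller/larger
        # copy-number ratio, then derive the support cutoff from it
        m = max(icn, ocn)
        if m > 0:
            cov = cov * (min(icn, ocn) / m)
        return round(1.5 + support_fraction * cov)

    # An SU value below this threshold marks the call as lowSupport
    print(min_support_threshold(cov=30, icn=2.0, ocn=4.0))  # 2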
2: + return True + # Variant error rate > 4 times the normal-reads error rate + NMbase = (NMbase / n_aligns) * 100 + if "NMB" in fmt and fmt["NMB"] > 4 * NMbase: + return True + +def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, sample, + support_fraction, max_divergence): ct = r.info["CT"] chromA = r.chrom posA = r.pos @@ -681,6 +783,14 @@ def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, has_contig = 'CONTIG' in r.info or 'CONTIG2' in r.info or r.alts[0][0] != "<" is_paired_end = False good_step = good_step_translocation(r, sample) + sum_edit_distance = 0 + sum_n_small_gap = 0 + n_aligns = 0 + NMbase = 0 + # debug = True + + debug = False + # debug = True for is_paired_end, aln in iterate_bams_single_region(bams, chromA, posA, pad, bam_is_paired_end): if is_paired_end: distance = 50 @@ -688,36 +798,38 @@ def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, if aln.flag & 12 or (aln.flag & 2 and not has_clip(aln)): continue if chromB == infile.get_reference_name(aln.rnext) and is_overlapping(next_start, next_end, aln.pnext - pad, aln.pnext + pad): - if keep_all: - r.filter.add("normal") - return False + return "normal" if matching_supplementary(aln, infile, posA, posB): - if keep_all: - r.filter.add("normal") - return False + return "normal" if has_contig: cache_nearby_soft_clip(posA, posB, aln, ct, "TRA", cached, distance, clip_length) else: + n_aligns += 1 + a_bases, large_gaps, n_small_gaps = n_aligned_bases(aln) + if a_bases > 0: + sum_n_small_gap += n_small_gaps / a_bases + if aln.has_tag("NM"): + if a_bases: + nm = float(aln.get_tag("NM")) + sum_edit_distance += nm / a_bases + NMbase += (nm - large_gaps) / a_bases distance = 500 clip_length = 50 if aln.flag & 4 or not has_clip(aln): continue if matching_supplementary(aln, infile, posA, posB): - if keep_all: - r.filter.add("normal") - return False + if debug: echo("tra0") + return "normal" if not good_step and matching_ins_translocation(posA, aln): - if keep_all: - r.filter.add("normal") - return False + if debug: echo("tra1") + return "normal" if has_contig: cache_nearby_soft_clip(posA, posB, aln, ct, "TRA", cached, distance, clip_length) if any_nearby_soft_clip(posA, posB, aln, ct, "TRA", 30, clip_length=50): nearby_soft_clips += 1 elif any_nearby_soft_clip(posA, posB, aln, ct, "TRA", 30, clip_length=250): - if keep_all: - r.filter.add("lowSupport") - return False + if debug: echo("tra2") + return "lowSupport" for is_paired_end, aln in iterate_bams_single_region(bams, chromB, posB, pad, bam_is_paired_end): if is_paired_end: @@ -726,51 +838,55 @@ def process_translocation(r, chromB, posB, bams, infile, bam_is_paired_end, pad, if aln.flag & 12 or (aln.flag & 2 and not has_clip(aln)): continue if r.chrom == infile.get_reference_name(aln.rnext) and is_overlapping(current_start, current_end, aln.pnext - pad, aln.pnext + pad): - if keep_all: - r.filter.add("normal") - return False + return "normal" if matching_supplementary(aln, infile, posB, posA): - if keep_all: - r.filter.add("normal") - return False + return "normal" if has_contig: cache_nearby_soft_clip(r.pos, posB, aln, ct, "TRA", cached, distance, clip_length) else: + n_aligns += 1 + a_bases, large_gaps, n_small_gaps = n_aligned_bases(aln) + if a_bases > 0: + sum_n_small_gap += n_small_gaps / a_bases + if aln.has_tag("NM"): + if a_bases: + nm = float(aln.get_tag("NM")) + sum_edit_distance += nm / a_bases + NMbase += (nm - large_gaps) / a_bases distance = 500 clip_length = 50 if aln.flag & 4 or 
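poorly_aligned_region applies two cutoffs: the mean per-base divergence of the normal reads, and the variant's NMB rate measured against four times that background. A standalone sketch using the patch's field names:

    def poorly_aligned(sum_edit_distance, sum_n_small_gap, nm_base, n_aligns,
                       fmt, max_divergence=0.08):
        # Sketch of the highDivergence test: flag regions where normal reads
        # are noisy, or where variant NMB exceeds 4x the normal background
        if n_aligns == 0:
            return False
        if sum_edit_distance / n_aligns > max_divergence:
            return True
        if sum_n_small_gap / n_aligns > max_divergence / 2:
            return True
        background = (nm_base / n_aligns) * 100
        return "NMB" in fmt and fmt["NMB"] > 4 * background

    print(poorly_aligned(0.5, 0.1, 0.02, 10, {"NMB": 1.2}))  # True: NMB >> background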
not has_clip(aln): continue if matching_supplementary(aln, infile, posB, posA): - if keep_all: - r.filter.add("normal") - return False + if debug: echo("tra3") + return "normal" if not good_step and matching_ins_translocation(posB, aln): - if keep_all: - r.filter.add("normal") - return False + if debug: echo("tra4") + return "normal" if has_contig: cache_nearby_soft_clip(posA, posB, aln, ct, "TRA", cached, distance, clip_length) if any_nearby_soft_clip(r.pos, posB, aln, ct, "TRA", 30, clip_length=50): nearby_soft_clips += 1 elif any_nearby_soft_clip(posA, posB, aln, ct, "TRA", 30, clip_length=250): - if keep_all: - r.filter.add("lowSupport") - return False + if debug: echo("tra5") + return "lowSupport" if not is_paired_end and too_many_clipped_reads(r, nearby_soft_clips, support_fraction): - if keep_all: - r.filter.add("lowSupport") - return False + if debug: echo("tra6") + return "lowSupport" if cached and matching_soft_clips(r, cached, is_paired_end): - if keep_all: - r.filter.add("normal") - return False - return True + if debug: echo("tra7") + return "normal" + + if not is_paired_end and poorly_aligned_region(sum_edit_distance, sum_n_small_gap, NMbase, n_aligns, r.samples[sample], max_divergence): + return "highDivergence" + return None -def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pad, sample, keep_all): +def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pad, sample, max_divergence): svlen = r.info["SVLEN"] svtype = r.info["SVTYPE"] + any_contigs = ("CONTIGA" in r.info and r.info["CONTIGA"]) or ("CONTIGB" in r.info and r.info["CONTIGB"]) is_insertion = svtype == "INS" posA = r.pos ct = r.info["CT"] @@ -780,7 +896,24 @@ def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pa is_paired_end = False n_mapq_0 = 0 n_overlpapping = 0 + n_aligns = 0 + sum_edit_distance = 0 + sum_n_small_gap = 0 + NMbase = 0 + + debug = False + # debug = True for is_paired_end, aln in iterate_bams(bams, r.chrom, posA, r.chrom, posB, pad, bam_is_paired_end): + n_aligns += 1 + a_bases, large_gaps, n_small_gaps = n_aligned_bases(aln) + if a_bases > 0: + sum_n_small_gap += n_small_gaps / a_bases + if aln.has_tag("NM"): + if a_bases: + nm = float(aln.get_tag("NM")) + sum_edit_distance += nm / a_bases + NMbase += (nm - large_gaps) / a_bases + if is_paired_end: a_posA = min(aln.pos, aln.pnext) a_posB = max(aln.pos, aln.pnext) @@ -788,37 +921,32 @@ def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pa continue if aln.rname != aln.rnext: if matching_supplementary(aln, infile, posA, posB): - if keep_all: - r.filter.add("normal") - return False + if debug: echo("1") + return "normal" cache_nearby_soft_clip(posA, posB, aln, ct, svtype, cached) continue if abs(a_posA - posA) > pad or abs(a_posB - posB) > pad: continue if not is_insertion and not aln.flag & 10: # proper pair, mate unmapped - if span_position_distance(posA, posB, a_posA, a_posB, pos_threshold=20, span_threshold=0.5): - if keep_all: - r.filter.add("normal") - return False + if span_position_distance(posA, posB, a_posA, a_posB, pos_threshold=20, span_threshold=0.75): + if debug: echo("2") + return "normal" if not is_insertion and matching_supplementary(aln, infile, posA, posB): - if keep_all: - r.filter.add("normal") - return False - if svlen < 80 and matching_gap(posA, posB, aln, svtype, is_insertion, svlen): - if keep_all: - r.filter.add("normal") - return False + if debug: echo("3") + return "normal" + if svlen < 80 and matching_gap(posA, posB, 
aln, svtype, is_insertion, svlen, span_threshold=0.75): + if debug: echo("4") + return "normal" cache_nearby_soft_clip(posA, posB, aln, ct, svtype, cached, distance=50, clip_length=3) else: - if matching_gap(posA, posB, aln, svtype, is_insertion, svlen, pos_threshold=30, span_threshold=0.7): - if keep_all: - r.filter.add("normal") - return False + cipos95 = r.info["CIPOS95"] if "CIPOS95" in r.info else 0 + if matching_gap(posA, posB, aln, svtype, is_insertion, svlen, pos_threshold=30, span_threshold=0.75, paired_end=False, cipos95=cipos95): + if debug: echo("5", svlen, aln.qname) + return "normal" if matching_supplementary(aln, infile, posA, posB): - if keep_all: - r.filter.add("normal") - return False + if debug: echo("6") + return "normal" if is_overlapping(posA, posA + 1, aln.pos, aln.reference_end): n_overlpapping += 1 if aln.mapq == 0: @@ -830,23 +958,22 @@ def process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pa nearby_soft_clips += 1 cache_nearby_soft_clip(posA, posB, aln, ct, svtype, cached, distance=min(500, svlen * 0.5), clip_length=50) - if not is_paired_end and not covered: - if keep_all: - r.filter.add("lowSupport") - return False + if not is_paired_end and not covered or has_low_covering_support(covered, n_mapq_0): + if debug: echo("7") + return "lowSupport" if not is_paired_end and has_low_WR_support(r, sample, support_fraction, n_overlpapping, n_mapq_0): - if keep_all: - r.filter.add("lowSupport") - return False - if not is_paired_end and too_many_clipped_reads(r, nearby_soft_clips, support_fraction): - if keep_all: - r.filter.add("lowSupport") - return False + if debug: echo("8") + return "lowSupport" + if (not is_paired_end or (svtype in "INVTRA" and not any_contigs)) and too_many_clipped_reads(r, nearby_soft_clips, support_fraction): + if debug: echo("9") + return "lowSupport" if cached and matching_soft_clips(r, cached, is_paired_end): - if keep_all: - r.filter.add("normal") - return False - return True + if debug: echo("10") + return "normal" + if not is_paired_end and poorly_aligned_region(sum_edit_distance, sum_n_small_gap, NMbase, n_aligns, r.samples[sample], max_divergence): + return "highDivergence" + + return None def update_filter_value(r, sample_name, old_filter_values, pass_prob, new_value=""): @@ -863,6 +990,28 @@ def update_filter_value(r, sample_name, old_filter_values, pass_prob, new_value= r.filter.add(v) +def check_for_interval_overlap(intervals, r, chrom_tid, chrom2_tid, posB, filter_results): + if not intervals: + return False + + svtype = r.info["SVTYPE"] + if chrom_tid in intervals and svtype in intervals[chrom_tid]: + posA_overlaps = set(intervals[chrom_tid][svtype].allOverlappingIntervals(r.pos, r.pos + 1)) + if chrom2_tid in intervals and svtype in intervals[chrom2_tid]: + posB_overlaps = set(intervals[chrom2_tid][svtype].allOverlappingIntervals(posB, posB + 1)) + matching_svlens = posA_overlaps.intersection(posB_overlaps) + if matching_svlens: + for other_svlen in matching_svlens: + if "SVLEN" in r.info: + if r.info["SVLEN"] > 500 or r.info["SVLEN"] < 100: + t = 0.8 + else: + t = 0.5 + if min(other_svlen, r.info["SVLEN"]) / max(other_svlen, r.info["SVLEN"]) > t: + filter_results['normal'] += 1 + return True + + def run_filtering(args): t0 = time.time() pths = get_bam_paths(args) @@ -875,23 +1024,37 @@ def run_filtering(args): pad = args["interval_size"] keep_all = args["keep_all"] min_prob = args['min_prob'] + min_mapq = args['min_mapq'] pass_prob = args['pass_prob'] support_fraction = args['support_fraction'] + 
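check_for_interval_overlap, defined above, only accepts a panel-of-normals hit when both breakpoints overlap and the SVLENs agree, with a looser 0.5 ratio for mid-sized events. The size test in isolation (svlen_matches is a hypothetical name):

    def svlen_matches(svlen, other_svlen):
        # Sketch of the reciprocal size test against a panel SV: mid-range
        # events (100-500 bp) use a looser 0.5 cutoff, all others 0.8
        t = 0.8 if (svlen > 500 or svlen < 100) else 0.5
        return min(svlen, other_svlen) / max(svlen, other_svlen) > t

    print(svlen_matches(300, 200))   # True: 200/300 = 0.67 > 0.5
    print(svlen_matches(1000, 700))  # False: 0.7 < 0.8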
max_divergence = args['max_divergence'] filter_results = defaultdict(int) written = 0 + samp = list(vcf.header.samples)[0] + for idx, r in enumerate(vcf): - # if r.id != "25979": + # if r.id != "207736": # continue + # echo(dict(r.info)) + # echo(dict(r.samples[sample_name])) old_filter_value = list(r.filter.keys()) r.filter.clear() + mq_pri = r.samples[samp]["MAPQP"] + mq_sup = r.samples[samp]["MAPQS"] if "MAPQS" in r.samples[samp] else 0 + if mq_pri < min_mapq and mq_sup < min_mapq: + filter_results['lowMapQ'] += 1 + if keep_all: + update_filter_value(r, old_filter_value, pass_prob, new_value="lowMapQ") + out_vcf.write(r) + continue if min_prob != 0 and 'PROB' in r.samples[sample_name] and r.samples[sample_name]['PROB'] < min_prob: - filter_results['dropped, lowProb'] += 1 + filter_results['lowProb'] += 1 if keep_all: update_filter_value(r, old_filter_value, pass_prob, new_value="lowProb") out_vcf.write(r) continue if has_low_support(r, sample_name, support_fraction): - filter_results['dropped, low support'] += 1 + filter_results['lowSupport'] += 1 if keep_all: update_filter_value(r, sample_name, old_filter_value, pass_prob, new_value="lowSupport") out_vcf.write(r) @@ -900,17 +1063,13 @@ def run_filtering(args): posB = get_posB(r) if chrom_tid == -1: logging.exception(f'Chromosome name {r.chrom} not found in bam file header') - if intervals: - if chrom_tid in intervals: - posA_overlaps = set(intervals[chrom_tid].allOverlappingIntervals(r.pos, r.pos+1)) - if chrom2_tid in intervals: - posB_overlaps = set(intervals[chrom2_tid].allOverlappingIntervals(posB, posB + 1)) - if posA_overlaps.intersection(posB_overlaps): - filter_results['dropped, normal SV overlap'] += 1 - if keep_all: - update_filter_value(r, sample_name, old_filter_value, pass_prob, new_value="normal") - out_vcf.write(r) - continue + + if check_for_interval_overlap(intervals, r, chrom_tid, chrom2_tid, posB, filter_results): + if keep_all: + update_filter_value(r, sample_name, old_filter_value, pass_prob, new_value="normalOverlap") + out_vcf.write(r) + continue + if not bams: update_filter_value(r, sample_name, old_filter_value, pass_prob) out_vcf.write(r) @@ -918,18 +1077,19 @@ def run_filtering(args): continue sv_type = get_sv_type(r, chrom_tid, chrom2_tid) if sv_type == "TRA" or sv_type == "BND": - good = process_translocation(r, r.info["CHR2"], posB, bams, infile, bam_is_paired_end, pad=pad, keep_all=keep_all, sample=sample_name, support_fraction=support_fraction) + reason = process_translocation(r, r.info["CHR2"], posB, bams, infile, bam_is_paired_end, pad=pad, sample=sample_name, support_fraction=support_fraction, max_divergence=max_divergence) else: - good = process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pad=pad, sample=sample_name, keep_all=keep_all) - if good: + reason = process_intra(r, posB, bams, infile, bam_is_paired_end, support_fraction, pad=pad, sample=sample_name, max_divergence=max_divergence) + if reason is None: update_filter_value(r, sample_name, old_filter_value, pass_prob) out_vcf.write(r) written += 1 else: if keep_all: - update_filter_value(r, sample_name, old_filter_value, pass_prob, new_value="normal") + r.filter.add(reason) + update_filter_value(r, sample_name, old_filter_value, pass_prob, new_value=reason) out_vcf.write(r) - filter_results['dropped, normal read support'] += 1 + filter_results[f'{reason}'] += 1 out_vcf.close() logging.info(f'Filter results: {dict(sorted(filter_results.items()))}') logging.info("dysgu filter {} complete, n={}, h:m:s, 
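The new lowMapQ gate drops a call only when both the primary and supplementary mean mapping qualities fall below --min-mapq. In isolation (low_mapq is an illustrative helper over the MAPQP/MAPQS FORMAT fields):

    def low_mapq(fmt, min_mapq=10):
        # Sketch of the lowMapQ gate: a decent supplementary MAPQ can
        # rescue a call whose primary MAPQ is poor, and vice versa
        mq_pri = fmt["MAPQP"]
        mq_sup = fmt.get("MAPQS", 0)
        return mq_pri < min_mapq and mq_sup < min_mapq

    print(low_mapq({"MAPQP": 5, "MAPQS": 40}))  # False: supplementary MAPQ rescues it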
{}".format(args['input_vcf'], diff --git a/dysgu/find_reads.hpp b/dysgu/find_reads.hpp index 19e4aa7..7138af1 100644 --- a/dysgu/find_reads.hpp +++ b/dysgu/find_reads.hpp @@ -14,6 +14,22 @@ #include +void remove_extraneous_tags(bam1_t* src, int remove_extra_tags) { + // needed NM, AS, SA, XS, but remove some of the large offenders to save a bit of disk space + int i=0; + for (auto &t : {"ML", "MM", "MD"}) { + uint8_t* data = bam_aux_get(src, t); + if (data != nullptr) { + bam_aux_del(src, data); + } + i += 1; + if (i >= remove_extra_tags) { + break; + } + } +} + + class CoverageTrack { public: @@ -107,7 +123,7 @@ int process_alignment(int& current_tid, std::deque> robin_hood::unordered_set& read_names, CoverageTrack& cov_track, uint64_t& total, const int n_chromosomes, const int mapq_thresh, const int check_clips, const int min_within_size, sam_hdr_t **samHdr, htsFile **f_out, - std::string& temp_folder, const bool write_all, long *n_aligned_bases) { + std::string& temp_folder, const bool write_all, long *n_aligned_bases, int remove_extra_tags) { int result; bam1_t* aln; @@ -128,6 +144,9 @@ int process_alignment(int& current_tid, std::deque> // Check if write queue is full if (write_queue.size() > max_write_queue) { for (const auto& val: write_queue) { + if (remove_extra_tags) { + remove_extraneous_tags(val, remove_extra_tags); + } result = sam_write1(*f_out, *samHdr, val); if (result < 0) { return -1; } total += 1; @@ -317,7 +336,7 @@ int search_hts_alignments(char* infile, char* outfile, uint32_t min_within_size, std::string region_string = region; long n_aligned_bases = 0; - + int remove_extra_tags = 0; // iterate through regions if (region_string != ".,") { @@ -342,12 +361,21 @@ int search_hts_alignments(char* infile, char* outfile, uint32_t min_within_size, // Read alignment into the back of scope queue while ( sam_itr_next(fp_in, iter, scope.back().second) >= 0 ) { - + if (total == 0) { + for (auto &t : {"ML", "MM", "MD"}) { + uint8_t* data = bam_aux_get(scope.back().second, t); + if (data != nullptr) { + remove_extra_tags += 1; + } else { + break; + } + } + } int success = process_alignment(current_tid, scope, write_queue, max_scope, max_write_queue, clip_length, read_names, cov_track, total, n_chromosomes, mapq_thresh, check_clips, min_within_size, - &samHdr, &f_out, temp_folder, write_all, &n_aligned_bases); + &samHdr, &f_out, temp_folder, write_all, &n_aligned_bases, remove_extra_tags); if (success < 0) { std::cerr << "Failed to process input alignment. Stopping" << region << std::endl; break; @@ -357,12 +385,21 @@ int search_hts_alignments(char* infile, char* outfile, uint32_t min_within_size, } else { // iterate whole alignment file (no index needed) while (sam_read1(fp_in, samHdr, scope.back().second) >= 0) { - + if (total == 0) { + for (auto &t : {"ML", "MM", "MD"}) { + uint8_t* data = bam_aux_get(scope.back().second, t); + if (data != nullptr) { + remove_extra_tags += 1; + } else { + break; + } + } + } int success = process_alignment(current_tid, scope, write_queue, max_scope, max_write_queue, clip_length, read_names, cov_track, total, n_chromosomes, mapq_thresh, check_clips, min_within_size, - &samHdr, &f_out, temp_folder, write_all, &n_aligned_bases); + &samHdr, &f_out, temp_folder, write_all, &n_aligned_bases, remove_extra_tags); if (success < 0) { std::cerr << "Failed to process input alignment. 
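remove_extraneous_tags strips the bulky ML/MM/MD aux tags before reads are written, keeping NM/AS/SA/XS for downstream calling. A rough pysam equivalent for reference (the patch does this natively with bam_aux_del for speed; in pysam, set_tag with value None deletes a tag):

    import pysam  # illustrative equivalent of remove_extraneous_tags()

    def strip_large_tags(aln: pysam.AlignedSegment, tags=("ML", "MM", "MD")):
        # Remove bulky per-base tags to save disk space; NM/AS/SA/XS are
        # kept because downstream calling needs them
        for t in tags:
            if aln.has_tag(t):
                aln.set_tag(t, None)  # value None deletes the tag
        return aln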
Stopping" << region << std::endl; break; @@ -384,6 +421,9 @@ int search_hts_alignments(char* infile, char* outfile, uint32_t min_within_size, } for (const auto& val: write_queue) { + if (remove_extra_tags) { + remove_extraneous_tags(val, remove_extra_tags); + } result = sam_write1(f_out, samHdr, val); if (result < 0) { return -1; } total += 1; diff --git a/dysgu/graph.pyx b/dysgu/graph.pyx index 9a0f720..26d4e1b 100644 --- a/dysgu/graph.pyx +++ b/dysgu/graph.pyx @@ -7,7 +7,6 @@ import sortedcontainers import cython from cpython cimport array import array -import re import logging from dysgu.map_set_utils cimport unordered_map as robin_map, Py_SimpleGraph from dysgu.map_set_utils cimport multimap as cpp_map @@ -23,7 +22,7 @@ from libcpp.deque cimport deque as cpp_deque from libcpp.vector cimport vector from libcpp.pair cimport pair as cpp_pair from libcpp.unordered_map cimport unordered_map -from libc.stdint cimport uint16_t, uint32_t, int32_t, uint64_t +from libc.stdint cimport uint8_t, uint16_t, uint32_t, int32_t, uint64_t from libc.stdlib cimport abs as c_abs from cython.operator import dereference, postincrement, postdecrement, preincrement, predecrement from pysam.libcalignedsegment cimport AlignedSegment @@ -280,20 +279,21 @@ cdef LocalVal make_local_val(int chrom2, int pos2, int node_name, ReadEnum_t rea cdef class PairedEndScoper: - cdef int clst_dist - cdef int max_dist - cdef int local_chrom + cdef int clst_dist, max_dist, local_chrom, max_search_depth cdef cpp_map[int, LocalVal] loci # Track the local breaks and mapping locations cdef vector[cpp_map[int, LocalVal]] chrom_scope # Track the mate-pair breaks and locations cdef float norm - cdef float thresh + cdef float thresh # spd + cdef float position_distance_thresh cdef bint paired_end - def __init__(self, max_dist, clst_dist, n_references, norm, thresh, paired_end): + def __init__(self, max_dist, clst_dist, n_references, norm, thresh, paired_end, position_distance_thresh, max_search_depth): self.clst_dist = clst_dist self.max_dist = max_dist + self.max_search_depth = max_search_depth self.local_chrom = -1 self.norm = norm self.thresh = thresh + self.position_distance_thresh = position_distance_thresh self.paired_end = paired_end cdef cpp_map[int, LocalVal] scope for n in range(n_references + 1): # Add one for special 'insertion chromosome' @@ -347,28 +347,26 @@ cdef class PairedEndScoper: local_it = forward_scope.lower_bound(pos2) steps = 0 if local_it != forward_scope.end(): - while steps < 20: #6: + while local_it != forward_scope.end() and steps < self.max_search_depth: vitem = dereference(local_it) + preincrement(local_it) + steps += 1 + if (read_enum == DELETION and vitem.second.read_enum == INSERTION) or (read_enum == INSERTION and vitem.second.read_enum == DELETION): - steps += 1 continue node_name2 = vitem.second.node_name if node_name2 != node_name: # Can happen due to within-read events - # if node_name == 77: - # echo("-->", node_name, node_name2, current_chrom == chrom2, current_pos, pos2, pos2 - current_pos, vitem.second.pos2 - vitem.first, - # is_reciprocal_overlapping(current_pos, pos2, vitem.first, vitem.second.pos2), - # ) - # echo(read_enum, vitem.second.read_enum, read_enum == INSERTION, vitem.second.read_enum == DELETION) if current_chrom != chrom2 or is_reciprocal_overlapping(current_pos, pos2, vitem.first, vitem.second.pos2): sep = c_abs(vitem.first - pos2) + if sep >= self.max_dist: + break sep2 = c_abs(vitem.second.pos2 - current_pos) - if sep < self.max_dist and sep2 < self.max_dist: - if sep < 35: + 
if vitem.second.chrom2 == chrom2 and sep2 < self.max_dist: + if sep < 150:#35: if length_from_cigar > 0 and vitem.second.length_from_cigar > 0: max_span = max(length_from_cigar, vitem.second.length_from_cigar) span_distance = c_abs(length_from_cigar - vitem.second.length_from_cigar) / max_span - # echo("hi", node_name2, (length_from_cigar, vitem.second.length_from_cigar), span_distance, self.thresh) - if span_distance < 0.8: + if span_distance < self.position_distance_thresh: found_exact.push_back(node_name2) else: found_exact.push_back(node_name2) @@ -379,54 +377,43 @@ cdef class PairedEndScoper: elif span_position_distance(current_pos, pos2, vitem.first, vitem.second.pos2, self.norm, self.thresh, read_enum, self.paired_end, length_from_cigar, vitem.second.length_from_cigar, trust_ins_len): found2.push_back(node_name2) - if sep >= self.max_dist: - break - preincrement(local_it) - steps += 1 - if local_it == forward_scope.end(): - break + if not found_exact.empty(): + return found_exact - if found_exact.empty(): - local_it = forward_scope.lower_bound(pos2) - vitem = dereference(local_it) - if local_it != forward_scope.begin(): - predecrement(local_it) # Move back one before staring search, otherwise same value is processed twice - steps = 0 - while steps < 20: # 6: - vitem = dereference(local_it) - if (read_enum == DELETION and vitem.second.read_enum == INSERTION) or (read_enum == INSERTION and vitem.second.read_enum == DELETION): - steps += 1 - continue - node_name2 = vitem.second.node_name - if node_name2 != node_name: - # if node_name == 1: - # echo("2", node_name, node_name2, read_enum) #, is_reciprocal_overlapping(current_pos, pos2, vitem.first, vitem.second.pos2), span_position_distance(current_pos, pos2, vitem.first, vitem.second.pos2, self.norm, self.thresh, read_enum, self.paired_end, length_from_cigar, vitem.second.length_from_cigar)) - # if node_name == 77: - # echo('r', current_pos, pos2, vitem.first, vitem.second.pos2, span_position_distance(current_pos, pos2, vitem.first, vitem.second.pos2, self.norm, self.thresh, read_enum, self.paired_end, length_from_cigar, vitem.second.length_from_cigar, trust_ins_len)) - if current_chrom != chrom2 or is_reciprocal_overlapping(current_pos, pos2, vitem.first, vitem.second.pos2): - sep = c_abs(vitem.first - pos2) - sep2 = c_abs(vitem.second.pos2 - current_pos) - if sep < self.max_dist and vitem.second.chrom2 == chrom2 and \ - sep2 < self.max_dist: - if sep < 35: - if length_from_cigar > 0 and vitem.second.length_from_cigar > 0: - max_span = max(length_from_cigar, vitem.second.length_from_cigar) - span_distance = c_abs(length_from_cigar - vitem.second.length_from_cigar) / max_span - # echo("hi2", node_name2, (length_from_cigar, vitem.second.length_from_cigar), span_distance, self.thresh) - if span_distance < 0.8: - found_exact.push_back(node_name2) - else: + local_it = forward_scope.lower_bound(pos2) + vitem = dereference(local_it) + if local_it != forward_scope.begin(): + predecrement(local_it) # Move back one before staring search, otherwise same value is processed twice + steps = 0 + while local_it != forward_scope.begin() and steps < self.max_search_depth: + vitem = dereference(local_it) + predecrement(local_it) + steps += 1 + if (read_enum == DELETION and vitem.second.read_enum == INSERTION) or (read_enum == INSERTION and vitem.second.read_enum == DELETION): + continue + node_name2 = vitem.second.node_name + if node_name2 != node_name: + if current_chrom != chrom2 or is_reciprocal_overlapping(current_pos, pos2, vitem.first, 
vitem.second.pos2): + sep = c_abs(vitem.first - pos2) + if sep >= self.max_dist: + break + sep2 = c_abs(vitem.second.pos2 - current_pos) + if vitem.second.chrom2 == chrom2 and sep2 < self.max_dist: + if sep < 150:#35: + if length_from_cigar > 0 and vitem.second.length_from_cigar > 0: + max_span = max(length_from_cigar, vitem.second.length_from_cigar) + span_distance = c_abs(length_from_cigar - vitem.second.length_from_cigar) / max_span + if span_distance < self.position_distance_thresh: found_exact.push_back(node_name2) - elif span_position_distance(current_pos, pos2, vitem.first, vitem.second.pos2, self.norm, self.thresh, read_enum, self.paired_end, length_from_cigar, vitem.second.length_from_cigar, trust_ins_len): - found2.push_back(node_name2) + else: + found_exact.push_back(node_name2) elif span_position_distance(current_pos, pos2, vitem.first, vitem.second.pos2, self.norm, self.thresh, read_enum, self.paired_end, length_from_cigar, vitem.second.length_from_cigar, trust_ins_len): found2.push_back(node_name2) elif span_position_distance(current_pos, pos2, vitem.first, vitem.second.pos2, self.norm, self.thresh, read_enum, self.paired_end, length_from_cigar, vitem.second.length_from_cigar, trust_ins_len): found2.push_back(node_name2) - if local_it == forward_scope.begin() or sep >= self.max_dist: - break - predecrement(local_it) - steps += 1 + elif span_position_distance(current_pos, pos2, vitem.first, vitem.second.pos2, self.norm, self.thresh, read_enum, self.paired_end, length_from_cigar, vitem.second.length_from_cigar, trust_ins_len): + found2.push_back(node_name2) + if not found_exact.empty(): return found_exact else: @@ -458,72 +445,94 @@ cdef class PairedEndScoper: forward_scope.insert(pp) +cdef struct TemplateNode: + int query_start + int node + int flag + + cdef class TemplateEdges: - cdef public unordered_map[string, vector[int]] templates_s + cdef public unordered_map[string, vector[TemplateNode]] templates_s # query_start, node, flag def __init__(self): pass cdef void add(self, str template_name, int flag, int node, int query_start): - cdef vector[int] val cdef bytes key = bytes(template_name, encoding="utf8") - val.push_back(query_start) - val.push_back(node) - val.push_back(flag) - self.templates_s[key].insert(self.templates_s[key].end(), val.begin(), val.end()) + self.templates_s[key].resize(self.templates_s[key].size() + 1) + cdef TemplateNode *t = &self.templates_s[key].back() + t.query_start = query_start + t.flag = t.flag + t.node = node cdef void add_template_edges(Py_SimpleGraph G, TemplateEdges template_edges): # this function joins up template reads (read 1, read 2, plus any supplementary) - cdef int ii, u_start, v_start, u, v, uflag, vflag - cdef unordered_map[string, vector[int]].iterator it = template_edges.templates_s.begin() - cdef vector[int] arr + cdef int i, j, u_start, v_start, u, v, uflag, vflag, primary1, primary2, n1, n2 + cdef unordered_map[string, vector[TemplateNode]].iterator it = template_edges.templates_s.begin() + cdef vector[TemplateNode] arr + cdef TemplateNode *t while it != template_edges.templates_s.end(): - arr = dereference(it).second # Array values are query start, node-name, flag + arr = dereference(it).second postincrement(it) read1_aligns = [] read2_aligns = [] - for ii in range(0, arr.size(), 3): - if arr[ii + 2] & 64: # first in pair - read1_aligns.append(arr[ii:ii + 3]) + for i in range(0, arr.size()): + t = &arr[i] + if t.flag & 64: # first in pair + read1_aligns.append((t.query_start, t.node, t.flag)) else: - 
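For candidate pairs separated by fewer than 150 bp, the scoper now compares the CIGAR-derived SV lengths directly against the new --sd threshold (position_distance_thresh), which is what lets multi-allelic sites stay separate. That test in isolation (lengths_match is a hypothetical name):

    def lengths_match(len_a, len_b, position_distance_thresh=0.8):
        # Sketch of the exact-match test for nearby events: the relative
        # difference in CIGAR-derived SV lengths must be under --sd
        if len_a > 0 and len_b > 0:
            max_span = max(len_a, len_b)
            span_distance = abs(len_a - len_b) / max_span
            return span_distance < position_distance_thresh
        return True  # accept when a length is unknown, as the patch does

    print(lengths_match(100, 120))  # True: 20/120 = 0.17 < 0.8
    print(lengths_match(100, 1000, position_distance_thresh=0.5))  # False: 0.9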
read2_aligns.append(arr[ii:ii + 3]) - primary1 = None - primary2 = None - if len(read1_aligns) > 0: - if len(read1_aligns) == 1: + read2_aligns.append((t.query_start, t.node, t.flag)) + primary1 = -1 + primary2 = -1 + n1 = len(read1_aligns) + n2 = len(read2_aligns) + if n1 > 0: + if n1 == 1: if not read1_aligns[0][2] & 2304: # Is primary primary1 = read1_aligns[0][1] else: - if len(read1_aligns) > 2: - read1_aligns = sorted(read1_aligns) + if n1 > 2: + read1_aligns.sort() # Add edge between alignments that are neighbors on the query sequence, or between primary alignments - for ii in range(len(read1_aligns) - 1): - u_start, u, uflag = read1_aligns[ii] + for i in range(n1 - 1): + u_start, u, uflag = read1_aligns[i] if not uflag & 2304: primary1 = u - v_start, v, vflag = read1_aligns[ii + 1] + v_start, v, vflag = read1_aligns[i + 1] if not G.hasEdge(u, v): G.addEdge(u, v, w=1) - if primary1 is None: + if primary1 == -1: if not read1_aligns[-1][2] & 2304: primary1 = read1_aligns[-1][1] - if len(read2_aligns) > 0: - if len(read2_aligns) == 1: + if n2 > 0: + if n2 == 1: if not read2_aligns[0][2] & 2304: primary2 = read2_aligns[0][1] else: - if len(read2_aligns) > 2: - read2_aligns = sorted(read2_aligns) - for ii in range(len(read2_aligns) - 1): - u_start, u, uflag = read2_aligns[ii] + if n2 > 2: + read2_aligns.sort() + for i in range(n2 - 1): + u_start, u, uflag = read2_aligns[i] if not uflag & 2304: primary2 = u - v_start, v, vflag = read2_aligns[ii + 1] + v_start, v, vflag = read2_aligns[i + 1] if not G.hasEdge(u, v): G.addEdge(u, v, w=1) - if primary2 is None: + # Tried this, only edges between ajacent query aligns, not duplicate ones. but hurt performance! + # j = i + 1 + # while j < n2: + # v_start, v, vflag = read2_aligns[j] + # if v_start != u_start: + # if not G.hasEdge(u, v): + # G.addEdge(u, v, w=1) + # if j < n2 - 1 and read2_aligns[j + 1][0] == v_start: + # j += 1 + # continue + # break + # j += 1 + if primary2 == -1: if not read2_aligns[-1][2] & 2304: primary2 = read2_aligns[-1][1] - if primary1 is not None and primary2 is not None: + if primary1 >= 0 and primary2 >= 0: if not G.hasEdge(primary1, primary2): G.addEdge(primary1, primary2, w=1) @@ -545,35 +554,48 @@ cdef class NodeName: self.tell = t self.cigar_index = cigar_index self.event_pos = event_pos - def as_tuple(self): - return self.hash_name, self.flag, self.pos, self.chrom, self.tell, self.cigar_index, self.event_pos + def as_dict(self): + return {"hash_name": self.hash_name, + "flag": self.flag, + "chrom": self.chrom, + "pos": self.pos, + "tell": self.tell, + "cigar_index": self.cigar_index, + "event_pos": self.event_pos, + "read_enum": self.read_enum, + } + + +cdef struct NodeNameItem: + uint64_t hash_val, tell + uint32_t pos, event_pos + int32_t cigar_index + uint16_t flag, chrom + cdef class NodeToName: # Index these vectors to get the unique 'template_name' # node names have the form (hash qname, flag, pos, chrom, tell, cigar index, event pos) - cdef vector[uint64_t] h - cdef vector[uint16_t] f - cdef vector[uint32_t] p - cdef vector[uint16_t] c - cdef vector[uint64_t] t - cdef vector[int32_t] cigar_index - cdef vector[uint32_t] event_pos + cdef vector[NodeNameItem] stored_nodes def __cinit__(self): pass - cdef void append(self, long a, int b, int c, int d, long e, int f, int g) nogil: - self.h.push_back(a) - self.f.push_back(b) - self.p.push_back(c) - self.c.push_back(d) - if e == -1: # means to look in read buffer instead - e = 0 - self.t.push_back(e) - self.cigar_index.push_back(f) - self.event_pos.push_back(g) + 
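add_template_edges sorts each read's alignments by query start, links neighbours on the query sequence, and finally joins the two primary alignments. The per-read logic sketched in plain Python (link_template is illustrative; 2304 is the secondary/supplementary flag mask 0x900):

    def link_template(aligns, add_edge):
        # aligns: list of (query_start, node, flag) for one read of a template.
        # Sort by query position, join neighbouring alignments, and return
        # the primary node (flag without 0x900 set), or -1 if none found.
        if len(aligns) == 1:
            return aligns[0][1] if not aligns[0][2] & 2304 else -1
        primary = -1
        aligns.sort()
        for (u_start, u, uflag), (v_start, v, vflag) in zip(aligns, aligns[1:]):
            if not uflag & 2304:
                primary = u
            add_edge(u, v)
        if primary == -1 and not aligns[-1][2] & 2304:
            primary = aligns[-1][1]
        return primary

    edges = set()
    p1 = link_template([(0, 1, 0), (60, 2, 2048)], lambda u, v: edges.add((u, v)))
    print(p1, edges)  # 1 {(1, 2)}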
cdef void append(self, long hash_val, int flag, int pos, int chrom, long tell, int cigar_index, int event_pos): + self.stored_nodes.resize(self.stored_nodes.size() + 1); + cdef NodeNameItem *nd = &self.stored_nodes.back(); + nd.hash_val = hash_val + nd.flag = flag + nd.pos = pos + nd.chrom = chrom + if tell == -1: # means to look in read buffer instead + tell = 0 + nd.tell = tell + nd.cigar_index = cigar_index + nd.event_pos = event_pos def __getitem__(self, idx): - return NodeName(self.h[idx], self.f[idx], self.p[idx], self.c[idx], self.t[idx], self.cigar_index[idx], self.event_pos[idx]) + cdef NodeNameItem *nd = &self.stored_nodes[idx] + return NodeName(nd.hash_val, nd.flag, nd.pos, nd.chrom, nd.tell, nd.cigar_index, nd.event_pos) cdef bint same_template(self, int query_node, int target_node): - if self.h[query_node] == self.h[target_node]: + if self.stored_nodes[query_node].hash_val == self.stored_nodes[target_node].hash_val: return True cdef get_query_pos_from_cigarstring(cigar, pos): @@ -599,12 +621,38 @@ cdef get_query_pos_from_cigarstring(cigar, pos): return start, end, pos, ref_end -def get_query_pos_from_cigartuples(r): +cdef void parse_cigar(str cigar, int &start, int &end, int &ref_end): + cdef: + int in_lead = 1 + int num = 0 # To accumulate the number as we parse through the string + char op + cdef bytes t = cigar.encode('utf8') + cdef char *c_cigar = t # Convert Python string to char* + while dereference(c_cigar): + if b'0' <= dereference(c_cigar) <= b'9': + # Convert digit char to int and accumulate + num = num * 10 + (dereference(c_cigar) - 48 ) # ord(b'0') = 48 + else: + op = dereference(c_cigar) + if in_lead and (op == b'S' or op == b'H'): + start += num + end += num + elif op == b'D': + ref_end += num + elif op == b'I': + end += num + elif op == b'M' or op == b'=' or op == b'X': + end += num + ref_end += num + num = 0 + in_lead = 0 + c_cigar += 1 + +def get_query_pos_from_cigartuples(r, query_length): # Infer the position on the query sequence of the alignment using cigar string start = 0 - query_length = r.infer_read_length() # Note, this also counts hard-clips + # query_length = r.infer_read_length() # Note, this also counts hard-clips end = query_length - if r.cigartuples[0][0] == 4 or r.cigartuples[0][0] == 5: start += r.cigartuples[0][1] if r.cigartuples[-1][0] == 4 or r.cigartuples[-1][0] == 5: @@ -613,7 +661,7 @@ def get_query_pos_from_cigartuples(r): AlnBlock = namedtuple("SA", ["query_start", "query_end", "ref_start", "ref_end", "chrom", "mq", "strand", "this"]) -JoinEvent = namedtuple("JE", ["chrom", "event_pos", "chrom2", "pos2", "read_enum", "cigar_index"]) +JoinEvent = namedtuple("JE", ["chrom", "event_pos", "chrom2", "pos2", "query_pos", "query_end", "read_enum", "cigar_index"]) class AlignmentsSA: @@ -623,6 +671,7 @@ class AlignmentsSA: self.aln_strand = None self.query_aligns = [] self.join_result = [] + self.query_length = r.infer_read_length() # Note, this also counts hard-clips self._alignments_from_sa(r, gettid) def connect_alignments(self, r, max_dist=1000, mq_thresh=0, read_enum=0): @@ -633,24 +682,29 @@ class AlignmentsSA: self._connect_right(r, max_dist, mq_thresh, read_enum) def _alignments_from_sa(self, r, gettid): - qstart, qend, query_length = get_query_pos_from_cigartuples(r) + qstart, qend, query_length = get_query_pos_from_cigartuples(r, self.query_length) this_aln = AlnBlock(query_start=qstart, query_end=qend, ref_start=r.pos, ref_end=r.reference_end, chrom=r.rname, mq=r.mapq, strand="-" if r.flag & 16 else "+", this=True) self.aln_strand 
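parse_cigar replaces the old regex with a single pass over the SA tag's CIGAR string. The same state machine in Python for reference (semantics as in the patch: leading soft/hard clips shift both query coordinates, and trailing clips are ignored):

    def parse_cigar(cigar, start=0, end=0, ref_end=0):
        # One pass over a CIGAR string, accumulating query start/end and the
        # reference end; leading S/H clips offset the query coordinates
        num, in_lead = 0, True
        for ch in cigar:
            if ch.isdigit():
                num = num * 10 + (ord(ch) - 48)
                continue
            if in_lead and ch in "SH":
                start += num
                end += num
            elif ch == "D":
                ref_end += num
            elif ch == "I":
                end += num
            elif ch in "M=X":
                end += num
                ref_end += num
            num, in_lead = 0, False
        return start, end, ref_end

    print(parse_cigar("10S40M5I30M", ref_end=1000))  # (10, 85, 1070)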
= this_aln.strand query_aligns = [this_aln] + + cdef int ref_start, ref_end, query_start, query_end for sa_block in r.get_tag("SA").split(";"): if sa_block == "": break + query_start = 0 + query_end = 0 sa = sa_block.split(",", 5) - matches = [(int(slen), opp) for slen, opp in re.findall(r'(\d+)([A-Z]{1})', sa[3])] - query_start, query_end, ref_start, ref_end = get_query_pos_from_cigarstring(matches, int(sa[1])) + ref_start = int(sa[1]) + ref_end = ref_start + parse_cigar(sa[3], query_start, query_end, ref_end) if this_aln.strand != sa[2]: start_temp = query_length - query_end query_end = start_temp + query_end - query_start query_start = start_temp query_aligns.append(AlnBlock(query_start, query_end, ref_start, ref_end, gettid(sa[0]), int(sa[4]), sa[2], False)) - query_aligns = sorted(query_aligns) + query_aligns.sort() cdef int index = 0 for i, item in enumerate(query_aligns): if item.this: @@ -658,13 +712,16 @@ class AlignmentsSA: break self.query_aligns = query_aligns - def _connect_left(self, r, max_dist, mq_thresh, read_enum): #, max_gap_size=100): + def _connect_left(self, r, max_dist, mq_thresh, read_enum): a = self.query_aligns[self.index] b = self.query_aligns[self.index - 1] - # gap = abs(a.query_start - b.query_end) - # if gap > max_gap_size: - # return event_pos = a.ref_start + query_pos = a.query_start + query_end = a.query_end + if r.is_reverse: + qtemp = query_pos + query_pos = self.query_length - query_end + query_end = self.query_length - qtemp chrom = a.chrom cigar_index = 0 if b.strand == self.aln_strand: @@ -673,21 +730,24 @@ class AlignmentsSA: pos2 = b.ref_start chrom2 = b.chrom if b.mq < mq_thresh: - self.join_result.append(JoinEvent(chrom, event_pos, chrom, event_pos, ReadEnum_t.BREAKEND, cigar_index)) + self.join_result.append(JoinEvent(chrom, event_pos, chrom, event_pos, query_pos, query_end, ReadEnum_t.BREAKEND, cigar_index)) return elif self.paired: if not r.flag & 2304 and r.flag & 2 and r.pnext <= event_pos and (chrom != chrom2 or abs(pos2 - event_pos) > max_dist): - self.join_result.append(JoinEvent(chrom, event_pos, chrom, event_pos, ReadEnum_t.BREAKEND, cigar_index)) + self.join_result.append(JoinEvent(chrom, event_pos, chrom, event_pos, query_pos, query_end, ReadEnum_t.BREAKEND, cigar_index)) return - self.join_result.append(JoinEvent(chrom, event_pos, chrom2, pos2, read_enum, cigar_index)) + self.join_result.append(JoinEvent(chrom, event_pos, chrom2, pos2, query_pos, query_end, read_enum, cigar_index)) - def _connect_right(self, r, max_dist, mq_thresh, read_enum): #, max_gap_size=100): + def _connect_right(self, r, max_dist, mq_thresh, read_enum): a = self.query_aligns[self.index] b = self.query_aligns[self.index + 1] - # gap = abs(b.query_start - a.query_end) - # if gap > max_gap_size: - # return event_pos = a.ref_end + query_pos = a.query_start + query_end = a.query_end + if r.is_reverse: + qtemp = query_pos + query_pos = self.query_length - query_end + query_end = self.query_length - qtemp chrom = a.chrom cigar_index = len(r.cigartuples) - 1 if b.strand == self.aln_strand: @@ -696,17 +756,17 @@ class AlignmentsSA: pos2 = b.ref_end chrom2 = b.chrom if b.mq < mq_thresh: - self.join_result.append(JoinEvent(chrom, event_pos, chrom, event_pos, ReadEnum_t.BREAKEND, cigar_index)) + self.join_result.append(JoinEvent(chrom, event_pos, chrom, event_pos, query_pos, query_end, ReadEnum_t.BREAKEND, cigar_index)) return elif self.paired: # If paired, and SA block fits between two normal primary alignments, and block is not local, then # ignore block and try and 
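When the read aligns to the reverse strand, the query interval is mirrored so that query_pos/query_end always refer to the sequenced read's orientation, matching the flip applied in _connect_left and _connect_right. The transform in isolation (orient_query_interval is a hypothetical name):

    def orient_query_interval(query_pos, query_end, query_length, is_reverse):
        # Mirror a query-space interval for reverse-strand alignments so all
        # coordinates refer to the original read orientation
        if is_reverse:
            return query_length - query_end, query_length - query_pos
        return query_pos, query_end

    print(orient_query_interval(10, 60, 100, True))   # (40, 90)
    print(orient_query_interval(10, 60, 100, False))  # (10, 60)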
call insertion if not r.flag & 2304 and r.flag & 2 and r.pnext >= event_pos and (chrom != chrom2 or abs(pos2 - event_pos) > max_dist): # is primary, is proper pair. Use primary mate info, not SA - self.join_result.append(JoinEvent(chrom, event_pos, chrom, event_pos, ReadEnum_t.BREAKEND, cigar_index)) + self.join_result.append(JoinEvent(chrom, event_pos, chrom, event_pos, query_pos, query_end, ReadEnum_t.BREAKEND, cigar_index)) return - self.join_result.append(JoinEvent(chrom, event_pos, chrom2, pos2, read_enum, cigar_index)) - + self.join_result.append(JoinEvent(chrom, event_pos, chrom2, pos2, query_pos, query_end, read_enum, cigar_index)) +# cdef int cluster_clipped(Py_SimpleGraph G, r, ClipScoper_t clip_scope, chrom, pos, node_name): cdef int other_node cdef int count = 0 @@ -722,7 +782,7 @@ cdef int cluster_clipped(Py_SimpleGraph G, r, ClipScoper_t clip_scope, chrom, po cdef void add_to_graph(Py_SimpleGraph G, AlignedSegment r, PairedEndScoper_t pe_scope, TemplateEdges_t template_edges, NodeToName node_to_name, genome_scanner, - int flag, int chrom, tell, int cigar_index, int event_pos, + int flag, int chrom, tell, int cigar_index, int event_pos, int query_pos, int chrom2, int pos2, ClipScoper_t clip_scope, ReadEnum_t read_enum, bint p1_overlaps, bint p2_overlaps, bint mm_only, int clip_l, site_adder, int length_from_cigar, bint trust_ins_len, bint paired_end): @@ -733,12 +793,8 @@ cdef void add_to_graph(Py_SimpleGraph G, AlignedSegment r, PairedEndScoper_t pe_ cdef uint64_t v = xxhasher(bam_get_qname(r._delegate), len(r.qname), 42) # Hash qname to save mem cdef int bnd_site_node, bnd_site_node2 cdef int q_start - # echo("\nADDING", node_name, r.qname, (chrom, event_pos), (chrom2, pos2), flag, read_enum) node_to_name.append(v, flag, r.pos, chrom, tell, cigar_index, event_pos) # Index this list to get the template_name genome_scanner.add_to_buffer(r, node_name, tell) # Add read to buffer - if read_enum < 2: # Prevents joining up within-read svs with between-read svs - q_start = r.query_alignment_start if not r.flag & 16 else r.infer_query_length() - r.query_alignment_end - template_edges.add(r.qname, flag, node_name, q_start) both_overlap = p1_overlaps and p2_overlaps if not paired_end or (paired_end and read_enum != BREAKEND and not mm_only and not chrom2 == -1): @@ -761,14 +817,19 @@ cdef void add_to_graph(Py_SimpleGraph G, AlignedSegment r, PairedEndScoper_t pe_ bnd_site_node2 = site_adder.find_nearest_site(chrom2, pos2) if bnd_site_node != bnd_site_node2 and bnd_site_node2 >= 0 and not G.hasEdge(node_name, bnd_site_node2): G.addEdge(node_name, bnd_site_node2, 0) + + if read_enum < 2: # Prevents joining up within-read svs with between-read svs + template_edges.add(r.qname, flag, node_name, query_pos) + # Debug: - # if r.qname == "M03762:232:000000000-L65J4:1:2111:17729:15161": + # if r.qname == "cfa54189-601a-4a98-aeca-60ea41d0b931": # echo("---", r.qname, read_enum, node_name, (event_pos, pos2), length_from_cigar, list(other_nodes)) - # look = {'a4d38568-fd80-4785-8fa5-84ed132b445c', '2313a985-385c-4c84-b02c-dddfc627940b', '0031840a-bd2d-475d-9a04-528f71c7b512'} + # look = {129,128 } + # if node_name == 25: # if r.qname in look: # # if r.qname == "D00360:18:H8VC6ADXX:1:1210:7039:44052": - # echo(r.qname, r.flag, node_name, (chrom, event_pos), (chrom2, pos2), list(other_nodes), - # cigar_index, length_from_cigar, "enum=", read_enum) + # echo(r.qname, length_from_cigar, "nodename:", node_name, list(other_nodes), (chrom, event_pos), (chrom2, pos2), + # f"enum={read_enum}") # 
echo(list(other_nodes)) @@ -867,9 +928,9 @@ cdef void process_alignment(Py_SimpleGraph G, AlignedSegment r, int clip_l, int cdef bint good_clip if paired_end and read_enum == SPLIT and flag & 8: # clip event, or whole read, but mate is unmapped return + if site_adder: - if site_adder: - site_adder.add_any_sites(r.rname, event_pos, G, pe_scope, node_to_name, clustering_dist) + site_adder.add_any_sites(r.rname, event_pos, G, pe_scope, node_to_name, clustering_dist) if paired_end or flag & 1: pnext = r.pnext rnext = r.rnext @@ -898,6 +959,7 @@ cdef void process_alignment(Py_SimpleGraph G, AlignedSegment r, int clip_l, int event_pos = j.event_pos chrom2 = j.chrom2 pos2 = j.pos2 + query_pos = j.query_pos read_enum = j.read_enum cigar_index = j.cigar_index if read_enum == BREAKEND: @@ -916,7 +978,7 @@ cdef void process_alignment(Py_SimpleGraph G, AlignedSegment r, int clip_l, int pos2 = event_pos add_to_graph(G, r, pe_scope, template_edges, node_to_name, genome_scanner, flag, chrom, - tell, cigar_index, event_pos, chrom2, pos2, clip_scope, read_enum, + tell, cigar_index, event_pos, query_pos, chrom2, pos2, clip_scope, read_enum, current_overlaps_roi, next_overlaps_roi, mm_only, clip_l, site_adder, 0, trust_ins_len, paired_end) return @@ -935,9 +997,13 @@ cdef void process_alignment(Py_SimpleGraph G, AlignedSegment r, int clip_l, int pos2 = cigar_pos2 else: pos2 = event_pos + add_to_graph(G, r, pe_scope, template_edges, node_to_name, genome_scanner, flag, chrom, - tell, cigar_index, event_pos, chrom2, pos2, clip_scope, read_enum, current_overlaps_roi, next_overlaps_roi, - mm_only, clip_l, site_adder, length_from_cigar, trust_ins_len, paired_end) + tell, cigar_index, event_pos, + 1, + chrom2, pos2, clip_scope, read_enum, + current_overlaps_roi, next_overlaps_roi, + mm_only, clip_l, site_adder, length_from_cigar, trust_ins_len, paired_end) ### else: # Single end current_overlaps_roi, next_overlaps_roi = False, False # not supported @@ -947,21 +1013,28 @@ cdef void process_alignment(Py_SimpleGraph G, AlignedSegment r, int clip_l, int all_aligns.connect_alignments(r, loci_dist, mapq_thresh, read_enum) if not all_aligns.join_result: return - # for chrom, event_pos, chrom2, pos2, read_e, cigar_index in all_aligns.join_result: for j in all_aligns.join_result: add_to_graph(G, r, pe_scope, template_edges, node_to_name, genome_scanner, flag, j.chrom, - tell, j.cigar_index, j.event_pos, j.chrom2, j.pos2, clip_scope, j.read_enum, + tell, j.cigar_index, j.event_pos, + j.query_pos, + j.chrom2, j.pos2, clip_scope, j.read_enum, current_overlaps_roi, next_overlaps_roi, - mm_only, clip_l, site_adder, 0, trust_ins_len, paired_end) - + mm_only, clip_l, site_adder, + 0, + trust_ins_len, paired_end) elif read_enum >= 2: # Sv within read or breakend + if read_enum == BREAKEND: + return chrom2 = r.rname if cigar_index != -1 and r.cigartuples[cigar_index][0] != 1: # If not insertion pos2 = cigar_pos2 else: pos2 = event_pos + add_to_graph(G, r, pe_scope, template_edges, node_to_name, genome_scanner, flag, chrom, - tell, cigar_index, event_pos, chrom2, pos2, clip_scope, read_enum, + tell, cigar_index, event_pos, + 1, + chrom2, pos2, clip_scope, read_enum, current_overlaps_roi, next_overlaps_roi, mm_only, clip_l, site_adder, length_from_cigar, trust_ins_len, paired_end) @@ -1090,7 +1163,7 @@ cpdef tuple construct_graph(genome_scanner, infile, int max_dist, int clustering int paired_end=1, int read_length=150, bint contigs=True, float norm_thresh=100, float spd_thresh=0.3, bint mm_only=False, sites=None, bint 
trust_ins_len=True, low_mem=False, temp_dir=".", - find_n_aligned_bases=True): + find_n_aligned_bases=True, position_distance_thresh=0.8, max_search_depth=20): logging.info("Building graph with clustering {} bp".format(clustering_dist)) cdef TemplateEdges_t template_edges = TemplateEdges() # Edges are added between alignments from same template, after building main graph cdef int event_pos, cigar_index, opp, length @@ -1099,7 +1172,8 @@ cpdef tuple construct_graph(genome_scanner, infile, int max_dist, int clustering minimizer_support_thresh=minimizer_support_thresh, minimizer_breadth=minimizer_breadth, read_length=read_length) # Infers long-range connections, outside local scope using pe information - cdef PairedEndScoper_t pe_scope = PairedEndScoper(max_dist, clustering_dist, infile.header.nreferences, norm_thresh, spd_thresh, paired_end) + cdef PairedEndScoper_t pe_scope = PairedEndScoper(max_dist, clustering_dist, infile.header.nreferences, norm_thresh, spd_thresh, paired_end, position_distance_thresh, max_search_depth) + bad_clip_counter = BadClipCounter(infile.header.nreferences, low_mem, temp_dir) cdef Py_SimpleGraph G = map_set_utils.Py_SimpleGraph() site_adder = None @@ -1157,7 +1231,7 @@ cpdef tuple construct_graph(genome_scanner, infile, int max_dist, int clustering events_to_add.push_back(make_cigar_event(opp, cigar_index, event_pos, pos2, length, ReadEnum_t.INSERTION)) added = True elif opp == 2: - if length >= min_sv_size: # elif! + if length >= min_sv_size: pos2 = event_pos + length events_to_add.push_back(make_cigar_event(opp, cigar_index, event_pos, pos2, length, ReadEnum_t.DELETION)) added = True @@ -1283,9 +1357,9 @@ cdef tuple count_support_between(Py_SimpleGraph G, parts): cdef tuple t cdef unsigned long[:] p if len(parts) == 0: - return {}, {} + return None, None elif len(parts) == 1: - return {}, {0: parts[0]} + return None, {0: parts[0]} cdef Py_Int2IntMap p2i = map_set_utils.Py_Int2IntMap() for i, p in enumerate(parts): for node in p: @@ -1315,7 +1389,7 @@ cdef tuple count_support_between(Py_SimpleGraph G, parts): if t in seen_t: continue if t not in counts: - counts[t] = [set([]), set([])] + counts[t] = (set([]), set([])) if j < i: counts[t][0].add(child) counts[t][1].add(node) @@ -1331,7 +1405,8 @@ cdef tuple count_support_between(Py_SimpleGraph G, parts): self_counts[i].append(node) seen_t.update(current_t) # Only count edge once for t in current_t: # save memory by converting support_between to array - counts[t] = [np.fromiter(m, dtype="uint32", count=len(m)) for m in counts[t]] + counts[t] = tuple(np.fromiter(m, dtype="uint32", count=len(m)) for m in counts[t]) + return counts, self_counts @@ -1393,6 +1468,24 @@ cpdef break_large_component(Py_SimpleGraph G, component, int min_support): return jobs +@cython.auto_pickle(True) +cdef class GraphComponent: + cdef public object parts, s_between, reads, s_within, n2n, info + def __init__(self, parts, s_between, reads, s_within, n2n, info): + self.parts = parts + self.s_between = s_between + self.reads = reads + self.s_within = s_within + self.n2n = n2n + self.info = info + def as_dict(self): + return {"parts": self.parts, + "s_between": self.s_between, + "reads": self.reads, + "s_within": self.s_within, + "n2n": self.n2n, + "info": self.info} + cpdef proc_component(node_to_name, component, read_buffer, infile, Py_SimpleGraph G, int min_support, int procs, int paired_end, sites_index): n2n = {} @@ -1425,39 +1518,31 @@ cpdef proc_component(node_to_name, component, read_buffer, infile, Py_SimpleGrap # Explore component 
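proc_component now returns a compact extension class rather than a per-component dict, trimming allocation and pickling overhead when components are shipped between worker processes. A plain-Python stand-in with the same fields (dataclass used here for brevity only):

    from dataclasses import dataclass
    from typing import Any

    @dataclass
    class GraphComponent:
        # Stand-in for the new cdef class: fixed fields, picklable, and
        # cheaper than building a dict per graph component
        parts: Any
        s_between: Any
        reads: Any
        s_within: Any
        n2n: Any
        info: Any

        def as_dict(self):
            return self.__dict__.copy()

    gc = GraphComponent(None, None, None, None, {7: "node"}, None)
    print(gc.as_dict()["n2n"])  # {7: 'node'}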
for locally interacting nodes; create partitions using these partitions = get_partitions(G, component) support_between, support_within = count_support_between(G, partitions) - if len(support_between) == 0 and len(support_within) == 0: + + if not support_between and not support_within: if not paired_end: if len(n2n) >= min_support or len(reads) >= min_support or info: - d = {"parts": [], "s_between": {}, "reads": reads, "s_within": {}, "n2n": n2n} - if info: - d["info"] = info - return d + return GraphComponent(None, None, None, None, n2n, info) else: return else: # single paired end template can have 3 nodes e.g. two reads plus supplementary if min_support == 1 and (len(n2n) >= min_support or len(reads) >= min_support): - d = {"parts": [], "s_between": {}, "reads": reads, "s_within": {}, "n2n": n2n} - if info: - d["info"] = info - return d + return GraphComponent(None, None, None, None, n2n, info) elif len(reads) >= min_support or info: - d = {"parts": [], "s_between": {}, "reads": reads, "s_within": {}, "n2n": n2n} - if info: - d["info"] = info - return d + return GraphComponent(None, None, None, None, n2n, info) else: return # Debug: - # if 157835 in n2n: + # if 14 in n2n: # echo("parts", partitions) # echo("s_between", support_between) # echo("s_within", support_within) - + # if len(partitions) > 1: # echo("parts", partitions) # echo("s_between", support_between) # echo("s_within", support_within) - + # # echo("n2n", n2n.keys()) # node_look = set(range(653526, 653532)) # node_look = set(range(8)) @@ -1466,7 +1551,12 @@ cpdef proc_component(node_to_name, component, read_buffer, infile, Py_SimpleGrap # echo("parts", partitions) # echo("s_between", sb) # echo("s_within", support_within) - d = {"parts": partitions, "s_between": support_between, "reads": reads, "s_within": support_within, "n2n": n2n} - if info: - d["info"] = info - return d + + support_between = tuple(support_between.items()) if support_between else None + support_within = tuple(support_within.items()) if support_within else None + return GraphComponent(partitions, + support_between, + reads, + support_within, + n2n, + info) diff --git a/dysgu/io_funcs.pyx b/dysgu/io_funcs.pyx index baa3f7b..9409f60 100644 --- a/dysgu/io_funcs.pyx +++ b/dysgu/io_funcs.pyx @@ -129,7 +129,7 @@ cpdef list col_names(small_output): if small_output: return ["chrA", "posA", "chrB", "posB", "sample", "event_id", "grp_id", "n_in_grp", "kind", "type", "svtype", "join_type", "cipos95A", "cipos95B", 'contigA', 'contigB', "svlen", "svlen_precise", "rep", "gc", - ["GT", "GQ", "MAPQpri", "su", "spanning", "pe", "supp", "sc", "bnd", + ["GT", "GQ", "MAPQpri", "MAPQsupp", "su", "spanning", "pe", "supp", "sc", "bnd", "NMbase", "raw_reads_10kb", "neigh10kb", "plus", "minus", "remap_score", "remap_ed", "bad_clip_count", "fcc", "inner_cn", "outer_cn", "prob"] ] @@ -264,7 +264,7 @@ def make_main_record(r, dysgu_version, index, format_f, df_rows, add_kind, small info_extras += [f"MeanPROB={round(mean_prob, 3)}", f"MaxPROB={round(max_prob, 3)}"] if small_output: - fmt_keys = "GT:GQ:MAPQP:SU:WR:PE:SR:SC:BND:COV:NEIGH10:PS:MS:RMS:RED:BCC:FCC:ICN:OCN:PROB" + fmt_keys = "GT:GQ:MAPQP:MAPQS:SU:WR:PE:SR:SC:BND:NMB:COV:NEIGH10:PS:MS:RMS:RED:BCC:FCC:ICN:OCN:PROB" else: fmt_keys = "GT:GQ:NMP:NMS:NMB:MAPQP:MAPQS:NP:MAS:SU:WR:PE:SR:SC:BND:SQC:SCW:SQR:BE:COV:MCOV:LNK:NEIGH:NEIGH10:RB:PS:MS:SBT:NG:NSA:NXA:NMU:NDC:RMS:RED:BCC:FCC:STL:RAS:FAS:ICN:OCN:CMP:RR:JIT:PROB" @@ -309,7 +309,7 @@ def make_main_record(r, dysgu_version, index, format_f, df_rows, add_kind, small def get_fmt(r, 
diff --git a/dysgu/io_funcs.pyx b/dysgu/io_funcs.pyx
index baa3f7b..9409f60 100644
--- a/dysgu/io_funcs.pyx
+++ b/dysgu/io_funcs.pyx
@@ -129,7 +129,7 @@ cpdef list col_names(small_output):
     if small_output:
         return ["chrA", "posA", "chrB", "posB", "sample", "event_id", "grp_id", "n_in_grp", "kind", "type",
                 "svtype", "join_type", "cipos95A", "cipos95B", 'contigA', 'contigB', "svlen", "svlen_precise", "rep", "gc",
-                ["GT", "GQ", "MAPQpri", "su", "spanning", "pe", "supp", "sc", "bnd",
+                ["GT", "GQ", "MAPQpri", "MAPQsupp", "su", "spanning", "pe", "supp", "sc", "bnd",
                  "NMbase", "raw_reads_10kb", "neigh10kb", "plus", "minus", "remap_score", "remap_ed",
                  "bad_clip_count", "fcc", "inner_cn", "outer_cn", "prob"]
                 ]
@@ -264,7 +264,7 @@ def make_main_record(r, dysgu_version, index, format_f, df_rows, add_kind, small
         info_extras += [f"MeanPROB={round(mean_prob, 3)}", f"MaxPROB={round(max_prob, 3)}"]

     if small_output:
-        fmt_keys = "GT:GQ:MAPQP:SU:WR:PE:SR:SC:BND:COV:NEIGH10:PS:MS:RMS:RED:BCC:FCC:ICN:OCN:PROB"
+        fmt_keys = "GT:GQ:MAPQP:MAPQS:SU:WR:PE:SR:SC:BND:NMB:COV:NEIGH10:PS:MS:RMS:RED:BCC:FCC:ICN:OCN:PROB"
     else:
         fmt_keys = "GT:GQ:NMP:NMS:NMB:MAPQP:MAPQS:NP:MAS:SU:WR:PE:SR:SC:BND:SQC:SCW:SQR:BE:COV:MCOV:LNK:NEIGH:NEIGH10:RB:PS:MS:SBT:NG:NSA:NXA:NMU:NDC:RMS:RED:BCC:FCC:STL:RAS:FAS:ICN:OCN:CMP:RR:JIT:PROB"
@@ -309,7 +309,7 @@ def make_main_record(r, dysgu_version, index, format_f, df_rows, add_kind, small

 def get_fmt(r, small_output):
     if small_output:
-        v = [r["GT"], r["GQ"], r['MAPQpri'], r['su'], r['spanning'], r['pe'], r['supp'], r['sc'], r['bnd'],
+        v = [r["GT"], r["GQ"], r['MAPQpri'], r['MAPQsupp'], r['su'], r['spanning'], r['pe'], r['supp'], r['sc'], r['bnd'],
              r['NMbase'], r['raw_reads_10kb'], r['neigh10kb'], r["plus"], r["minus"], r["remap_score"], r["remap_ed"],
              r["bad_clip_count"], round(r["fcc"], 3), round(r["inner_cn"], 3), round(r["outer_cn"], 3), r['prob']
              ]
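
The small-output FORMAT string now carries MAPQS (mean MAPQ of supplementary alignments) and NMB (base-level mismatch rate). Once written, these fields can be read back with pysam in the usual way; the file name below is hypothetical:

    import pysam

    vcf = pysam.VariantFile("sample.dysgu.vcf")
    sample = vcf.header.samples[0]  # dysgu call output is single-sample
    for rec in vcf:
        fmt = rec.samples[sample]
        mapq_supp = fmt["MAPQS"] if "MAPQS" in fmt else None
        nm_base = fmt["NMB"] if "NMB" in fmt else None
        print(rec.chrom, rec.pos, rec.info["SVTYPE"], mapq_supp, nm_base)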
diff --git a/dysgu/main.py b/dysgu/main.py
index bf328ab..df75ea9 100644
--- a/dysgu/main.py
+++ b/dysgu/main.py
@@ -5,7 +5,7 @@ import shutil
 import time
 from multiprocessing import cpu_count
-from subprocess import run
+from subprocess import run, Popen, PIPE
 from importlib.metadata import version
 import warnings
 from dysgu import cluster, view, sv2bam, filter_normals
@@ -189,6 +189,8 @@ def cli():
               type=int, callback=add_option_set)
 @click.option('--dist-norm', help=f"Distance normalizer [default: {defaults['dist_norm']}]", type=float, callback=add_option_set)
 @click.option('--spd', help="Span position distance", default=0.3, type=float, show_default=True)
+@click.option('--sd', help="Span distance, only SV span is considered, lower values separate multi-allelic sites", default=0.8, type=float, show_default=True)
+@click.option('--search-depth', help="Search through this many local reads for matching SVs. Increase this to identify low frequency events", default=20, type=float, show_default=True)
 @click.option('--trust-ins-len', help=f"Trust insertion length from cigar, for high error rate reads use False [default: {defaults['trust_ins_len']}]", type=str, callback=add_option_set)
 @click.option('--length-extend', help=f"Extend SV length if any nearby gaps found with length >= length-extend. Ignored for paired-end reads", type=int, default=15, show_default=True)
@@ -348,6 +350,8 @@ def get_reads(ctx, **kwargs):
               type=int, callback=add_option_set)
 @click.option('--dist-norm', help=f"Distance normalizer [default: {defaults['dist_norm']}]", type=float, callback=add_option_set)
 @click.option('--spd', help="Span position distance", default=0.3, type=float, show_default=True)
+@click.option('--sd', help="Span distance, only SV span is considered, lower values separate multi-allelic sites", default=0.8, type=float, show_default=True)
+@click.option('--search-depth', help="Search through this many local reads for matching SVs. Increase this to identify low frequency events", default=20, type=float, show_default=True)
 @click.option('--trust-ins-len', help=f"Trust insertion length from cigar, for high error rate reads use False [default: {defaults['trust_ins_len']}]", type=str, callback=add_option_set)
 @click.option('--length-extend', help=f"Extend SV length if any nearby gaps found with length >= length-extend. Ignored for paired-end reads", type=int, default=15, show_default=True)
@@ -427,6 +431,8 @@ def call_events(ctx, **kwargs):
 @click.option("-p", "--procs", help="Number of processors to use when merging, requires --wd option to be supplied", type=cpu_range, default=1, show_default=True)
 @click.option("-d", "--wd", help="Working directory to use/create when merging", type=click.Path(exists=False), required=False)
 @click.option("-c", "--clean", help="Remove working directory when finished", is_flag=True, flag_value=True, show_default=False, default=False)
+@click.option("--progress", help="Print detailed progress information", is_flag=True, flag_value=True, show_default=False, default=False)
+@click.option("--cohort-update", help="Update this cohort file with new calls from input_files", required=False, type=click.Path(exists=True))
 @click.option("--collapse-nearby", help="Merges more aggressively by collapsing nearby SV", default="True",
               type=click.Choice(["True", "False"]), show_default=True)
 @click.option("--merge-across", help="Merge records across input samples", default="True",
@@ -435,6 +441,8 @@ def call_events(ctx, **kwargs):
               default="False", type=click.Choice(["True", "False"]), show_default=True)
 @click.option("--merge-dist", help="Distance threshold for merging", default=500, type=int,
               show_default=True)
+@click.option("--max-comparisons", help="Compare each event with up to --max-comparisons local SVs",
+              default=20, type=int, show_default=True)
 @click.option("--separate", help="Keep merged tables separate, adds --post-fix to file names, csv format only", default="False",
               type=click.Choice(["True", "False"]), show_default=True)
 @click.option("--post-fix", help="Adds --post-fix to file names, only if --separate is True",
@@ -467,8 +475,10 @@ def view_data(ctx, **kwargs):
 @click.option("--target-sample", help="If input_vcf if multi-sample, use target-sample as input", required=False, type=str, default="")
 @click.option("--keep-all", help="All SVs classified as normal will be kept in the output, labelled as filter=normal", is_flag=True, flag_value=True, show_default=False, default=False)
 @click.option("--ignore-read-groups", help="Ignore ReadGroup RG tags when parsing sample names. Filenames will be used instead", is_flag=True, flag_value=True, show_default=False, default=False)
-@click.option("--min-prob", help="Remove SVs with PROB value < min-prob", default=0.1, type=float, show_default=True)
-@click.option("--pass-prob", help="Re-label SVs as PASS if PROB value >= pass-prob", default=1.0, type=float, show_default=True)
+@click.option("--max-divergence", help="Remove SV if normal_bam displays divergence > max-divergence at same location", default=0.1, type=float, show_default=True)
+@click.option("--min-prob", help="Remove SV with PROB value < min-prob", default=0.1, type=float, show_default=True)
+@click.option("--min-mapq", help="Remove SV with mean mapq < min-mapq", default=10, type=float, show_default=True)
+@click.option("--pass-prob", help="Re-label SV as PASS if PROB value >= pass-prob", default=1.0, type=float, show_default=True)
 @click.option("--interval-size", help="Interval size for searching normal-vcf/normal-bams", default=1000, type=int, show_default=True)
 @click.option("--random-bam-sample", help="Choose N random normal-bams to search. Use -1 to ignore", default=-1, type=int, show_default=True)
 @click.pass_context
@@ -482,6 +492,8 @@ def filter_normal(ctx, **kwargs):


 @cli.command("test")
+@click.option("--verbose", help="Show output of test commands",
+              is_flag=True, flag_value=True, show_default=False, default=False)
 @click.pass_context
 def test_command(ctx, **kwargs):
     """Run dysgu tests"""
@@ -531,9 +543,14 @@ def test_command(ctx, **kwargs):
                   "-o " + pwd + '/test.merge.dysgu{}.vcf'.format(dysgu_version)])
     for t in tests:
         c = " ".join(t)
-        v = run(shlex.split(c), shell=False, capture_output=True, check=True)
-        if v.returncode != 0:
-            raise RuntimeError(t, "finished with non zero: {}".format(v))
+        process = Popen(shlex.split(c), stdout=PIPE, stderr=PIPE, text=True)
+        if kwargs["verbose"]:
+            for line in process.stderr:
+                click.echo(line.strip(), err=True)
+        process.wait()
+        if process.returncode != 0:
+            logging.warning(f"Command failed with return code {process.returncode}")
         else:
-            click.echo("PASS: " + c, err=True)
+            click.echo("PASS: " + c + "\n", err=True)
+    logging.info("Run test complete")
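
The test runner now streams the child's stderr when --verbose is set. One caveat worth noting: with both pipes captured, a bare wait() can block if the non-verbose child fills a pipe buffer, so draining via communicate() is the safer default. A sketch of that pattern (the helper name is illustrative, not part of the patch):

    import shlex
    from subprocess import Popen, PIPE

    def run_streaming(cmd, verbose=False):
        proc = Popen(shlex.split(cmd), stdout=PIPE, stderr=PIPE, text=True)
        if verbose:
            for line in proc.stderr:      # echo stderr as it is produced
                print(line.rstrip())
            proc.wait()
        else:
            proc.communicate()            # drain both pipes, then reap the child
        return proc.returncode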
diff --git a/dysgu/post_call.py b/dysgu/post_call.py
index b9a0044..d856b25 100644
--- a/dysgu/post_call.py
+++ b/dysgu/post_call.py
@@ -286,9 +286,11 @@ def ref_repetitiveness(events, ref_genome):
         if e.svlen < 150 and e.svtype == "DEL":
             try:
                 ref_seq = ref_genome.fetch(e.chrA, e.posA, e.posB).upper()
-            except ValueError:  # todo out or range, needs fixing
-                continue
-            e.ref_rep = compute_rep(ref_seq)
+                e.ref_rep = compute_rep(ref_seq)
+            except (ValueError, KeyError, IndexError) as errors:
+                # Might be different reference genome version, compared to bam genome
+                logging.warning("Error fetching reference chromosome: {}: {}".format(e.chrA, errors))
+                e.ref_rep = 0
     return events
diff --git a/dysgu/re_map.py b/dysgu/re_map.py
index 3418a7d..08be124 100644
--- a/dysgu/re_map.py
+++ b/dysgu/re_map.py
@@ -4,7 +4,7 @@ from dysgu.coverage import merge_intervals
 from dysgu.assembler import compute_rep
 import math
-import edlib
+import dysgu.edlib as edlib
 import logging
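
edlib is now vendored as dysgu.edlib rather than pulled from PyPI. Assuming the vendored module keeps the upstream edlib API, which the drop-in import swap suggests, usage is unchanged:

    import dysgu.edlib as edlib

    # Infix ("HW") alignment: locate the query anywhere inside the target
    res = edlib.align("ACGTGGA", "TTACGTGGAAA", mode="HW", task="locations")
    print(res["editDistance"], res["locations"])  # -> 0 [(2, 8)]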
diff --git a/dysgu/sites_utils.py b/dysgu/sites_utils.py
index 8d0f3eb..223c183 100644
--- a/dysgu/sites_utils.py
+++ b/dysgu/sites_utils.py
@@ -65,7 +65,7 @@ def vcf_reader(pth, infile, parse_probs, sample_name, ignore_sample, default_pro
             continue
         if ignore_sample and sample_name in r.samples and "GT" in r.samples[sample_name]:
             this_gt = r.samples[sample_name]['GT']
-            if not (this_gt == "0/0" or this_gt == "0" or this_gt == "." or this_gt == "./." or this_gt == "0|0"):
+            if not (this_gt == (0, 0) or this_gt == "0/0" or this_gt == "0" or this_gt == "." or this_gt == "./." or this_gt == "0|0"):
                 continue
         if "CHROM2" in r.info:
             chrom2_info = r.info["CHROM2"]
@@ -103,6 +103,11 @@ def vcf_reader(pth, infile, parse_probs, sample_name, ignore_sample, default_pro
         if chrom == -1 or chrom2 == -1:
             logging.warning(f"Chromosome from record in --sites not found in input file header CHROM={r.chrom}, POS={r.start}, CHROM2={chrom2_info}, END={r.stop}")

+        if isinstance(chrom, str):
+            raise ValueError(f"Could not find {chrom} in bam file header")
+        if isinstance(chrom2, str):
+            chrom2 = chrom
+
         start = r.start  # note pysam converts to zero-based index like bam
         stop = r.stop
         if chrom == chrom2 and stop < start:
@@ -119,7 +124,7 @@ def vcf_reader(pth, infile, parse_probs, sample_name, ignore_sample, default_pro
             else:
                 svlen = int(r.info["SVLEN"])
         else:
-            svlen = None
+            svlen = -1

         # is_dysgu = False
         # if "SVMETHOD" in r.info and "DYSGU" in r.info["SVMETHOD"]:
@@ -144,7 +149,6 @@ def vcf_reader(pth, infile, parse_probs, sample_name, ignore_sample, default_pro
             raise ValueError("PROB or MeanPROB must be a float in range 0-1, error at CHROM {}, POS {}".format(chrom, start))
         recs[chrom].append(Site(chrom, start, chrom2, stop, svt, idx, r.id, svlen, pval))
-
     if not_parsed:
         logging.warning("Some records had incompatible SVTYPE: {}".format(
             str({k: v for k, v in Counter(not_parsed).items()})))
@@ -152,7 +156,8 @@ def vcf_reader(pth, infile, parse_probs, sample_name, ignore_sample, default_pro


 def append_uncalled(df, site_adder, infile, parse_probs):
-
+    if site_adder is None:
+        raise ValueError("Sites was None type")
     # add 0/0 genotype to uncalled variants in --sites
     found_sites = set([i for i in df["site_info"] if i])
     uncalled = []
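
The genotype fix above matters because pysam returns GT as a tuple of allele indices, not a string, so the old "0/0" comparisons silently never matched. A small normaliser makes the hom-ref test explicit (a sketch, not part of the patch):

    def is_hom_ref(gt):
        if isinstance(gt, tuple):  # pysam: (0, 0), (0, None), (None, None) ...
            return all(a in (0, None) for a in gt)
        return str(gt) in ("0/0", "0|0", "0", ".", "./.")

    assert is_hom_ref((0, 0)) and is_hom_ref("0|0") and not is_hom_ref((0, 1))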
= 0
@@ -53,6 +48,56 @@ cdef class AlignmentItem:
         self.inferred_sv_len = -1
         self.size_inferred = 0  # set to 1 if insertion size was inferred

+# cdef void from_node_info(AlignmentItem v):
+#     a_info = v.a_node_info
+#     b_info = v.b_node_info
+#
+#     v.breakA = v.a_node_info.event_pos
+#     v.breakA_precise = 1 if v.a_node_info.read_enum == 1 else 0
+#     v.breakB = v.b_node_info.event_pos
+#     v.breakB_precise = 1 if v.b_node_info.read_enum == 1 else 0
+#
+#     v.join_type = f"{v.strandA}to{v.strandB}"
+#
+#     cdef int query_gap = 0
+#     cdef int query_overlap = 0
+#     if v.rA == v.rB:  # same read
+#         if v.b_qstart < v.a_qstart:  # B is first on query
+#             query_gap = v.a_qstart - v.b_qend
+#         else:
+#             query_gap = v.b_qstart - v.a_qend
+#         if query_gap < 0:
+#             query_overlap = abs(query_gap)
+#             query_gap = 0
+#
+#     v.query_gap = query_gap
+#     v.query_overlap = query_overlap
+#
+#     if a_info.chrom != b_info.chrom:
+#         v.svtype = "TRA"
+#     else:
+#         if a_info.event_pos < b_info.event_pos:
+#             if a_info.connect_right:
+#                 if not b_info.connect_right:
+#                     v.svtype = "DEL"
+#                 else:
+#                     v.svtype = "INV"
+#             else:
+#                 if not b_info.connect_right:
+#                     v.svtype = "INV"
+#                 else:
+#                     v.svtype = "DUP"
+#         else:
+#             if a_info.connect_right:
+#                 if not b_info.connect_right:
+#                     v.svtype = "DUP"
+#                 else:
+#                     v.svtype = "INV"
+#             else:
+#                 if not b_info.connect_right:
+#                     v.svtype = "INV"
+#                 else:
+#                     v.svtype = "DEL"


 cdef void two_primary(AlignmentItem v):
@@ -272,7 +317,6 @@ cdef void same_read(AlignmentItem v):
                 v.breakB = v.endB
                 v.svtype = "DUP"

-                # v.svtype = "INS"
                 # Check if gap on query is bigger than gap on reference
                 ref_gap = abs(v.breakB - v.breakA)
                 if v.a_qstart > v.b_qstart:  # B is first on query seq
@@ -785,7 +829,6 @@ cdef void translocation(AlignmentItem v):
     v.query_gap = query_gap
     v.query_overlap = query_overlap

-
 cdef void classify_d(AlignmentItem v):  #, debug=False):
     v.breakA_precise = 0
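
The commented-out from_node_info sketch encodes a compact orientation table: on the same chromosome, the SV class falls out of which side each break "connects" to. The same table in plain Python, with names following the node_info fields used in the commented block:

    def classify_join(chrom_a, pos_a, right_a, chrom_b, pos_b, right_b):
        if chrom_a != chrom_b:
            return "TRA"
        if pos_a > pos_b:  # normalise so A is the leftmost break
            pos_a, right_a, pos_b, right_b = pos_b, right_b, pos_a, right_a
        if right_a and not right_b:
            return "DEL"   # leftmost break joins rightwards, rightmost joins leftwards
        if not right_a and right_b:
            return "DUP"
        return "INV"       # both breaks join in the same direction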
lens_before))
         count = io_funcs.to_vcf(df, args, seen_names, outfile, header=header, small_output_f=True,
                                 contig_names=contig_names, show_names=log_messages)
         if log_messages:
-            logging.info("Sample rows before merge {}, rows after {}".format(list(map(len, dfs)), count))
+            logging.info("Sample rows before merge {}, rows after {}".format(lens_before, count))
+        elif show_progress and job_id:
+            logging.info("{} rows after {}".format(job_id, count))
     else:
         to_csv(df, args, outfile, small_output=False)
@@ -444,7 +455,7 @@ def write(self, r):
         self.vcf.write(str(r))


-def shard_job(wd, item_path, name, Global):
+def shard_job(wd, item_path, name, Global, show_progress):
     shards = {}
     vcf = pysam.VariantFile(item_path, 'r')
     contigs = len(vcf.header.contigs)
@@ -473,6 +484,8 @@ def shard_job(wd, item_path, name, Global):
     for v in shards.values():
         v.close()
     vcf.close()
+    if show_progress:
+        logging.info(f"Finished splitting {item_path}")
     return list(shards.keys()), rows, name
@@ -545,7 +558,7 @@ def sort_into_single_file(out_f, vcf_header, file_paths_to_combine, sample_list)
     return written


-def shard_data(args, input_files, Global):
+def shard_data(args, input_files, Global, show_progress):
     logging.info(f"Merge distance: {args['merge_dist']} bp")
     out_f = args['svs_out'] if args['svs_out'] else '-'
     logging.info("SVs output to {}".format(out_f if out_f != '-' else 'stdout'))
@@ -554,9 +567,11 @@ def shard_data(args, input_files, Global):
     seen_names, names_list = get_names_list(input_files, ignore_csv=False)

     # Split files into shards
+    if show_progress:
+        logging.info("Splitting files into shards")
     job_args = []
     for name, item in zip(names_list, input_files):
-        job_args.append((args['wd'], item, name, Global))
+        job_args.append((args['wd'], item, name, Global, show_progress))
     pool = multiprocessing.Pool(args['procs'])
     results = pool.starmap(shard_job, job_args)
     input_rows = {}
@@ -572,6 +587,8 @@ def shard_data(args, input_files, Global):
         resource.setrlimit(resource.RLIMIT_NOFILE, (min(hard, max(Global.soft, soft) * len(input_files)), hard))

     # Process shards
+    if show_progress:
+        logging.info("Processing shards")
     job_args2 = []
     needed_args = {k: args[k] for k in ["wd", "metrics", "merge_within", "merge_dist", "collapse_nearby", "merge_across", "out_format", "separate", "verbosity", "add_kind"]}
     merged_outputs = []
@@ -584,7 +601,7 @@ def shard_data(args, input_files, Global):
         fout = os.path.join(args['wd'], block_id + '_merged.vcf')
         merged_outputs.append(fout)
         target_args["svs_out"] = fout
-        job_args2.append((target_args, file_targets, seen_names, names, False))
+        job_args2.append((target_args, file_targets, seen_names, names, False, show_progress, block_id))

     if args['procs'] > 1:
         pool.starmap(process_file_list, job_args2)
@@ -616,6 +633,191 @@ def shard_data(args, input_files, Global):
             os.remove(item)


+def get_cosine_similarity(r_format_array, candidates, samp, target_keys):
+    res = []
+    for index, c in candidates:
+        fmt = c.samples[samp]
+        vals2 = np.array([fmt[k] if k in fmt and fmt[k] is not None else 0 for k in target_keys]) + 1e-4
+        cs = 1 - abs(cosine(r_format_array, vals2))
+        res.append((index, c, cs))
+    return res
+
+
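
get_cosine_similarity scores each candidate by the angle between its FORMAT vector and the cohort record's; scipy's cosine() returns a distance, so similarity is one minus it. A worked example with made-up values:

    import numpy as np
    from scipy.spatial.distance import cosine

    r_vals = np.array([60.0, 12, 5, 3]) + 1e-4  # cohort record FORMAT values
    c_vals = np.array([58.0, 11, 6, 3]) + 1e-4  # one candidate's FORMAT values
    similarity = 1 - abs(cosine(r_vals, c_vals))
    print(round(similarity, 3))                 # close to 1.0 for near-identical records

The 1e-4 offset guards against all-zero vectors, for which cosine distance is undefined.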
+def find_similar_candidates(current_cohort_file, variant_table, samp):
+    tot = 0
+    G = nx.Graph()
+    cached = {}
+    cohort_index = 0
+    for r in current_cohort_file.fetch():
+        tot += 1
+        if r.chrom not in variant_table:
+            cohort_index += 1
+            continue
+        if not r.samples[samp]:
+            cohort_index += 1
+            continue
+        t_idx = variant_table[r.chrom].bisect_left(r)
+        if t_idx >= len(variant_table[r.chrom]):
+            t_idx = 0 if t_idx == 0 else t_idx - 1
+        while t_idx > 0 and variant_table[r.chrom][t_idx].pos > r.pos - 150:
+            t_idx -= 1
+        candidates = []
+        for i in range(t_idx, len(variant_table[r.chrom])):
+            vr = variant_table[r.chrom][i]
+            if abs(vr.pos - r.pos) > 250 and vr.pos > r.pos:
+                break
+            if vr.info["SVTYPE"] != r.info["SVTYPE"]:
+                continue
+            elif vr.info["SVTYPE"] == "TRA":
+                if vr.info["CHR2"] != r.info["CHR2"]:
+                    continue
+                if abs(vr.info["CHR2_POS"] - r.info["CHR2_POS"]) > 250:
+                    continue
+
+            candidates.append((i, vr))
+
+        if len(candidates):
+            # NMB is skipped for older versions of dysgu
+            numeric = {k: v if v is not None else 0 for k, v in r.samples[samp].items() if k != "GT" and k != "NMB"}
+            target_keys = numeric.keys()
+            r_format_array = np.array(list(numeric.values())) + 1e-4
+            candidates = get_cosine_similarity(r_format_array, candidates, samp, target_keys)
+            for index, c, cs_similarity in candidates:
+                cached[(index, r.chrom)] = c
+                G.add_edge(cohort_index, (index, r.chrom), weight=cs_similarity)
+
+        cohort_index += 1
+
+    # Find single best u out-edge and single best v out-edge
+    matching_edges_u = {}
+    matching_edges_v = {}
+    for u in G.nodes():
+        if isinstance(u, tuple):
+            continue
+        best = None
+        for v in G.neighbors(u):
+            if not isinstance(v, tuple):
+                continue
+            w = G[u][v]["weight"]
+            if not best or w > best[1]:
+                best = v, w
+        if best:
+            if best[0] not in matching_edges_v:
+                matching_edges_v[best[0]] = (u, best[1])
+            elif best[1] >= matching_edges_v[best[0]][1]:
+                del matching_edges_u[ matching_edges_v[best[0]][0] ]
+                matching_edges_v[best[0]] = (u, best[1])
+            else:
+                continue
+            if u not in matching_edges_u:
+                matching_edges_u[u] = best
+            elif best[1] >= matching_edges_u[u][1]:  # update u edge
+                del matching_edges_v[ matching_edges_u[u][0] ]
+                matching_edges_u[u] = best
+
+    matches = {}
+    for u, (v, w) in matching_edges_u.items():
+        if w != 1:
+            matches[u] = cached[v]
+
+    return tot, matches
+
+
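
The mutual-best-edge bookkeeping above is a greedy one-to-one matching: each cohort record keeps its best-scoring candidate unless a stronger claim displaces it. For comparison, networkx can compute an optimal weighted matching on the same bipartite graph (a sketch with toy nodes, not the patch's code):

    import networkx as nx

    G = nx.Graph()
    G.add_edge("cohort_0", ("cand", 3), weight=0.99)
    G.add_edge("cohort_1", ("cand", 3), weight=0.95)  # competes for the same candidate
    print(nx.max_weight_matching(G))  # e.g. {("cohort_0", ("cand", 3))}

The greedy version trades optimality for a single pass over the nodes, which is cheap even for large cohorts.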
"w", header=header) + if sample_index > 0: + try: + os.remove(f"{args['wd']}/merge_sample_idx_{sample_index - 1}.vcf") + except OSError: + pass + for r in samp_file.fetch(): + variant_table[r.chrom].add(r) + + n_updated = 0 + # First pass, find matching SVs + tot, matchings = find_similar_candidates(current_cohort_file, variant_table, samp) + + # Second pass update matching SVs + cohort_index = 0 + for r in current_cohort_file.fetch(): + if cohort_index not in matchings: + current_out_file.write(r) + cohort_index += 1 + continue + vr = matchings[cohort_index] + cohort_index += 1 + updated = False + ch_fmt = dict(r.samples[samp].items()) + for k, v in vr.samples[samp].items(): + if k not in ch_fmt or ch_fmt[k] is None: + continue + elif isinstance(v, float): + if int(v * 100) != int(ch_fmt[k] * 100): + ch_fmt[k] = v + updated = True + elif v != r.samples[samp][k]: + ch_fmt[k] = v + updated = True + if updated: + for k, v in ch_fmt.items(): + r.samples[samp][k] = v + probs = [] + for s in cohort_samples: + probs.append(r.samples[s]["PROB"]) + mean = sum(probs) / len(probs) + max_prob = max(probs) + r.info["MeanPROB"] = mean + r.info["MaxPROB"] = max_prob + n_updated += 1 + + current_out_file.write(r) + + logging.info(f"{sample_index + 1}/{len(samples)} - {samp}, updated: {n_updated}/{tot}") + current_out_file.close() + if sample_index != len(samples) - 1: + current_cohort_file = pysam.VariantFile(f"{args['wd']}/merge_sample_idx_{sample_index}.vcf") + sample_index += 1 + + return + + def view_file(args): t0 = time.time() @@ -630,22 +832,31 @@ def view_file(args): if not all(os.path.splitext(i)[1] == ".csv" for i in args["input_files"]): raise ValueError("All input files must have .csv extension") - args["metrics"] = False # only option supported so far - args["contigs"] = False + if args["cohort_update"]: + for opt, v in (("out_format", "vcf"), ("merge_within", "False"), ("merge_across", "True"), ("add_kind", "False"), ("separate", "False")): + if args[opt] != v: + raise ValueError(f"{opt}={v} fot supported with --cohort-update") + if args["wd"] is None: + raise ValueError("Need a working directory --wd") + update_cohort_only(args) - soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) - manager = multiprocessing.Manager() - Global = manager.Namespace() - Global.open_files = soft # we dont know how many file descriptors are already open by the user, assume all - Global.soft = soft - - if not args["wd"]: - seen_names, names_list = get_names_list(args["input_files"]) - process_file_list(args, args["input_files"], seen_names, names_list, log_messages=True) else: - shard_data(args, args["input_files"], Global=Global) - if args['clean']: - os.rmdir(args['wd']) + args["metrics"] = False # only option supported so far + args["contigs"] = False + + soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + manager = multiprocessing.Manager() + Global = manager.Namespace() + Global.open_files = soft # we dont know how many file descriptors are already open by the user, assume all + Global.soft = soft + + if not args["wd"]: + seen_names, names_list = get_names_list(args["input_files"]) + process_file_list(args, args["input_files"], seen_names, names_list, log_messages=True) + else: + shard_data(args, args["input_files"], Global=Global, show_progress=args['progress']) + if args['clean']: + os.rmdir(args['wd']) logging.info("dysgu merge complete h:m:s, {}".format(str(datetime.timedelta(seconds=int(time.time() - t0))), time.time() - t0)) diff --git a/requirements.txt b/requirements.txt index 
diff --git a/requirements.txt b/requirements.txt
index fe93871..aaa1d69 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,9 +4,8 @@ click>=8.0
 numpy
 scipy
 pandas
-pysam==0.21.0
+pysam==0.22.0
 networkx>=2.4
 scikit-learn>=0.22
-edlib
 sortedcontainers
 lightgbm
diff --git a/setup.py b/setup.py
index 1e87c30..484e2a2 100644
--- a/setup.py
+++ b/setup.py
@@ -58,7 +58,7 @@ def get_extra_args():
 extras = get_extra_args() + ["-Wno-sign-compare", "-Wno-unused-function",
                              "-Wno-unused-result", '-Wno-ignored-qualifiers',
-                             "-Wno-deprecated-declarations",
+                             "-Wno-deprecated-declarations", "-fpermissive"
                              ]

 ext_modules = list()
@@ -128,22 +128,23 @@ def get_extra_args():
 # Scikit-bio module
 ssw_extra_compile_args = ["-Wno-deprecated-declarations", '-std=c99', '-I.']
-# if icc or sysconfig.get_config_vars()['CC'] == 'icc':
-#     ssw_extra_compile_args.extend(['-qopenmp-simd', '-DSIMDE_ENABLE_OPENMP'])
-# elif not (clang or sysconfig.get_config_vars()['CC'] == 'clang'):
-#     ssw_extra_compile_args.extend(['-fopenmp-simd', '-DSIMDE_ENABLE_OPENMP'])
+
 ext_modules.append(Extension(f"dysgu.scikitbio._ssw_wrapper",
                              [f"dysgu/scikitbio/_ssw_wrapper.pyx", f"dysgu/scikitbio/ssw.c"],
                              include_dirs=[f"{root}/dysgu/scikitbio", numpy.get_include()],
                              extra_compile_args=ssw_extra_compile_args,
-                             # define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
                              language="c"))

+ext_modules.append(Extension(f"dysgu.edlib.edlib",
+                             [f"dysgu/edlib/edlib.pyx", f"dysgu/edlib/src/edlib.cpp"],
+                             include_dirs=[f"{root}/dysgu/edlib", numpy.get_include()],
+                             extra_compile_args=["-O3", "-std=c++11"],
+                             language="c++"))

 # Dysgu modules
 for item in ["sv2bam", "io_funcs", "graph", "coverage", "assembler", "call_component",
-             "map_set_utils", "cluster", "sv_category", "extra_metrics"]:  # "post_call_metrics",
+             "map_set_utils", "cluster", "sv_category", "extra_metrics"]:

     ext_modules.append(Extension(f"dysgu.{item}",
                                  [f"dysgu/{item}.pyx"],
@@ -164,8 +165,8 @@ def get_extra_args():
     url="https://github.com/kcleal/dysgu",
     description="Structural variant calling",
     license="MIT",
-    version='1.6.2',
-    python_requires='>=3.7',
+    version='1.6.3',
+    python_requires='>=3.10',
     install_requires=[  # runtime requires
         'setuptools>=63.0',
         'cython',
@@ -173,12 +174,11 @@ def get_extra_args():
         'numpy>=1.18',
         'scipy',
         'pandas',
-        'pysam==0.21.0',
+        'pysam==0.22.0',
         'networkx>=2.4',
         'scikit-learn>=0.22',
         'sortedcontainers',
         'lightgbm',
-        'edlib',
     ],
     setup_requires=[
         'setuptools>=63.0',
@@ -187,14 +187,13 @@ def get_extra_args():
         'numpy>=1.18',
         'scipy',
         'pandas',
-        'pysam==0.21.0',
+        'pysam==0.22.0',
         'networkx>=2.4',
         'scikit-learn>=0.22',
         'sortedcontainers',
         'lightgbm',
-        'edlib'
     ],
-    packages=["dysgu", "dysgu.tests", "dysgu.scikitbio"],
+    packages=["dysgu", "dysgu.tests", "dysgu.scikitbio", "dysgu.edlib"],
     ext_modules=cythonize(ext_modules),
     include_package_data=True,
     zip_safe=False,