diff --git a/dysgu/cluster.pyx b/dysgu/cluster.pyx index 1181ccd..178b70b 100644 --- a/dysgu/cluster.pyx +++ b/dysgu/cluster.pyx @@ -1178,7 +1178,8 @@ def cluster_reads(args): if kind2 == "stdin" or kind2 == "-" or kind2 not in opts: raise ValueError("--ibam must be a .bam/cam/sam file") ibam = pysam.AlignmentFile(args["ibam"], opts[kind2], threads=1, - reference_filename=None if kind2 != "cram" else args["reference"]) + reference_filename=None if kind2 != "cram" else args["reference"]) + if "RG" in infile.header: rg = infile.header["RG"] if "SM" in rg[0]: diff --git a/dysgu/coverage.pyx b/dysgu/coverage.pyx index 8e526fd..712e676 100644 --- a/dysgu/coverage.pyx +++ b/dysgu/coverage.pyx @@ -326,7 +326,24 @@ cdef class GenomeScanner: cdef uint32_t *cigar_p cdef int i cdef int non_match_count, matched_bases + + # first check SO tag in HD + if "HD" in file_iter.header: + hd = file_iter.header["HD"] + if "SO" in hd: + if hd["SO"] == "unsorted": + # raise ValueError for SO:unsorted + raise ValueError("Input bam file must be sorted") + else: + logging.info("Check PASS : Input BAM file has sorted header") + + prev_alignment = None for a in file_iter: + # double check with index + if prev_alignment and prev_alignment.reference_id > a.reference_id: + raise ValueError("Input bam file must be sorted") + prev_alignment = a + if ibam is None: if a.flag & 1284 or a.mapq < self.mapq_threshold or a.cigartuples is None: continue