Skip to content

Commit

Permalink
Option to count partially mapped reads
Browse files Browse the repository at this point in the history
Adds an option for cases where more flexibility is required for counting.
The partial_map row is kept for compatibility when comparing
with strictly mapped reads.
  • Loading branch information
mcmero committed May 6, 2024
1 parent 08380f6 commit a6b4c66
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .test/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ params {

// fasta file of guide sequences, leave blank to skip guide counting
guides_fasta = "$projectDir/.test/data/guides_simulated.fasta"

// if true, reads that only partially map to a guide are also counted towards that guide
lenient_counts = false
}

//SQL DB Plugin
Expand Down
10 changes: 7 additions & 3 deletions bin/count_guides.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ def parse_args():
parser.add_argument('guide_reference',
type=str,
help='Guide reference fasta file.')
parser.add_argument('--lenient',
action='store_true',
help='Count partial mappings.')

return parser.parse_args()

Expand Down Expand Up @@ -69,13 +72,14 @@ def main():
continue

guide_len = guide_lens[read.reference_name]
if read.reference_end - read.reference_start != guide_len:
read_spans_ref = read.reference_end - read.reference_start == guide_len
if args.lenient or read_spans_ref:
counts[read.reference_name] += 1
else:
counts['partial_map'] += 1
print(f'Guide {read.reference_name} does not span the whole guide',
file=sys.stderr)

counts[read.reference_name] += 1

sample_name = os.path.basename(args.bam).split(".")[0]
print(f'guide\t{sample_name}')
for guide, count in counts.items():
Expand Down
3 changes: 2 additions & 1 deletion modules/count.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ process CountGuides {

script:
def outCounts = "${sampleName}_guide_counts.txt"
def lenientFlag = params.lenient_counts ? "--lenient" : ""
/*
count collation is a hacky bash script to get collated output,
the script pastes the count files together, and then cuts out
Expand All @@ -58,7 +59,7 @@ process CountGuides {
samtools view -S -b | \
samtools sort -o \${sample}.bam
count_guides.py \${sample}.bam ${params.guides_fasta} > \${sample}_counts.txt
count_guides.py \${sample}.bam ${params.guides_fasta} ${lenientFlag} > \${sample}_counts.txt
done
paste *_counts.txt > tmpfile
Expand Down
3 changes: 3 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ params {

// fasta file of guide sequences, leave blank to skip guide counting
guides_fasta = ""

// if true, reads that only partially map to a guide are also counted towards that guide
lenient_counts = false
}

//SQL DB Plugin
Expand Down
5 changes: 5 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@
"default": "",
"fa_icon": "fas fa-file",
"description": "Reference fasta file containing guide sequences (leave blank to skip guide counting)."
},
"lenient_counts" : {
"type": "boolean",
"default": false,
"description": "If true, count partially mapped reads."
}
}
},
Expand Down

0 comments on commit a6b4c66

Please sign in to comment.