Skip to content

Commit

Permalink
read input BAM/CRAM
Browse files Browse the repository at this point in the history
  • Loading branch information
LindoNkambule committed Feb 22, 2024
1 parent df1a30a commit ba7a435
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions gwaspy/imputation/glimpse2_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,15 @@ def extract_ref_sites(
j.command(f"""bcftools view -G -Ou -o {j.ref_sites['bcf']} {ref_bcf['bcf']} """)
j.command(f"""bcftools index {j.ref_sites['bcf']} --output {j.ref_sites['bcf.csi']} --threads {ncpu}""")

j.command(f'''bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' {j.ref_sites['bcf']} | bgzip -c > {j.ref_sites['tsv.gz']}''')
j.command(f"""bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' {j.ref_sites['bcf']} | bgzip -c > {j.ref_sites['tsv.gz']}""")
j.command(f"""tabix -s1 -b2 -e2 {j.ref_sites['tsv.gz']}""")

return j

# Step 3) Computing genotype likelihoods (GLs) for a single sample at specific positions
def compute_gls(
b: hb.batch.Batch,
sample_bam: hb.ResourceGroup = None,
sample_bam_path: str = None,
ref_sites: hb.ResourceGroup = None,
ref_genome_fasta: hb.ResourceGroup = None,
sample_id: str = None,
Expand All @@ -107,6 +107,10 @@ def compute_gls(
) -> Job:
j = b.new_job(name=f'compute GLs:{sample_id}-{chrom}')

bam_idx = f'{sample_bam_path}.bai' if hfs.exists(f'{sample_bam_path}.bai') else f'{sample_bam_path}.crai'
sample_bam = b.read_input_group(**{'bam': sample_bam_path,
'idx': bam_idx})

j.image(img)
j.cpu(ncpu)
j.memory(memory)
Expand All @@ -121,7 +125,7 @@ def compute_gls(
)

# compute GLs
j.command(f'''bcftools mpileup -f {ref_genome_fasta['fasta']} -I -E -a 'FORMAT/DP' -T {ref_sites['bcf']} -r {chrom} {sample_bam['bam']} -Ou | bcftools call -Aim -C alleles -T {ref_sites['tsv.gz']} -Ou -o {j.gl_vcf['bcf']}''')
j.command(f"""bcftools mpileup -f {ref_genome_fasta['fasta']} -I -E -a 'FORMAT/DP' -T {ref_sites['bcf']} -r {chrom} {sample_bam['bam']} -Ou | bcftools call -Aim -C alleles -T {ref_sites['tsv.gz']} -Ou -o {j.gl_vcf['bcf']}""")

j.command(f"""bcftools index {j.gl_vcf['bcf']} --output {j.gl_vcf['bcf.csi']} --threads {ncpu}""")

Expand Down Expand Up @@ -308,7 +312,7 @@ def ligate_chunks(
bams_gls = [
compute_gls(
b=batch,
sample_bam=bam_list[s][1],
sample_bam_path=bam_list[s][1],
sample_id=bam_list[s][0],
ref_sites=ref_panel_sites,
ref_genome_fasta=ref_genome,
Expand Down

0 comments on commit ba7a435

Please sign in to comment.