Skip to content

Commit

Permalink
add option to fill AC,AN tags if input is missing them
Browse files Browse the repository at this point in the history
  • Loading branch information
LindoNkambule committed Feb 14, 2024
1 parent 6329cc1 commit 5623f5a
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
4 changes: 4 additions & 0 deletions gwaspy/phasing/phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def run_phase(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,
fam_file: str = None,
data_type: str = 'array',
software: str = 'shapeit',
fill_tags: bool = False,
output_filename: str = None,
out_dir: str = None):

Expand Down Expand Up @@ -44,6 +45,7 @@ def run_phase(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,
reference_path=ref_path,
fam_file=pedigree,
data_type=data_type,
fill_tags=fill_tags,
output_filename=output_filename,
output_path=out_dir)
# else: To add BEAGLE
Expand All @@ -57,6 +59,7 @@ def main():
parser.add_argument('--local', action='store_true')
parser.add_argument('--billing-project', required=True)
parser.add_argument('--data-type', type=str, default='array', choices=['array', 'wgs'])
parser.add_argument('--fill-tags', action='store_true')
parser.add_argument('--software', type=str, default='shapeit', choices=['beagle', 'shapeit'])
parser.add_argument('--output-filename', type=str, required=True)
parser.add_argument('--out-dir', type=str, required=True)
Expand All @@ -75,5 +78,6 @@ def main():
fam_file=args.pedigree,
data_type=args.data_type,
software=args.software,
fill_tags=args.fill_tags,
output_filename=args.output_filename,
out_dir=args.out_dir)
41 changes: 41 additions & 0 deletions gwaspy/phasing/shapeit5_phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,45 @@ def size(file: str):
return size_gigs


def annotate_vcf(
        b: hb.batch.Batch = None,
        vcf: hb.ResourceGroup = None,
        region: str = None,
        ncpu: int = 8,
        memory: str = 'standard',
        storage: int = None,
        img: str = 'docker.io/lindonkambule/gwaspy_phase_impute:latest',
) -> Job:
    """Add a Batch job that fills the AN and AC INFO tags of a VCF/BCF.

    The job runs the ``bcftools +fill-tags`` plugin on ``vcf['vcf']``, writes
    the result as a compressed BCF and then indexes it with ``bcftools index``.
    The outputs are exposed as the resource group ``j.annotated_vcf`` with the
    keys ``'vcf'`` (``{root}.bcf``) and ``'index'`` (``{root}.bcf.csi``).

    :param b: Batch to which the new job is added.
    :param vcf: resource group whose ``'vcf'`` entry is the input file.
    :param region: label used only in the job name (e.g. a chromosome).
    :param ncpu: number of CPUs requested; also passed to bcftools as
        ``--threads``.
    :param memory: memory setting passed to ``Job.memory``.
    :param storage: storage request in GiB. When ``None`` no explicit request
        is made and the backend default applies.
    :param img: Docker image in which the commands run.
    :return: the created job; read the output from ``.annotated_vcf``.
    """
    j = b.new_job(name=f'Add AC, AN tags to input: {region}')

    j.image(img)
    j.cpu(ncpu)
    j.memory(memory)
    j.regions(['us-central1'])
    # Only request storage when a size was given; formatting None would
    # produce the invalid request string 'NoneGi'.
    if storage is not None:
        j.storage(f'{storage}Gi')

    j.declare_resource_group(
        annotated_vcf={
            'vcf': '{root}.bcf',
            'index': '{root}.bcf.csi'
        }
    )

    # Everything after the bare `--` is handed to the fill-tags plugin itself,
    # so bcftools' own output options (-O/-o/--threads) must come before it.
    # -Ob (compressed BCF) is required because `bcftools index` cannot index
    # the uncompressed BCF that -Ou would write.
    j.command(f"""
        bcftools +fill-tags {vcf['vcf']} -Ob -o {j.annotated_vcf['vcf']} --threads {ncpu} -- -t AN,AC
        bcftools index {j.annotated_vcf['vcf']} --output {j.annotated_vcf['index']} --threads {ncpu}
        """)

    return j


def shapeit_phasing(
batch: hb.Batch = None,
input_path: str = None,
reference_path: Optional[str] = None,
fam_file: Optional[hb.ResourceFile] = None,
data_type: str = 'array',
fill_tags: bool = False,
output_filename: str = None,
output_path: str = None):

Expand Down Expand Up @@ -244,6 +277,14 @@ def concatenate_rare_chunks(
ref_vcf = None
ref_size = 0

if fill_tags:
chrom_vcf = annotate_vcf(
b=batch,
vcf=chrom_vcf,
region=f'chr{i}',
storage=round(vcf_size*1.5 + ref_size + 2)
).annotated_vcf

if data_type == 'array':
phase_common(
b=batch,
Expand Down

0 comments on commit 5623f5a

Please sign in to comment.