-
Notifications
You must be signed in to change notification settings - Fork 0
/
child_search_variants.py
executable file
·100 lines (82 loc) · 3.33 KB
/
child_search_variants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python3
"""
This program inserts WP3-mapped data from a provided VCF file
into the postgres database used by the reference Beacon 2.0 implementation.
"""
import argparse
import sys
import psycopg2 as pg
import vcf
def clear_tables(connection, cursor):
    """
    Empty the variants and calls tables before a fresh ingest.

    The TRUNCATE statement is issued through ``cursor`` and then committed
    on ``connection``, so the tables are already empty for whatever runs next.
    """
    truncate_sql = "TRUNCATE TABLE variants, calls"
    cursor.execute(truncate_sql)
    connection.commit()
def ingest_variants(connection, cursor, vcffile, nvar):
    """
    Insert variants, and the sample calls carrying them, from a VCF file.

    Records in which no sample has a variant call are skipped. For every
    other record one row is inserted into ``variants`` and one row per
    variant-carrying sample is inserted into ``calls``. Everything is
    committed once, after the loop.

    Parameters:
        connection: open database connection; only ``commit()`` is used.
        cursor: cursor on that connection used for all inserts.
        vcffile: path of the (uncompressed, text) VCF file to read.
        nvar: maximum number of variant records to insert. A value of zero
            or less inserts nothing.
    """
    count = 0
    # The context manager guarantees the VCF handle is closed even when an
    # insert or the parser raises part-way through the file.
    with open(vcffile, 'r') as vcf_handle:
        # With a non-positive limit there is nothing to ingest, so the
        # reader is never created (the old modulo test divided by zero here).
        records = vcf.Reader(vcf_handle) if nvar > 0 else ()
        for record in records:
            variant_calls = [call for call in record.samples if call.is_variant]
            if not variant_calls:
                continue

            # AF is a list when the header declares it per-allele and a plain
            # scalar when declared with Number=1; accept both shapes.
            allele_fraction = record.INFO.get("AF", 0)
            if isinstance(allele_fraction, (list, tuple)):
                allele_fraction = allele_fraction[0] if allele_fraction else 0

            gene = record.INFO.get("GENE", "")

            startpos = record.POS
            # Only the first alternate allele of a record is stored.
            chrom, ref, alt = record.CHROM, record.REF, str(record.ALT[0])
            # Inclusive end coordinate of the reference allele.
            endpos = startpos + len(ref) - 1

            cursor.execute("""INSERT INTO variants(variant_id, chromosome, reference, alternate, start, "end", gene, AF)
                              VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                              RETURNING id""",
                           (record.ID, chrom, ref, alt, startpos, endpos, gene, allele_fraction))
            # Database-generated key of the row just inserted; the calls
            # rows reference it.
            variant_row_id = cursor.fetchone()[0]

            call_rows = [(variant_row_id, call.sample, '/'.join(call.gt_alleles))
                         for call in variant_calls]
            cursor.executemany("""INSERT INTO calls(variant_id, sample_id, call)
                                  VALUES(%s,%s,%s)""", call_rows)

            count += 1
            # Lightweight progress indicator for long ingests.
            if count % 100 == 0:
                print(count)
            if count >= nvar:
                break

    connection.commit()
def main():
    """
    Parse arguments, make database connection, read file, and start ingest.

    The existing contents of the ``variants`` and ``calls`` tables are
    removed before the VCF file is ingested. If the database connection
    cannot be established the error is reported on stderr and the function
    returns without touching any table. The cursor and connection are
    always closed before returning.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--server", help="Postgres server hostname", default="0.0.0.0")
    parser.add_argument("--port", help="Postgres server port", type=int, default=5432)
    parser.add_argument("--username", help="Postgres username", default="search")
    # NOTE(review): a default password is baked into the CLI; confirm this is
    # only ever pointed at a local development database.
    parser.add_argument("--password", help="Postgres password", default="secretpassword")
    parser.add_argument("--database", help="Postgres database", default="search")
    parser.add_argument("-n", help="Number of variants", type=int, default=1000)
    parser.add_argument("vcffile", help="VCF file")
    args = parser.parse_args()

    # Reject a meaningless limit up front, before the tables are truncated.
    if args.n < 1:
        parser.error("-n must be a positive integer")

    try:
        connection = pg.connect(user=args.username,
                                password=args.password,
                                host=args.server,
                                port=args.port,
                                database=args.database)
    except pg.Error as error:
        print("Error while connecting to PostgreSQL", error, file=sys.stderr)
        # Nothing was opened, so there is nothing to clean up; stop here
        # rather than carrying on with an unusable connection.
        return

    try:
        cursor = connection.cursor()
        try:
            clear_tables(connection, cursor)
            ingest_variants(connection, cursor, args.vcffile, args.n)
        finally:
            cursor.close()
    finally:
        # Closing the connection also discards any uncommitted work left
        # behind by a failed ingest.
        connection.close()
# Run the ingest only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()