Skip to content

Commit

Permalink
add metadata to output vcf; rename variables for clarity
Browse files Browse the repository at this point in the history
  • Loading branch information
jn80842 committed Jun 9, 2014
1 parent 840d3ef commit 8a9d4bc
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 12 deletions.
18 changes: 10 additions & 8 deletions smashbenchmarking/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@
import datetime

from parsers.genome import Genome
from vcf_eval.variants import Variants,evaluate_variants,output_errors
from vcf_eval.variants import Variants,evaluate_variants,output_annotated_variants
from vcf_eval.chrom_variants import VARIANT_TYPE
from vcf_eval.callset_helper import MAX_INDEL_LEN
from normalize_vcf import normalize

# metadata
SMASHVERSION = 1.0
SMASHVERSION = "1.0"
date_run = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

# this needs to move to another class
Expand Down Expand Up @@ -204,9 +204,8 @@ def is_valid_file(parser, arg):
help="The error rate of SVs in the ground truth data")
parser.add_argument("-w","--rescue_window_size",dest="window",type=int,
default=50,help="The size of the window for rescuing")
parser.add_argument("--err_vcf",dest="err_vcf",action="store",
help="""An optional output VCF to hold detected
false-negatives and false-positives""")
parser.add_argument("--output_vcf",dest="output_vcf",action="store",
help="""An optional output VCF to hold annotated variants from both source files""")
parser.add_argument("--normalize",action="store_true",
help="Optionally normalize variants before evaluating them; requires reference file")
parser.add_argument("--output",action="store",
Expand Down Expand Up @@ -234,6 +233,9 @@ def get_sv_err(true_vars, sv_err_rate):
def get_text_header(params):
return "# SMaSH version %s, run %s\n# cmdline args: %s" % (SMASHVERSION,date_run," ".join(params))

def get_vcf_header_lines(params):
return ["##SMaSH version %s" % SMASHVERSION, "##Date run %s" % date_run, "##cmdline args: %s" % " ".join(params)]

def main(params):
args = parse_args(params)
if args.normalize and not args.reference:
Expand Down Expand Up @@ -276,7 +278,7 @@ def main(params):

sv_eps = args.sv_eps

stat_reporter, errors = evaluate_variants(
stat_reporter, annotated_vars = evaluate_variants(
true_vars,
pred_vars,
sv_eps,
Expand Down Expand Up @@ -318,8 +320,8 @@ def print_oth(var_type, description):
print_sv(VARIANT_TYPE.SV_INS, 'SV INSERTION'),
print_oth(VARIANT_TYPE.SV_OTH, 'SV OTHER')

if args.err_vcf :
output_errors(errors,ref.keys() if ref != None else None, open(args.err_vcf,'w'))
if args.output_vcf :
output_annotated_variants(annotated_vars,ref.keys() if ref != None else None, open(args.output_vcf,'w'),get_vcf_header_lines(params))

if __name__ == '__main__':
main(params=sys.argv[1:])
10 changes: 6 additions & 4 deletions smashbenchmarking/vcf_eval/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def aggregator(vartype):
aggregate[attribute] += stats[attribute]
return dict(aggregate)

def errors(chrom_order):
def annotated_var_iter(chrom_order):
if chrom_order == None:
chrom_order = sorted(map(lambda t: t.chrom, stats_by_chrom))
stats_by_chrom_dict = dict(map(lambda t: [t.chrom,t],stats_by_chrom) )
Expand All @@ -103,7 +103,7 @@ def errors(chrom_order):
var_iterator = chain(var_iterator,vcf_by_position([tp_iter,fp_iter,fn_iter,known_fp_iter,rescued_iter]))
return var_iterator

return aggregator,errors
return aggregator,annotated_var_iter


def _eval_aggregate(true_variants, pred_variants, known_fp, evaluate):
Expand All @@ -122,13 +122,15 @@ def evaluate(true,pred,known_fp):
return _eval_aggregate(true_variants,pred_variants,known_fp,evaluate)


def output_errors(err_aggregate,contig_ordering,outVCF):
def output_annotated_variants(var_aggregate,contig_ordering,outVCF,vcf_header_lines):
# write the header
outVCF.write("##fileformat=VCFv4.1\n")
for s in vcf_header_lines:
outVCF.write(s + "\n")
outVCF.write("##INFO=<ID=smash_type,Type=String,Description=\"classify variant as TP,FP,FN,or rescued\">\n")
outVCF.write("##INFO=<ID=source_file,Type=Integer,Description=\"variant originally in first or second vcf passed to SMaSH\">\n")
outVCF.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n")
for rec in err_aggregate(contig_ordering):
for rec in var_aggregate(contig_ordering):
outVCF.write("%s\n" % rec )

import heapq
Expand Down

0 comments on commit 8a9d4bc

Please sign in to comment.