diff --git a/smashbenchmarking/normalize_vcf.py b/smashbenchmarking/normalize_vcf.py index e04988a..2a8aaf7 100755 --- a/smashbenchmarking/normalize_vcf.py +++ b/smashbenchmarking/normalize_vcf.py @@ -78,10 +78,9 @@ def genotype(vcfrecord): return vcfrecord.samples[0].gt_nums return {0 : "0/0", 1 : "0/1", 2: "1/1", None : "."}[vcfrecord.samples[0].gt_type] -# assume that input position is 0-based, so offset by 1 def write(record, writer): - return writer.write_record(record.CHROM, record.POS + 1, '.', - record.REF, record.ALT, genotype(record)) # TODO: more gtypes. + return writer.write_record(record.CHROM, record.POS, '.', + record.REF, ','.join(map(lambda a: str(a),record.ALT)), genotype(record)) # TODO: more gtypes. left_slides = [] diff --git a/smashbenchmarking/parsers/vcfwriter.py b/smashbenchmarking/parsers/vcfwriter.py index 76271ab..8bba253 100644 --- a/smashbenchmarking/parsers/vcfwriter.py +++ b/smashbenchmarking/parsers/vcfwriter.py @@ -84,7 +84,7 @@ def write_record(self, CHROM, POS, ID, REF, ALT, gtype): QUAL = 20 # Default 1/100 error probability. FILTER = 'PASS' FORMAT = 'GT' - print(CHROM, POS + 1, ID, REF, ALT, QUAL, FILTER, '.', FORMAT, gtype, + print(CHROM, POS, ID, REF, ALT, QUAL, FILTER, '.', FORMAT, gtype, sep='\t', file=self._output) return write diff --git a/test/normalize.py b/test/normalize.py index 85557e8..668b7ff 100755 --- a/test/normalize.py +++ b/test/normalize.py @@ -51,7 +51,7 @@ def getVcf(self,str): vcf_io = StringIO.StringIO(str) return vcf.Reader(vcf_io) - def normalizeString(self,vcf_str): + def normalizeStringToWriter(self,vcf_str): vcf_io = StringIO.StringIO(vcf_str) test_vcf = vcf.Reader(vcf_io) output_io = StringIO.StringIO() @@ -234,5 +234,17 @@ def testMultipleAltAlleles(self): self.assertEqual(record.REF,'G') self.assertEqual(record.ALT[0],'CG') + def testNormalizerWriter(self): + vcf_str = """##fileformat=VCFv4.0 +##FORMAT=\n +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n +chr1 2 . a c 20 PASS . GT 0/1\n +chr1 4 . A G 20 PASS . GT 1/1\n +""" + output_vcf = self.normalizeStringToWriter(vcf_str) + r1 = output_vcf.next() + self.assertEqual(r1.POS,2) + + if __name__ == '__main__': unittest.main()