diff --git a/DESCRIPTION b/DESCRIPTION index 64d2855..3b63fd2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Maintainer: Stefan Dentro License: GPL-3 Type: Package Title: Battenberg subclonal copy number caller -Version: 2.2.7 +Version: 2.2.8 Authors@R: c(person("David", "Wedge", role=c("aut"), email="dw9@sanger.ac.uk"), person("Peter", "Van Loo", role=c("aut")), person("Stefan","Dentro", email="sd11@sanger.ac.uk", role=c("aut", "cre")), @@ -23,7 +23,10 @@ Imports: ggplot2, readr, gtools, - gridExtra + gridExtra, + doParallel, + parallel, + foreach URL: https://github.com/Wedge-Oxford/battenberg LazyLoad: yes License: file LICENSE diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4d5576a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,50 @@ +#FROM r-base +FROM ubuntu:16.04 + +# Add dependencies +RUN apt-get update && apt-get install -y libxml2 libxml2-dev libcurl4-gnutls-dev r-cran-rgl git libssl-dev curl + +RUN mkdir /tmp/downloads + +RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/samtools/htslib/archive/1.2.1.tar.gz && \ + mkdir /tmp/downloads/htslib && \ + tar -C /tmp/downloads/htslib --strip-components 1 -zxf tmp.tar.gz && \ + make -C /tmp/downloads/htslib && \ + rm -f /tmp/downloads/tmp.tar.gz + +ENV HTSLIB /tmp/downloads/htslib + +RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/cancerit/alleleCount/archive/v2.1.2.tar.gz && \ + mkdir /tmp/downloads/alleleCount && \ + tar -C /tmp/downloads/alleleCount --strip-components 1 -zxf tmp.tar.gz && \ + cd /tmp/downloads/alleleCount/c && \ + mkdir bin && \ + make && \ + cp /tmp/downloads/alleleCount/c/bin/alleleCounter /usr/local/bin/. && \ + cd /tmp/downloads && \ + rm -rf /tmp/downloads/alleleCount /tmp/downloads/tmp.tar.gz + +RUN curl -sSL -o tmp.tar.gz --retry 10 https://mathgen.stats.ox.ac.uk/impute/impute_v2.3.2_x86_64_static.tgz && \ + mkdir /tmp/downloads/impute2 && \ + tar -C /tmp/downloads/impute2 --strip-components 1 -zxf tmp.tar.gz && \ + cp /tmp/downloads/impute2/impute2 /usr/local/bin && \ + rm -rf /tmp/downloads/impute2 /tmp/downloads/tmp.tar.gz + +RUN R -q -e 'source("http://bioconductor.org/biocLite.R"); biocLite(c("devtools","RColorBrewer","ggplot2","gridExtra","readr","doParallel","foreach"))' +RUN R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")' +RUN R -q -e 'devtools::install_github("Wedge-Oxford/battenberg")' + +RUN curl -sSL -o battenberg_wgs.R https://raw.githubusercontent.com/Wedge-Oxford/battenberg/master/inst/example/battenberg_wgs.R +# modify paths to reference files +RUN cat battenberg_wgs.R | \ + sed 's|IMPUTEINFOFILE = \".*|IMPUTEINFOFILE = \"/opt/battenberg_reference/1000genomes_2012_v3_impute/impute_info.txt\"|' | \ + sed 's|G1000PREFIX = \".*|G1000PREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesAlleles2012_chr\"|' | \ + sed 's|G1000PREFIX_AC = \".*|G1000PREFIX_AC = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesloci2012_chr\"|' | \ + sed 's|GCCORRECTPREFIX = \".*|GCCORRECTPREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_gcContent/1000_genomes_GC_corr_chr_\"|' | \ + sed 's|PROBLEMLOCI = \".*|PROBLEMLOCI = \"/opt/battenberg_reference/battenberg_problem_loci/probloci_270415.txt.gz\"|' > /usr/local/bin/battenberg_wgs.R && rm -f battenberg_wgs.R + +#RUN curl -sSL -o battenberg_snp6.R https://raw.githubusercontent.com/Wedge-Oxford/battenberg/master/inst/example/battenberg_snp6.R +#RUN cat battenberg_snp6.R | \ +# sed 's|IMPUTEINFOFILE = \".*|IMPUTEINFOFILE = 
\"/opt/battenberg_reference/1000genomes_2012_v3_impute/impute_info.txt\"|' | \ +# sed 's|G1000PREFIX = \".*|G1000PREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesAlleles2012_chr\"|' | \ +# sed 's|SNP6_REF_INFO_FILE = \".*|SNP6_REF_INFO_FILE = \"/opt/battenberg_reference/battenberg_snp6/snp6_ref_info_file.txt\"|' > /usr/local/bin/battenberg_snp6.R && rm -f battenberg_wgs.R diff --git a/NAMESPACE b/NAMESPACE index e11517c..d715f18 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(plot.haplotype.data) export(GetChromosomeBAFs) export(GetChromosomeBAFs_SNP6) export(allele_ratio_plot) +export(battenberg) export(calc_psi_t) export(calc_rho_psi_refit) export(callSubclones) @@ -24,10 +25,13 @@ export(getBAFsAndLogRs) export(infer_gender_birdseed) export(make_posthoc_plots) export(parse.imputeinfofile) +export(prepare_snp6) +export(prepare_wgs) export(read_table_generic) export(run.impute) export(runASCAT) export(run_clonal_ASCAT) +export(run_haplotyping) export(segment.baf.phased) export(segment.baf.phased.sv) export(squaresplot) @@ -43,7 +47,12 @@ importFrom(ASCAT,ascat.plotNonRounded) importFrom(ASCAT,ascat.plotSunrise) importFrom(ASCAT,make_segments) importFrom(RColorBrewer,brewer.pal) +importFrom(doParallel,registerDoParallel) +importFrom(foreach,"%dopar%") +importFrom(foreach,foreach) importFrom(gridExtra,arrangeGrob) importFrom(gridExtra,grid.arrange) importFrom(gtools,mixedsort) +importFrom(parallel,makeCluster) +importFrom(parallel,stopCluster) importFrom(readr,read_table) diff --git a/R/Battenberg-package.R b/R/Battenberg-package.R index fd3c32a..0445e5d 100644 --- a/R/Battenberg-package.R +++ b/R/Battenberg-package.R @@ -4,4 +4,7 @@ #' @importFrom gridExtra grid.arrange arrangeGrob #' @importFrom ASCAT make_segments ascat.plotSunrise ascat.plotAscatProfile ascat.plotNonRounded #' @importFrom gtools mixedsort +#' @importFrom parallel makeCluster stopCluster +#' @importFrom doParallel registerDoParallel +#' @importFrom foreach foreach %dopar% NULL diff --git a/R/battenberg.R b/R/battenberg.R new file mode 100644 index 0000000..d27d457 --- /dev/null +++ b/R/battenberg.R @@ -0,0 +1,230 @@ + +#' Run the Battenberg pipeline +#' +#' @param tumourname Tumour identifier, this is used as a prefix for the output files. If allele counts are supplied separately, they are expected to have this identifier as prefix. +#' @param normalname Matched normal identifier, this is used as a prefix for the output files. If allele counts are supplied separately, they are expected to have this identifier as prefix. 
+#' @param tumour_data_file A BAM or CEL file for the tumour +#' @param normal_data_file A BAM or CEL file for the normal +#' @param imputeinfofile Full path to a Battenberg impute info file with pointers to Impute2 reference data +#' @param g1000prefix Full prefix path to 1000 Genomes SNP loci data, as part of the Battenberg reference data +#' @param problemloci Full path to a problem loci file that contains SNP loci that should be filtered out +#' @param gccorrectprefix Full prefix path to GC content files, as part of the Battenberg reference data, not required for SNP6 data (Default: NA) +#' @param g1000allelesprefix Full prefix path to 1000 Genomes SNP alleles data, as part of the Battenberg reference data, not required for SNP6 data (Default: NA) +#' @param ismale A boolean set to TRUE if the donor is male, set to FALSE if female, not required for SNP6 data (Default: NA) +#' @param data_type String that contains either wgs or snp6 depending on the supplied input data (Default: wgs) +#' @param impute_exe Pointer to the Impute2 executable (Default: impute2, i.e. expected in $PATH) +#' @param allelecounter_exe Pointer to the alleleCounter executable (Default: alleleCounter, i.e. expected in $PATH) +#' @param nthreads The number of concurrent processes to use while running the Battenberg pipeline (Default: 8) +#' @param platform_gamma Platform scaling factor, suggestions are set to 1 for wgs and to 0.55 for snp6 (Default: 1) +#' @param phasing_gamma Gamma parameter used when correcting phasing mistakes (Default: 1) +#' @param segmentation_gamma The gamma parameter controls the size of the penalty of starting a new segment during segmentation. It is therefore the key parameter for controlling the number of segments (Default: 10) +#' @param segmentation_kmin Kmin represents the minimum number of probes/SNPs that a segment should consist of (Default: 3) +#' @param phasing_kmin Kmin used when correcting for phasing mistakes (Default: 3) +#' @param clonality_dist_metric Distance metric to use when choosing purity/ploidy combinations (Default: 0) +#' @param ascat_dist_metric Distance metric to use when choosing purity/ploidy combinations (Default: 1) +#' @param min_ploidy Minimum ploidy to be considered (Default: 1.6) +#' @param max_ploidy Maximum ploidy to be considered (Default: 4.8) +#' @param min_rho Minimum purity to be considered (Default: 0.1) +#' @param min_goodness Minimum goodness of fit required for a purity/ploidy combination to be accepted as a solution (Default: 0.63) +#' @param uninformative_BAF_threshold The threshold beyond which BAF becomes uninformative (Default: 0.51) +#' @param min_normal_depth Minimum depth required in the matched normal for a SNP to be considered as part of the wgs analysis (Default: 10) +#' @param min_base_qual Minimum base quality required for a read to be counted when allele counting (Default: 20) +#' @param min_map_qual Minimum mapping quality required for a read to be counted when allele counting (Default: 35) +#' @param calc_seg_baf_option Sets way to calculate BAF per segment: 1=mean, 2=median (Default: 1) +#' @param skip_allele_counting Provide TRUE when allele counting can be skipped (i.e. 
it's already done) (Default: FALSE) +#' @param skip_preprocessing Provide TRUE when preprocessing is already complete (Default: FALSE) +#' @param skip_phasing Provide TRUE when phasing is already complete (Default: FALSE) +#' @param snp6_reference_info_file Reference files for the SNP6 pipeline only (Default: NA) +#' @param apt.probeset.genotype.exe Helper tool for extracting data from CEL files, SNP6 pipeline only (Default: apt-probeset-genotype) +#' @param apt.probeset.summarize.exe Helper tool for extracting data from CEL files, SNP6 pipeline only (Default: apt-probeset-summarize) +#' @param norm.geno.clust.exe Helper tool for extracting data from CEL files, SNP6 pipeline only (Default: normalize_affy_geno_cluster.pl) +#' @param birdseed_report_file Sex inference output file, SNP6 pipeline only (Default: birdseed.report.txt) +#' @param heterozygousFilter Legacy option to set a heterozygous SNP filter, SNP6 pipeline only (Default: "none") +#' @author sd11 +#' @export +battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file, imputeinfofile, g1000prefix, problemloci, + gccorrectprefix=NA, g1000allelesprefix=NA, ismale=NA, data_type="wgs", impute_exe="impute2", allelecounter_exe="alleleCounter", nthreads=8, platform_gamma=1, phasing_gamma=1, + segmentation_gamma=10, segmentation_kmin=3, phasing_kmin=1, clonality_dist_metric=0, ascat_dist_metric=1, min_ploidy=1.6, + max_ploidy=4.8, min_rho=0.1, min_goodness=0.63, uninformative_BAF_threshold=0.51, min_normal_depth=10, min_base_qual=20, + min_map_qual=35, calc_seg_baf_option=1, skip_allele_counting=F, skip_preprocessing=F, skip_phasing=F, + snp6_reference_info_file=NA, apt.probeset.genotype.exe="apt-probeset-genotype", apt.probeset.summarize.exe="apt-probeset-summarize", + norm.geno.clust.exe="normalize_affy_geno_cluster.pl", birdseed_report_file="birdseed.report.txt", heterozygousFilter="none") { + + requireNamespace("foreach") + requireNamespace("doParallel") + requireNamespace("parallel") + + if (data_type=="wgs" & is.na(ismale)) { + print("Please provide a boolean denoting whether this sample represents a male donor") + q(save="no", status=1) + } + + if (data_type=="wgs" & is.na(g1000allelesprefix)) { + print("Please provide a path to 1000 Genomes allele reference files") + q(save="no", status=1) + } + + if (data_type=="wgs" & is.na(gccorrectprefix)) { + print("Please provide a path to GC content reference files") + q(save="no", status=1) + } + + if (data_type=="wgs" | data_type=="WGS") { + chrom_names = get.chrom.names(imputeinfofile, ismale) + logr_file = paste(tumourname, "_mutantLogR_gcCorrected.tab", sep="") + allelecounts_file = paste(tumourname, "_alleleCounts.tab", sep="") + } else if (data_type=="snp6" | data_type=="SNP6") { + chrom_names = get.chrom.names(imputeinfofile, TRUE) + logr_file = paste(tumourname, "_mutantLogR.tab", sep="") + allelecounts_file = NULL + } + + if (!skip_preprocessing) { + if (data_type=="wgs" | data_type=="WGS") { + # Setup for parallel computing + clp = parallel::makeCluster(nthreads) + doParallel::registerDoParallel(clp) + + prepare_wgs(chrom_names=chrom_names, + tumourbam=tumour_data_file, + normalbam=normal_data_file, + tumourname=tumourname, + normalname=normalname, + g1000allelesprefix=g1000allelesprefix, + g1000prefix=g1000prefix, + gccorrectprefix=gccorrectprefix, + min_base_qual=min_base_qual, + min_map_qual=min_map_qual, + allelecounter_exe=allelecounter_exe, + min_normal_depth=min_normal_depth, + nthreads=nthreads, + skip_allele_counting=skip_allele_counting) + + # 
Kill the threads + parallel::stopCluster(clp) + + } else if (data_type=="snp6" | data_type=="SNP6") { + + prepare_snp6(tumour_cel_file=tumour_data_file, + normal_cel_file=normal_data_file, + tumourname=tumourname, + chrom_names=chrom_names, + snp6_reference_info_file=snp6_reference_info_file, + apt.probeset.genotype.exe=apt.probeset.genotype.exe, + apt.probeset.summarize.exe=apt.probeset.summarize.exe, + norm.geno.clust.exe=norm.geno.clust.exe, + birdseed_report_file=birdseed_report_file) + + } else { + print("Unknown data type provided, please provide wgs or snp6") + q(save="no", status=1) + } + } + + if (data_type=="snp6" | data_type=="SNP6") { + # Infer what the gender is - WGS requires it to be specified + gender = infer_gender_birdseed(birdseed_report_file) + ismale = gender == "male" + } + + if (!skip_phasing) { + # Setup for parallel computing + clp = parallel::makeCluster(nthreads) + doParallel::registerDoParallel(clp) + + # Reconstruct haplotypes + # mclapply(1:length(chrom_names), function(chrom) { + foreach::foreach (chrom=1:length(chrom_names)) %dopar% { + print(chrom) + + run_haplotyping(chrom=chrom, + tumourname=tumourname, + normalname=normalname, + ismale=ismale, + imputeinfofile=imputeinfofile, + problemloci=problemloci, + impute_exe=impute_exe, + min_normal_depth=min_normal_depth, + chrom_names=chrom_names, + snp6_reference_info_file=snp6_reference_info_file, + heterozygousFilter=heterozygousFilter) + }#, mc.cores=nthreads) + + # Kill the threads as from here its all single core + parallel::stopCluster(clp) + + # Combine all the BAF output into a single file + combine.baf.files(inputfile.prefix=paste(tumourname, "_chr", sep=""), + inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", + outputfile=paste(tumourname, "_heterozygousMutBAFs_haplotyped.txt", sep=""), + no.chrs=length(chrom_names)) + } + + # Segment the phased and haplotyped BAF data + segment.baf.phased(samplename=tumourname, + inputfile=paste(tumourname, "_heterozygousMutBAFs_haplotyped.txt", sep=""), + outputfile=paste(tumourname, ".BAFsegmented.txt", sep=""), + gamma=segmentation_gamma, + phasegamma=phasing_gamma, + kmin=segmentation_kmin, + phasekmin=phasing_kmin, + calc_seg_baf_option=calc_seg_baf_option) + + # Fit a clonal copy number profile + fit.copy.number(samplename=tumourname, + outputfile.prefix=paste(tumourname, "_", sep=""), + inputfile.baf.segmented=paste(tumourname, ".BAFsegmented.txt", sep=""), + inputfile.baf=paste(tumourname,"_mutantBAF.tab", sep=""), + inputfile.logr=logr_file, + dist_choice=clonality_dist_metric, + ascat_dist_choice=ascat_dist_metric, + min.ploidy=min_ploidy, + max.ploidy=max_ploidy, + min.rho=min_rho, + min.goodness=min_goodness, + uninformative_BAF_threshold=uninformative_BAF_threshold, + gamma_param=platform_gamma, + use_preset_rho_psi=F, + preset_rho=NA, + preset_psi=NA, + read_depth=30) + + # Go over all segments, determine which segements are a mixture of two states and fit a second CN state + callSubclones(sample.name=tumourname, + baf.segmented.file=paste(tumourname, ".BAFsegmented.txt", sep=""), + logr.file=logr_file, + rho.psi.file=paste(tumourname, "_rho_and_psi.txt",sep=""), + output.file=paste(tumourname,"_subclones.txt", sep=""), + output.figures.prefix=paste(tumourname,"_subclones_chr", sep=""), + output.gw.figures.prefix=paste(tumourname,"_BattenbergProfile", sep=""), + masking_output_file=paste(tumourname, "_segment_masking_details.txt", sep=""), + sv_breakpoints_file="NA", + chr_names=chrom_names, + gamma=platform_gamma, + segmentation.gamma=NA, + 
siglevel=0.05, + maxdist=0.01, + noperms=1000, + calc_seg_baf_option=calc_seg_baf_option) + + # Make some post-hoc plots + make_posthoc_plots(samplename=tumourname, + logr_file=logr_file, + subclones_file=paste(tumourname, "_subclones.txt", sep=""), + rho_psi_file=paste(tumourname, "_rho_and_psi.txt", sep=""), + bafsegmented_file=paste(tumourname, ".BAFsegmented.txt", sep=""), + logrsegmented_file=paste(tumourname, ".logRsegmented.txt", sep=""), + allelecounts_file=allelecounts_file) + + # Save refit suggestions for a future rerun + cnfit_to_refit_suggestions(samplename=tumourname, + subclones_file=paste(tumourname, "_subclones.txt", sep=""), + rho_psi_file=paste(tumourname, "_rho_and_psi.txt", sep=""), + gamma_param=platform_gamma) + + + + + +} + diff --git a/R/clonal_ascat.R b/R/clonal_ascat.R index 7de439b..a7c96a5 100755 --- a/R/clonal_ascat.R +++ b/R/clonal_ascat.R @@ -298,14 +298,11 @@ calc_LogR_Pvalue <-function( LogR, maxdist_LogR, LogR_level ) # kjd 27-2-2014 } -#' Helper function to estimate rho from a given copy number state and it's BAF and LogR +#' Helper function to estimate rho from a given copy number state and it's BAF. The LogR is not used. #' @noRd estimate_rho <-function( LogR_value, BAFreq_value, nA_value, nB_value ) # kjd 10-3-2014 { - temp_value = BAFreq_value * ( 2 - nA_value - nB_value ) - temp_value = nB_value - 1 - temp_value - rho_value = ( ( 2 * BAFreq_value ) - 1 ) / temp_value - + rho_value = (2*BAFreq_value-1)/(2*BAFreq_value-BAFreq_value*(nA_value+nB_value)-1+nA_value) return( rho_value ) } diff --git a/R/fitcopynumber.R b/R/fitcopynumber.R index 2041685..99614d0 100644 --- a/R/fitcopynumber.R +++ b/R/fitcopynumber.R @@ -57,13 +57,13 @@ fit.copy.number = function(samplename, outputfile.prefix, inputfile.baf.segmente raw.BAF.data = raw.BAF.data[!is.na(raw.BAF.data[,3]),] raw.logR.data = raw.logR.data[!is.na(raw.logR.data[,3]),] - # Chromosome names are sometimes 'chr1', etc. - if(length(grep("chr",raw.BAF.data[1,1]))>0){ - raw.BAF.data[,1] = gsub("chr","",raw.BAF.data[,1]) - } - if(length(grep("chr",raw.logR.data[1,1]))>0){ - raw.logR.data[,1] = gsub("chr","",raw.logR.data[,1]) - } + ## Chromosome names are sometimes 'chr1', etc. + #if(length(grep("chr",raw.BAF.data[1,1]))>0){ + # raw.BAF.data[,1] = gsub("chr","",raw.BAF.data[,1]) + #} + #if(length(grep("chr",raw.logR.data[1,1]))>0){ + # raw.logR.data[,1] = gsub("chr","",raw.logR.data[,1]) + #} BAF.data = NULL logR.data = NULL @@ -222,9 +222,9 @@ callSubclones = function(sample.name, baf.segmented.file, logr.file, rho.psi.fil } # Chromosome names are sometimes 'chr1', etc. 
- if(length(grep("chr",LogRvals[1,1]))>0){ - LogRvals[,1] = gsub("chr","",LogRvals[,1]) - } + #if(length(grep("chr",LogRvals[1,1]))>0){ +# LogRvals[,1] = gsub("chr","",LogRvals[,1]) + #} ctrans = c(1:length(chr_names)) names(ctrans) = chr_names @@ -267,7 +267,7 @@ callSubclones = function(sample.name, baf.segmented.file, logr.file, rho.psi.fil ################################################################################################ # Collapse the BAFsegmented into breakpoints to be used in plotting segment_breakpoints = collapse_bafsegmented_to_segments(BAFvals) - if (!is.null(sv_breakpoints_file) & !sv_breakpoints_file=="NA") { + if (!is.null(sv_breakpoints_file) & !ifelse(is.null(sv_breakpoints_file), TRUE, sv_breakpoints_file=="NA") & !ifelse(is.null(sv_breakpoints_file), TRUE, is.na(sv_breakpoints_file))) { svs = read.table(sv_breakpoints_file, header=T, stringsAsFactors=F) } @@ -277,7 +277,7 @@ callSubclones = function(sample.name, baf.segmented.file, logr.file, rho.psi.fil #if no points to plot, skip if (length(pos)==0) { next } - if (!is.null(sv_breakpoints_file) & !sv_breakpoints_file=="NA") { + if (!is.null(sv_breakpoints_file) & !ifelse(is.null(sv_breakpoints_file), TRUE, sv_breakpoints_file=="NA") & !ifelse(is.null(sv_breakpoints_file), TRUE, is.na(sv_breakpoints_file))) { svs_pos = svs[svs$chromosome==chr,]$position / 1000000 } else { svs_pos = NULL @@ -609,7 +609,9 @@ merge_segments = function(subclones, bafsegmented, logR, rho, psi, platform_gamm # Perform t-test on the BAFphased if (sum(!is.na(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i-1]])) > 10 & - sum(!is.na(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]])) > 10) { + sum(!is.na(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]])) > 10 & + sum(!is.na(logR[logR$Chromosome==subclones$chr[i-1] & logR$Position>=subclones$startpos[i-1] & logR$Position<=subclones$endpos[i-1], 3])) > 10 & + sum(!is.na(logR[logR$Chromosome==subclones$chr[i] & logR$Position>=subclones$startpos[i] & logR$Position<=subclones$endpos[i], 3])) > 10) { baf_significant = t.test(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i-1]], bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]])$p.value < 0.05 logr_significant = t.test(logR[logR$Chromosome==subclones$chr[i-1] & logR$Position>=subclones$startpos[i-1] & logR$Position<=subclones$endpos[i-1], 3], diff --git a/R/impute.R b/R/impute.R index 01e542a..29356a4 100644 --- a/R/impute.R +++ b/R/impute.R @@ -112,3 +112,97 @@ combine.impute.output = function(inputfile.prefix, outputfile, is.male, imputein impute.output = concatenateImputeFiles(inputfile.prefix, all.boundaries) write.table(impute.output, file=outputfile, row.names=F, col.names=F, quote=F, sep=" ") } + + +#' Construct haplotypes for a chromosome +#' +#' This function takes preprocessed data and performs haplotype reconstruction. 
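For reference, a hedged sketch of driving this per-chromosome step standalone, mirroring the foreach loop used by the new battenberg() wrapper. It assumes the per-chromosome allele count files from the preprocessing step are already in the working directory; the reference paths, sample identifiers and thread count below are placeholders, not values taken from this changeset.

```
library(Battenberg)
library(doParallel)
library(foreach)

# Placeholder reference files and sample names - substitute your own
imputeinfofile = "/opt/battenberg_reference/1000genomes_2012_v3_impute/impute_info.txt"
problemloci = "/opt/battenberg_reference/battenberg_problem_loci/probloci_270415.txt.gz"
tumourname = "tumour_sample"
normalname = "normal_sample"
chrom_names = get.chrom.names(imputeinfofile, TRUE)

# Each chromosome is an independent job, so haplotyping can run in parallel
clp = parallel::makeCluster(4)
doParallel::registerDoParallel(clp)
foreach::foreach(chrom=1:length(chrom_names), .packages="Battenberg") %dopar% {
  run_haplotyping(chrom=chrom, tumourname=tumourname, normalname=normalname, ismale=TRUE,
                  imputeinfofile=imputeinfofile, problemloci=problemloci, impute_exe="impute2",
                  min_normal_depth=10, chrom_names=chrom_names)
}
parallel::stopCluster(clp)
```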
+#' +#' @param chrom The chromosome for which to reconstruct haplotypes +#' @param tumourname Identifier of the tumour, used to match data files on disk +#' @param normalname Identifier of the normal, used to match data files on disk +#' @param ismale Boolean, set to TRUE if the sample is male +#' @param imputeinfofile Full path to the imputeinfo reference file +#' @param problemloci Full path to the problematic loci reference file +#' @param impute_exe Path to the impute executable (can be found if its in $PATH) +#' @param min_normal_depth Minimal depth in the matched normal required for a SNP to be used +#' @param chrom_names A vector containing the names of chromosomes to be included +#' @param snp6_reference_info_file SNP6 only parameter Default: NA +#' @param heterozygousFilter SNP6 only parameter Default: NA +#' @author sd11 +#' @export +run_haplotyping = function(chrom, tumourname, normalname, ismale, imputeinfofile, problemloci, impute_exe, min_normal_depth, chrom_names, + snp6_reference_info_file=NA, heterozygousFilter=NA) { + + if (file.exists(paste(tumourname, "_alleleFrequencies_chr", chrom, ".txt", sep=""))) { + generate.impute.input.wgs(chrom=chrom, + tumour.allele.counts.file=paste(tumourname,"_alleleFrequencies_chr", chrom, ".txt", sep=""), + normal.allele.counts.file=paste(normalname,"_alleleFrequencies_chr", chrom, ".txt", sep=""), + output.file=paste(tumourname, "_impute_input_chr", chrom, ".txt", sep=""), + imputeinfofile=imputeinfofile, + is.male=ismale, + problemLociFile=problemloci, + useLociFile=NA) + } else { + generate.impute.input.snp6(infile.germlineBAF=paste(tumourname, "_germlineBAF.tab", sep=""), + infile.tumourBAF=paste(tumourname, "_mutantBAF.tab", sep=""), + outFileStart=paste(tumourname, "_impute_input_chr", sep=""), + chrom=chrom, + chr_names=chrom_names, + problemLociFile=problemloci, + snp6_reference_info_file=snp6_reference_info_file, + imputeinfofile=imputeinfofile, + is.male=ismale, + heterozygousFilter=heterozygousFilter) + } + + # Run impute on the files + run.impute(inputfile=paste(tumourname, "_impute_input_chr", chrom, ".txt", sep=""), + outputfile.prefix=paste(tumourname, "_impute_output_chr", chrom, ".txt", sep=""), + is.male=ismale, + imputeinfofile=imputeinfofile, + impute.exe=impute_exe, + region.size=5000000, + chrom=chrom) + + # As impute runs in windows across a chromosome we need to assemble the output + combine.impute.output(inputfile.prefix=paste(tumourname, "_impute_output_chr", chrom, ".txt", sep=""), + outputfile=paste(tumourname, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), + is.male=ismale, + imputeinfofile=imputeinfofile, + region.size=5000000, + chrom=chrom) + + # If an allele counts file exists we assume this is a WGS sample and run the corresponding step, otherwise it must be SNP6 + print(paste(tumourname, "_alleleFrequencies_chr", chrom, ".txt", sep="")) + print(file.exists(paste(tumourname, "_alleleFrequencies_chr", chrom, ".txt", sep=""))) + if (file.exists(paste(tumourname, "_alleleFrequencies_chr", chrom, ".txt", sep=""))) { + # WGS - Transform the impute output into haplotyped BAFs + GetChromosomeBAFs(chrom=chrom, + SNP_file=paste(tumourname, "_alleleFrequencies_chr", chrom, ".txt", sep=""), + haplotypeFile=paste(tumourname, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), + samplename=tumourname, + outfile=paste(tumourname, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), + chr_names=chrom_names, + minCounts=min_normal_depth) + } else { + print("SNP6 get BAFs") + # SNP6 - 
Transform the impute output into haplotyped BAFs + GetChromosomeBAFs_SNP6(chrom=chrom, + alleleFreqFile=paste(tumourname, "_impute_input_chr", chrom, "_withAlleleFreq.csv", sep=""), + haplotypeFile=paste(tumourname, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), + samplename=tumourname, + outputfile=paste(tumourname, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), + chr_names=chrom_names) + } + + # Plot what we have until this point + plot.haplotype.data(haplotyped.baf.file=paste(tumourname, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), + imageFileName=paste(tumourname,"_chr",chrom,"_heterozygousData.png",sep=""), + samplename=tumourname, + chrom=chrom, + chr_names=chrom_names) + + # Cleanup temp Impute output + unlink(paste(tumourname, "_impute_output_chr", chrom, "*K.txt*", sep="")) +} diff --git a/R/plotting.R b/R/plotting.R index 699c48b..fb3c947 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -484,7 +484,7 @@ allele_ratio_plot = function(samplename, bafsegmented, logrsegmented, outputfile print("Plotting..") if (platform == "WGS") { - sel = allelecounts[seq(1, nrow(allelecounts), 100),] + sel = seq(1, nrow(allelecounts), 100) } else { sel = rep(T, nrow(allelecounts)) } diff --git a/R/prepare_SNP6.R b/R/prepare_SNP6.R index ce9512b..76e4eae 100644 --- a/R/prepare_SNP6.R +++ b/R/prepare_SNP6.R @@ -421,3 +421,47 @@ infer_gender_birdseed = function(birdseed_report_file) { z = read.table(birdseed_report_file, header=T) return(as.character(z$em.cluster.chrX.het.contrast_gender)) } + + +#' Prepare SNP6 data for haplotype construction +#' +#' This function performs part of the Battenberg SNP6 pipeline: Extract BAF and logR from the CEL files +#' and performing GC content correction. +#' +#' @param tumour_cel_file Full path to a CEL file containing the tumour raw data +#' @param normal_cel_file Full path to a CEL file containing the normal raw data +#' @param tumourname Identifier to be used for tumour output files +#' @param chrom_names A vector containing the names of chromosomes to be included +#' @param snp6_reference_info_file Full path to the SNP6 reference info file +#' @param apt.probeset.genotype.exe Full path to the apt.probeset.genotype executable (Default: expected in $PATH) +#' @param apt.probeset.summarize.exe Full path to the apt.probeset.summarize executable (Default: expected in $PATH) +#' @param norm.geno.clust.exe Full path to the norm.geno.clust.exe executable (Default: expected in $PATH) +#' @param birdseed_report_file Name of the birdseed output file. This is a temp output file of one of the internally called functions of which the name cannot be defined. Don't change this parameter. (Default: birdseed.report.txt) +#' @author sd11 +#' @export +prepare_snp6 = function(tumour_cel_file, normal_cel_file, tumourname, chrom_names, + snp6_reference_info_file, apt.probeset.genotype.exe="apt-probeset-genotype", + apt.probeset.summarize.exe="apt-probeset-summarize", norm.geno.clust.exe="normalize_affy_geno_cluster.pl", + birdseed_report_file="birdseed.report.txt") { + + # Extract the LogR and BAF from both tumour and normal cel files. 
+ cel2baf.logr(normal_cel_file=normal_cel_file, + tumour_cel_file=tumour_cel_file, + output_file=paste(tumourname, "_lrr_baf.txt", sep=""), + snp6_reference_info_file=snp6_reference_info_file, + apt.probeset.genotype.exe=apt.probeset.genotype.exe, + apt.probeset.summarize.exe=apt.probeset.summarize.exe, + norm.geno.clust.exe=norm.geno.clust.exe) + + gc.correct(samplename=tumourname, + infile.logr.baf=paste(tumourname, "_lrr_baf.txt", sep=""), + outfile.tumor.LogR=paste(tumourname, "_mutantLogR.tab", sep=""), + outfile.tumor.BAF=paste(tumourname, "_mutantBAF.tab", sep=""), + outfile.normal.LogR=paste(tumourname, "_germlineLogR.tab", sep=""), + outfile.normal.BAF=paste(tumourname, "_germlineBAF.tab", sep=""), + outfile.probeBAF=paste(tumourname, "_probeBAF.txt", sep=""), + snp6_reference_info_file=snp6_reference_info_file, + birdseed_report_file=birdseed_report_file, + chr_names=chrom_names) + +} diff --git a/R/prepare_wgs.R b/R/prepare_wgs.R index 61e56bb..7db9779 100644 --- a/R/prepare_wgs.R +++ b/R/prepare_wgs.R @@ -35,7 +35,7 @@ getAlleleCounts = function(bam.file, output.file, g1000.loci, min.base.qual=20, #' @param g1000file.prefix Prefix to where 1000 Genomes reference files can be found. #' @param minCounts Integer, minimum depth required for a SNP to be included (optional, default=NA). #' @param samplename String, name of the sample (optional, default=sample1). -#' @param seed A seed to be set +#' @param seed A seed to be set for when randomising the alleles. #' @author dw9, sd11 #' @export getBAFsAndLogRs = function(tumourAlleleCountsFile.prefix, normalAlleleCountsFile.prefix, figuresFile.prefix, BAFnormalFile, BAFmutantFile, logRnormalFile, logRmutantFile, combinedAlleleCountsFile, chr_names, g1000file.prefix, minCounts=NA, samplename="sample1", seed=as.integer(Sys.time())) { @@ -44,11 +44,19 @@ getBAFsAndLogRs = function(tumourAlleleCountsFile.prefix, normalAlleleCountsFile input_data = concatenateAlleleCountFiles(tumourAlleleCountsFile.prefix, ".txt", length(chr_names)) normal_input_data = concatenateAlleleCountFiles(normalAlleleCountsFile.prefix, ".txt", length(chr_names)) - allele_data = concatenateG1000SnpFiles(g1000file.prefix, ".txt", length(chr_names)) + allele_data = concatenateG1000SnpFiles(g1000file.prefix, ".txt", length(chr_names), chr_names) - #save(file=paste(samplename, "_BAFLogR_concatenated_input.RData", sep=""), input_data, normal_input_data, allele_data) - #load(paste(samplename, "_BAFLogR_concatenated_input.RData", sep="")) + # Synchronise all the data frames + chrpos_allele = paste(allele_data[,1], "_", allele_data[,2], sep="") + chrpos_normal = paste(normal_input_data[,1], "_", normal_input_data[,2], sep="") + chrpos_tumour = paste(input_data[,1], "_", input_data[,2], sep="") + matched_data = Reduce(intersect, list(chrpos_allele, chrpos_normal, chrpos_tumour)) + + allele_data = allele_data[chrpos_allele %in% matched_data,] + normal_input_data = normal_input_data[chrpos_tumour %in% matched_data,] + input_data = input_data[chrpos_tumour %in% matched_data,] + # Clean up and reduce amount of unneeded data names(input_data)[1] = "CHR" names(normal_input_data)[1] = "CHR" @@ -57,11 +65,11 @@ getBAFsAndLogRs = function(tumourAlleleCountsFile.prefix, normalAlleleCountsFile # Obtain depth for both alleles for tumour and normal len = nrow(normal_data) - normCount1 = normal_data[cbind(1:len,allele_data[,2])] - normCount2 = normal_data[cbind(1:len,allele_data[,3])] + normCount1 = normal_data[cbind(1:len,allele_data[,3])] + normCount2 = 
normal_data[cbind(1:len,allele_data[,4])] totalNormal = normCount1 + normCount2 - mutCount1 = mutant_data[cbind(1:len,allele_data[,2])] - mutCount2 = mutant_data[cbind(1:len,allele_data[,3])] + mutCount1 = mutant_data[cbind(1:len,allele_data[,3])] + mutCount2 = mutant_data[cbind(1:len,allele_data[,4])] totalMutant = mutCount1 + mutCount2 # Clean up a few unused variables to save some memory @@ -76,10 +84,6 @@ getBAFsAndLogRs = function(tumourAlleleCountsFile.prefix, normalAlleleCountsFile totalNormal = totalNormal[indices] totalMutant = totalMutant[indices] - # These variables are cleaned - #normal_data = normal_data[indices,] - #mutant_data = mutant_data[indices,] - normCount1 = normCount1[indices] normCount2 = normCount2[indices] mutCount1 = mutCount1[indices] @@ -118,7 +122,6 @@ getBAFsAndLogRs = function(tumourAlleleCountsFile.prefix, normalAlleleCountsFile write.table(alleleCounts, file=combinedAlleleCountsFile, row.names=F, quote=F, sep="\t") # Plot the raw data using ASCAT - #ascat.bc = ascat.loadData(logRmutantFile, BAFmutantFile, logRnormalFile, BAFnormalFile) # Manually create an ASCAT object, which saves reading in the above files again SNPpos = germline.BAF[,c("Chromosome", "Position")] ch = list() @@ -136,7 +139,7 @@ getBAFsAndLogRs = function(tumourAlleleCountsFile.prefix, normalAlleleCountsFile Tumor_LogR_segmented=NULL, Tumor_BAF_segmented=NULL, Tumor_counts=NULL, Germline_counts=NULL, SNPpos=tumor.LogR[,1:2], chrs=chr_names, samples=c(samplename), chrom=split_genome(tumor.LogR[,1:2]), ch=ch) - # TODO: On PD7422a this produces different plots than before (see streak on chrom 14) + ASCAT::ascat.plotRawData(ascat.bc) #, parentDir=figuresFile.prefix) } @@ -248,6 +251,7 @@ gc.correct.wgs = function(Tumour_LogR_file, outfile, correlations_outfile, gc_co Tumor_LogR = read.table(Tumour_LogR_file, stringsAsFactors=F, header=T) + print("Processing GC content data") GC_data = list() Tumor_LogR_new = list() for (chrindex in 1:length(chrom_names)) { @@ -263,15 +267,15 @@ gc.correct.wgs = function(Tumour_LogR_file, outfile, correlations_outfile, gc_co Tumor_LogR = do.call(rbind, Tumor_LogR_new) rm(Tumor_LogR_new) GC_data = do.call(rbind, GC_data) - + flag_nona = is.finite(Tumor_LogR[,3]) corr = cor(GC_data[flag_nona, 3:ncol(GC_data)], Tumor_LogR[flag_nona,3], use="complete.obs") length = nrow(Tumor_LogR) corr = apply(corr, 1, function(x) sum(abs(x*length))/sum(length)) index_1M = c(which(names(corr)=="X1M"), which(names(corr)=="X1Mb")) - maxGCcol_short = which(corr[1:(index_1M-1)]==max(corr[1:(index_1M-1)])) - maxGCcol_long = which(corr[index_1M:length(corr)]==max(corr[index_1M:length(corr)])) + maxGCcol_short = which(corr[1:(index_1M-1)]==max(corr[1:(index_1M-1)]))[1] + maxGCcol_long = which(corr[index_1M:length(corr)]==max(corr[index_1M:length(corr)]))[1] maxGCcol_long = maxGCcol_long+(index_1M-1) cat("weighted correlation: ",paste(names(corr),format(corr,digits=2), ";"),"\n") @@ -315,3 +319,71 @@ gc.correct.wgs = function(Tumour_LogR_file, outfile, correlations_outfile, gc_co write.table(corr, file=gsub(".txt", "_afterCorrection.txt", correlations_outfile), sep="\t", quote=F, row.names=F) } } + +#' Prepare WGS data for haplotype construction +#' +#' This function performs part of the Battenberg WGS pipeline: Counting alleles, constructing BAF and logR +#' and performing GC content correction. 
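As a point of reference, a minimal sketch of invoking this preprocessing step on its own; as in battenberg(), a parallel backend must be registered first because the allele counting runs inside a foreach %dopar% loop. The BAM paths, sample names and reference prefixes below are placeholders (named per the parameter documentation that follows), not values taken from this changeset.

```
library(Battenberg)
library(doParallel)
library(foreach)

# Placeholder inputs - point the prefixes at the per-chromosome 1000 Genomes
# and GC content reference files from the Battenberg reference bundle
imputeinfofile = "/opt/battenberg_reference/1000genomes_2012_v3_impute/impute_info.txt"
chrom_names = get.chrom.names(imputeinfofile, TRUE)

clp = parallel::makeCluster(4)
doParallel::registerDoParallel(clp)

prepare_wgs(chrom_names=chrom_names,
            tumourbam="tumour.bam", normalbam="normal.bam",
            tumourname="tumour_sample", normalname="normal_sample",
            g1000allelesprefix="/opt/battenberg_reference/1000G_alleles_chr",
            g1000prefix="/opt/battenberg_reference/1000G_snp_chr",
            gccorrectprefix="/opt/battenberg_reference/1000_genomes_GC_corr_chr_",
            min_base_qual=20, min_map_qual=35,
            allelecounter_exe="alleleCounter", min_normal_depth=10,
            nthreads=4, skip_allele_counting=FALSE)

parallel::stopCluster(clp)
```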
+#' +#' @param chrom_names A vector containing the names of chromosomes to be included +#' @param tumourbam Full path to the tumour BAM file +#' @param normalbam Full path to the normal BAM file +#' @param tumourname Identifier to be used for tumour output files +#' @param normalname Identifier to be used for normal output files +#' @param g1000allelesprefix Prefix path to the 1000 Genomes alleles reference files +#' @param g1000prefix Prefix path to the 1000 Genomes SNP reference files +#' @param gccorrectprefix Prefix path to GC content reference data +#' @param min_base_qual Minimum base quality required for a read to be counted +#' @param min_map_qual Minimum mapping quality required for a read to be counted +#' @param allelecounter_exe Path to the allele counter executable (can be found in $PATH) +#' @param min_normal_depth Minimum depth required in the normal for a SNP to be included +#' @param nthreads The number of parallel processes to run +#' @param skip_allele_counting Flag, set to TRUE if allele counting is already complete (files are expected in the working directory on disk) +#' @author sd11 +#' @export +prepare_wgs = function(chrom_names, tumourbam, normalbam, tumourname, normalname, g1000allelesprefix, g1000prefix, gccorrectprefix, + min_base_qual, min_map_qual, allelecounter_exe, min_normal_depth, nthreads, skip_allele_counting) { + + requireNamespace("foreach") + requireNamespace("doParallel") + requireNamespace("parallel") + + if (!skip_allele_counting) { + # Obtain allele counts for 1000 Genomes locations for both tumour and normal + foreach::foreach(i=1:length(chrom_names)) %dopar% { + getAlleleCounts(bam.file=tumourbam, + output.file=paste(tumourname,"_alleleFrequencies_chr", i, ".txt", sep=""), + g1000.loci=paste(g1000allelesprefix, i, ".txt", sep=""), + min.base.qual=min_base_qual, + min.map.qual=min_map_qual, + allelecounter.exe=allelecounter_exe) + + getAlleleCounts(bam.file=normalbam, + output.file=paste(normalname,"_alleleFrequencies_chr", i, ".txt", sep=""), + g1000.loci=paste(g1000allelesprefix, i, ".txt", sep=""), + min.base.qual=min_base_qual, + min.map.qual=min_map_qual, + allelecounter.exe=allelecounter_exe) + } + } + + # Obtain BAF and LogR from the raw allele counts + getBAFsAndLogRs(tumourAlleleCountsFile.prefix=paste(tumourname,"_alleleFrequencies_chr", sep=""), + normalAlleleCountsFile.prefix=paste(normalname,"_alleleFrequencies_chr", sep=""), + figuresFile.prefix=paste(tumourname, "_", sep=''), + BAFnormalFile=paste(tumourname,"_normalBAF.tab", sep=""), + BAFmutantFile=paste(tumourname,"_mutantBAF.tab", sep=""), + logRnormalFile=paste(tumourname,"_normalLogR.tab", sep=""), + logRmutantFile=paste(tumourname,"_mutantLogR.tab", sep=""), + combinedAlleleCountsFile=paste(tumourname,"_alleleCounts.tab", sep=""), + chr_names=chrom_names, + g1000file.prefix=g1000prefix, + minCounts=min_normal_depth, + samplename=tumourname) + # Perform GC correction + gc.correct.wgs(Tumour_LogR_file=paste(tumourname,"_mutantLogR.tab", sep=""), + outfile=paste(tumourname,"_mutantLogR_gcCorrected.tab", sep=""), + correlations_outfile=paste(tumourname, "_GCwindowCorrelations.txt", sep=""), + gc_content_file_prefix=gccorrectprefix, + chrom_names=chrom_names) +} diff --git a/R/util.R b/R/util.R index f0c8c31..be77422 100644 --- a/R/util.R +++ b/R/util.R @@ -88,16 +88,17 @@ concatenateAlleleCountFiles = function(inputStart, inputEnd, no.chrs) { #' Function to concatenate 1000 Genomes SNP reference files #' @noRd -concatenateG1000SnpFiles = function(inputStart, inputEnd, no.chrs) { 
- infiles = c() +concatenateG1000SnpFiles = function(inputStart, inputEnd, no.chrs, chr_names) { + data = list() for(chrom in 1:no.chrs) { filename = paste(inputStart, chrom, inputEnd, sep="") # Only add files that exist and have data if(file.exists(filename) && file.info(filename)$size>0) { - infiles = c(infiles, filename) + # infiles = c(infiles, filename) + data[[chrom]] = cbind(chromosome=chr_names[chrom], read_table_generic(filename)) } } - return(as.data.frame(do.call(rbind, lapply(infiles, FUN=function(x) { read_table_generic(x) })))) + return(as.data.frame(do.call(rbind, data))) } ######################################################################################## diff --git a/README.md b/README.md index 37c4262..b6c9d70 100644 --- a/README.md +++ b/README.md @@ -10,32 +10,81 @@ Please visit the [cgpBattenberg page](https://github.com/cancerit/cgpBattenberg) The instructions below will install the latest stable Battenberg version. Please take this approach only when you'd like to do something not covered by cgpBattenberg. -### Prerequisites +### Standalone + +#### Prerequisites Installing from Github requires devtools and Battenberg requires readr, RColorBrewer and ASCAT. The pipeline requires doParallel. From the command line run: - > R -q -e 'source("http://bioconductor.org/biocLite.R"); biocLite(c("devtools", "readr", "doParallel", "ggplot2", "RColorBrewer", "gridExtra", "gtools"));' - - > R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")' +``` +R -q -e 'source("http://bioconductor.org/biocLite.R"); biocLite(c("devtools", "readr", "doParallel", "ggplot2", "RColorBrewer", "gridExtra", "gtools"));' +R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")' +``` -### Installation from Github +#### Installation from Github To install Battenberg, run the following from the command line: - > R -q -e 'devtools::install_github("Wedge-Oxford/battenberg")' +``` +R -q -e 'devtools::install_github("Wedge-Oxford/battenberg")' +``` -### Required reference files +#### Required reference files Battenberg requires a number of reference files that should be downloaded. - * ftp://ftp.sanger.ac.uk/pub/teams/113/Battenberg/battenberg_impute_1000G_v1.tar.gz + * ftp://ftp.sanger.ac.uk/pub/teams/113/Battenberg/battenberg_1000genomesloci2012_v3.tar.gz + * ftp://ftp.sanger.ac.uk/pub/teams/113/Battenberg/battenberg_impute_1000G_v3.tar.gz * ftp://ftp.sanger.ac.uk/pub/teams/113/Battenberg/probloci_270415.txt.gz * ftp://ftp.sanger.ac.uk/pub/teams/113/Battenberg/battenberg_wgs_gc_correction_1000g_v3.tar.gz * ftp://ftp.sanger.ac.uk/pub/teams/113/Battenberg/battenberg_snp6_exe.tgz (SNP6 only) * ftp://ftp.sanger.ac.uk/pub/teams/113/Battenberg/battenberg_snp6_ref.tgz (SNP6 only) -### Pipeline +#### Pipeline Go into inst/example for example WGS and SNP6 R-only pipelines. - \ No newline at end of file +### Docker - experimental + +Battenberg can be run inside a Docker container. Please follow the instructions below. + +#### Installation + +``` +wget https://raw.githubusercontent.com/Wedge-Oxford/battenberg/dev/Dockerfile +docker build -t battenberg:2.2.8 . +``` + +#### Download reference data + +To do + +#### Run interactively + +These commands run the Battenberg pipeline within a Docker container in interactive mode. 
This command assumes the data is available locally in `$PWD/data/pcawg/HCC1143_ds` and the reference files have been placed in `$PWD/battenberg_reference` + +``` +docker run -it -v `pwd`/data/pcawg/HCC1143_ds:/mnt/battenberg/ -v `pwd`/battenberg_reference:/opt/battenberg_reference battenberg:2.2.8 +``` + +Within the Docker terminal run the pipeline, in this case on the ICGC PCAWG testing data available [here](https://s3-eu-west-1.amazonaws.com/wtsi-pancancer/testdata/HCC1143_ds.tar). + +``` +R CMD BATCH '--no-restore-data --no-save --args HCC1143 HCC1143_BL /mnt/battenberg/HCC1143_BL.bam /mnt/battenberg/HCC1143.bam FALSE /mnt/battenberg/' /usr/local/bin/battenberg_wgs.R /mnt/battenberg/battenberg.Rout +``` + +### Building a release + +In RStudio: In the Build tab, click Check Package + +Then open the NAMESPACE file and edit: + +``` +S3method(plot,haplotype.data) +``` + +to: + +``` +export(plot.haplotype.data) +``` diff --git a/inst/example/battenberg_snp6.R b/inst/example/battenberg_snp6.R index b2ffc39..59a2fdb 100644 --- a/inst/example/battenberg_snp6.R +++ b/inst/example/battenberg_snp6.R @@ -4,8 +4,10 @@ NORMALCEL = toString(args[2]) TUMOURCEL = toString(args[3]) RUN_DIR = toString(args[4]) +NORMALNAME = NA +SKIP_PREPROCESSING = F +SKIP_PHASING = F library(Battenberg) -library(doParallel) ############################################################################### # 2017-10-03 @@ -14,7 +16,7 @@ library(doParallel) ############################################################################### # Parallelism parameters -NTHREADS = 6 +NTHREADS = 8 # General static IMPUTEINFOFILE = "/lustre/scratch116/casm/team113/sd11/reference/GenomeFiles/battenberg_impute/impute_info.txt" @@ -33,6 +35,8 @@ BIRDSEED_REPORT_FILE = "birdseed.report.txt" # No control over the name of this PLATFORM_GAMMA = 0.55 PHASING_GAMMA = 1 SEGMENTATION_GAMMA = 10 +SEGMENTATIIN_KMIN = 3 +PHASING_KMIN = 1 CLONALITY_DIST_METRIC = 0 ASCAT_DIST_METRIC = 1 MIN_PLOIDY = 1.6 #1.6 @@ -47,156 +51,194 @@ HETEROZYGOUSFILTER = "none" # Change to work directory and load the chromosome information setwd(RUN_DIR) -chrom_names = get.chrom.names(IMPUTEINFOFILE, TRUE) -# Setup for parallel computing -clp = makeCluster(NTHREADS) -registerDoParallel(clp) - -# Extract the LogR and BAF from both tumour and normal cel files. 
-cel2baf.logr(normal_cel_file=NORMALCEL, - tumour_cel_file=TUMOURCEL, - output_file=paste(TUMOURNAME, "_lrr_baf.txt", sep=""), - snp6_reference_info_file=SNP6_REF_INFO_FILE, - apt.probeset.genotype.exe=APT_PROBESET_GENOTYPE_EXE, - apt.probeset.summarize.exe=APT_PROBESET_SUMMARIZE_EXE, - norm.geno.clust.exe=NORM_GENO_CLUST_EXE) - -gc.correct(samplename=TUMOURNAME, - infile.logr.baf=paste(TUMOURNAME, "_lrr_baf.txt", sep=""), - outfile.tumor.LogR=paste(TUMOURNAME, "_mutantLogR.tab", sep=""), - outfile.tumor.BAF=paste(TUMOURNAME, "_mutantBAF.tab", sep=""), - outfile.normal.LogR=paste(TUMOURNAME, "_germlineLogR.tab", sep=""), - outfile.normal.BAF=paste(TUMOURNAME, "_germlineBAF.tab", sep=""), - outfile.probeBAF=paste(TUMOURNAME, "_probeBAF.txt", sep=""), - snp6_reference_info_file=SNP6_REF_INFO_FILE, +battenberg(tumourname=TUMOURNAME, + normalname=NORMALNAME, + tumour_data_file=TUMOURCEL, + normal_data_file=NORMALCEL, + imputeinfofile=IMPUTEINFOFILE, + g1000prefix=G1000PREFIX, + problemloci=PROBLEMLOCI, + data_type="snp6", + impute_exe=IMPUTE_EXE, + nthreads=NTHREADS, + platform_gamma=PLATFORM_GAMMA, + phasing_gamma=PHASING_GAMMA, + segmentation_gamma=SEGMENTATION_GAMMA, + segmentation_kmin=SEGMENTATIIN_KMIN, + phasing_kmin=PHASING_KMIN, + clonality_dist_metric=CLONALITY_DIST_METRIC, + ascat_dist_metric=ASCAT_DIST_METRIC, + min_ploidy=MIN_PLOIDY, + max_ploidy=MAX_PLOIDY, + min_rho=MIN_RHO, + min_goodness=MIN_GOODNESS_OF_FIT, + uninformative_BAF_threshold=BALANCED_THRESHOLD, + calc_seg_baf_option=CALC_SEG_BAF_OPTION, + skip_preprocessing=SKIP_PREPROCESSING, + skip_phasing=SKIP_PHASING, + snp6_reference_info_file=SNP6_REF_INFO_FILE, + apt.probeset.genotype.exe=APT_PROBESET_GENOTYPE_EXE, + apt.probeset.summarize.exe=APT_PROBESET_SUMMARIZE_EXE, + norm.geno.clust.exe=NORM_GENO_CLUST_EXE, birdseed_report_file=BIRDSEED_REPORT_FILE, - chr_names=chrom_names) - -# Infer what the gender is -gender = infer_gender_birdseed(BIRDSEED_REPORT_FILE) -is_male = gender == "male" -chrom_names = get.chrom.names(IMPUTEINFOFILE, is_male) - -foreach(chrom=1:length(chrom_names), .export=c("generate.impute.input.snp6","run.impute","combine.impute.output","GetChromosomeBAFs_SNP6","plot.haplotype.data")) %dopar% { - - # Transform input into a format that Impute2 takes - generate.impute.input.snp6(infile.germlineBAF=paste(TUMOURNAME, "_germlineBAF.tab", sep=""), - infile.tumourBAF=paste(TUMOURNAME, "_mutantBAF.tab", sep=""), - outFileStart=paste(TUMOURNAME, "_impute_input_chr", sep=""), - chrom=chrom, - chr_names=chrom_names, - problemLociFile=PROBLEMLOCI, - snp6_reference_info_file=SNP6_REF_INFO_FILE, - imputeinfofile=IMPUTEINFOFILE, - is.male=is_male, - heterozygousFilter=HETEROZYGOUSFILTER) - - # Run impute on the files - run.impute(inputfile=paste(TUMOURNAME, "_impute_input_chr", chrom, ".txt", sep=""), - outputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""), - is.male=is_male, - imputeinfofile=IMPUTEINFOFILE, - chrom=chrom, - impute.exe=IMPUTE_EXE) - - # As impute runs in windows across a chromosome we need to assemble the output - combine.impute.output(inputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""), - outputfile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), - is.male=is_male, - imputeinfofile=IMPUTEINFOFILE, - region.size=5000000, - chrom=chrom) - - # Transform the impute output into haplotyped BAFs - GetChromosomeBAFs_SNP6(chrom=chrom, - alleleFreqFile=paste(TUMOURNAME, "_impute_input_chr", chrom, "_withAlleleFreq.csv", 
sep=""), - haplotypeFile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), - samplename=TUMOURNAME, - outputfile=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - chr_names=chrom_names) - - # Plot what we have until this point - plot.haplotype.data(haplotyped.baf.file=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - imageFileName=paste(TUMOURNAME,"_chr",chrom_names[chrom],"_heterozygousData.png",sep=""), - samplename=TUMOURNAME, - chrom=chrom, - chr_names=chrom_names) - - # Cleanup temp Impute output - unlink(paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt_*K.txt*", sep="")) -} - -# Kill the threads as from here its all single core -stopCluster(clp) - -# Combine all the BAF output into a single file -combine.baf.files(inputfile.prefix=paste(TUMOURNAME, "_chr", sep=""), - inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", - outputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - no.chrs=length(chrom_names)) - -# Segment the phased and haplotyped BAF data -# Call segment.baf.phased.sv when SVs are available - not relevant for SNP6 data -segment.baf.phased(samplename=TUMOURNAME, - inputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - outputfile=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), - gamma=SEGMENTATION_GAMMA, - phasegamma=PHASING_GAMMA, - kmin=3, - phasekmin=1, - calc_seg_baf_option=CALC_SEG_BAF_OPTION) - -# Fit a clonal copy number profile -fit.copy.number(samplename=TUMOURNAME, - outputfile.prefix=paste(TUMOURNAME, "_", sep=""), - inputfile.baf.segmented=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), - inputfile.baf=paste(TUMOURNAME,"_mutantBAF.tab", sep=""), - inputfile.logr=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), - dist_choice=CLONALITY_DIST_METRIC, - ascat_dist_choice=ASCAT_DIST_METRIC, - min.ploidy=MIN_PLOIDY, - max.ploidy=MAX_PLOIDY, - min.rho=MIN_RHO, - max.rho=MAX_RHO, - min.goodness=MIN_GOODNESS_OF_FIT, - uninformative_BAF_threshold=BALANCED_THRESHOLD, - gamma_param=PLATFORM_GAMMA, - use_preset_rho_psi=F, - preset_rho=NA, - preset_psi=NA, - read_depth=30) - -# Go over all segments, determine which segements are a mixture of two states and fit a second CN state -callSubclones(sample.name=TUMOURNAME, - baf.segmented.file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), - logr.file=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), - rho.psi.file=paste(TUMOURNAME, "_rho_and_psi.txt",sep=""), - output.file=paste(TUMOURNAME,"_subclones.txt", sep=""), - output.figures.prefix=paste(TUMOURNAME,"_subclones_chr", sep=""), - output.gw.figures.prefix=paste(TUMOURNAME,"_BattenbergProfile", sep=""), - masking_output_file=paste(TUMOURNAME,"_segment_masking_details.txt", sep=""), - chr_names=chrom_names, - gamma=PLATFORM_GAMMA, - segmentation.gamma=NA, - siglevel=0.05, - maxdist=0.01, - noperms=1000, - max_allowed_state=250, - sv_breakpoints_file="NA", - calc_seg_baf_option=CALC_SEG_BAF_OPTION) - -# Make some post-hoc plots -make_posthoc_plots(samplename=TUMOURNAME, - logr_file=paste(TUMOURNAME, "_mutantLogR.tab", sep=""), - subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""), - rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), - bafsegmented_file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), - logrsegmented_file=paste(TUMOURNAME, ".logRsegmented.txt", sep=""), - allelecounts_file=NULL) - -# Save refit suggestions for a future rerun -cnfit_to_refit_suggestions(samplename=TUMOURNAME, - subclones_file=paste(TUMOURNAME, 
"_subclones.txt", sep=""), - rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), - gamma_param=PLATFORM_GAMMA) + heterozygousFilter=HETEROZYGOUSFILTER) + + + + +# # Change to work directory and load the chromosome information +# setwd(RUN_DIR) +# chrom_names = get.chrom.names(IMPUTEINFOFILE, TRUE) +# +# # Setup for parallel computing +# clp = makeCluster(NTHREADS) +# registerDoParallel(clp) +# +# # Extract the LogR and BAF from both tumour and normal cel files. +# cel2baf.logr(normal_cel_file=NORMALCEL, +# tumour_cel_file=TUMOURCEL, +# output_file=paste(TUMOURNAME, "_lrr_baf.txt", sep=""), +# snp6_reference_info_file=SNP6_REF_INFO_FILE, +# apt.probeset.genotype.exe=APT_PROBESET_GENOTYPE_EXE, +# apt.probeset.summarize.exe=APT_PROBESET_SUMMARIZE_EXE, +# norm.geno.clust.exe=NORM_GENO_CLUST_EXE) +# +# gc.correct(samplename=TUMOURNAME, +# infile.logr.baf=paste(TUMOURNAME, "_lrr_baf.txt", sep=""), +# outfile.tumor.LogR=paste(TUMOURNAME, "_mutantLogR.tab", sep=""), +# outfile.tumor.BAF=paste(TUMOURNAME, "_mutantBAF.tab", sep=""), +# outfile.normal.LogR=paste(TUMOURNAME, "_germlineLogR.tab", sep=""), +# outfile.normal.BAF=paste(TUMOURNAME, "_germlineBAF.tab", sep=""), +# outfile.probeBAF=paste(TUMOURNAME, "_probeBAF.txt", sep=""), +# snp6_reference_info_file=SNP6_REF_INFO_FILE, +# birdseed_report_file=BIRDSEED_REPORT_FILE, +# chr_names=chrom_names) +# +# # Infer what the gender is +# gender = infer_gender_birdseed(BIRDSEED_REPORT_FILE) +# is_male = gender == "male" +# chrom_names = get.chrom.names(IMPUTEINFOFILE, is_male) +# +# foreach(chrom=1:length(chrom_names), .export=c("generate.impute.input.snp6","run.impute","combine.impute.output","GetChromosomeBAFs_SNP6","plot.haplotype.data")) %dopar% { +# +# # Transform input into a format that Impute2 takes +# generate.impute.input.snp6(infile.germlineBAF=paste(TUMOURNAME, "_germlineBAF.tab", sep=""), +# infile.tumourBAF=paste(TUMOURNAME, "_mutantBAF.tab", sep=""), +# outFileStart=paste(TUMOURNAME, "_impute_input_chr", sep=""), +# chrom=chrom, +# chr_names=chrom_names, +# problemLociFile=PROBLEMLOCI, +# snp6_reference_info_file=SNP6_REF_INFO_FILE, +# imputeinfofile=IMPUTEINFOFILE, +# is.male=is_male, +# heterozygousFilter=HETEROZYGOUSFILTER) +# +# # Run impute on the files +# run.impute(inputfile=paste(TUMOURNAME, "_impute_input_chr", chrom, ".txt", sep=""), +# outputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""), +# is.male=is_male, +# imputeinfofile=IMPUTEINFOFILE, +# chrom=chrom, +# impute.exe=IMPUTE_EXE) +# +# # As impute runs in windows across a chromosome we need to assemble the output +# combine.impute.output(inputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""), +# outputfile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), +# is.male=is_male, +# imputeinfofile=IMPUTEINFOFILE, +# region.size=5000000, +# chrom=chrom) +# +# # Transform the impute output into haplotyped BAFs +# GetChromosomeBAFs_SNP6(chrom=chrom, +# alleleFreqFile=paste(TUMOURNAME, "_impute_input_chr", chrom, "_withAlleleFreq.csv", sep=""), +# haplotypeFile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), +# samplename=TUMOURNAME, +# outputfile=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), +# chr_names=chrom_names) +# +# # Plot what we have until this point +# plot.haplotype.data(haplotyped.baf.file=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), +# 
imageFileName=paste(TUMOURNAME,"_chr",chrom_names[chrom],"_heterozygousData.png",sep=""), +# samplename=TUMOURNAME, +# chrom=chrom, +# chr_names=chrom_names) +# +# # Cleanup temp Impute output +# unlink(paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt_*K.txt*", sep="")) +# } +# +# # Kill the threads as from here its all single core +# stopCluster(clp) +# +# # Combine all the BAF output into a single file +# combine.baf.files(inputfile.prefix=paste(TUMOURNAME, "_chr", sep=""), +# inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", +# outputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), +# no.chrs=length(chrom_names)) +# +# # Segment the phased and haplotyped BAF data +# # Call segment.baf.phased.sv when SVs are available - not relevant for SNP6 data +# segment.baf.phased(samplename=TUMOURNAME, +# inputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), +# outputfile=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), +# gamma=SEGMENTATION_GAMMA, +# phasegamma=PHASING_GAMMA, +# kmin=3, +# phasekmin=1, +# calc_seg_baf_option=CALC_SEG_BAF_OPTION) +# +# # Fit a clonal copy number profile +# fit.copy.number(samplename=TUMOURNAME, +# outputfile.prefix=paste(TUMOURNAME, "_", sep=""), +# inputfile.baf.segmented=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), +# inputfile.baf=paste(TUMOURNAME,"_mutantBAF.tab", sep=""), +# inputfile.logr=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), +# dist_choice=CLONALITY_DIST_METRIC, +# ascat_dist_choice=ASCAT_DIST_METRIC, +# min.ploidy=MIN_PLOIDY, +# max.ploidy=MAX_PLOIDY, +# min.rho=MIN_RHO, +# max.rho=MAX_RHO, +# min.goodness=MIN_GOODNESS_OF_FIT, +# uninformative_BAF_threshold=BALANCED_THRESHOLD, +# gamma_param=PLATFORM_GAMMA, +# use_preset_rho_psi=F, +# preset_rho=NA, +# preset_psi=NA, +# read_depth=30) +# +# # Go over all segments, determine which segements are a mixture of two states and fit a second CN state +# callSubclones(sample.name=TUMOURNAME, +# baf.segmented.file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), +# logr.file=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), +# rho.psi.file=paste(TUMOURNAME, "_rho_and_psi.txt",sep=""), +# output.file=paste(TUMOURNAME,"_subclones.txt", sep=""), +# output.figures.prefix=paste(TUMOURNAME,"_subclones_chr", sep=""), +# output.gw.figures.prefix=paste(TUMOURNAME,"_BattenbergProfile", sep=""), +# masking_output_file=paste(TUMOURNAME,"_segment_masking_details.txt", sep=""), +# chr_names=chrom_names, +# gamma=PLATFORM_GAMMA, +# segmentation.gamma=NA, +# siglevel=0.05, +# maxdist=0.01, +# noperms=1000, +# max_allowed_state=250, +# sv_breakpoints_file="NA", +# calc_seg_baf_option=CALC_SEG_BAF_OPTION) +# +# # Make some post-hoc plots +# make_posthoc_plots(samplename=TUMOURNAME, +# logr_file=paste(TUMOURNAME, "_mutantLogR.tab", sep=""), +# subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""), +# rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), +# bafsegmented_file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), +# logrsegmented_file=paste(TUMOURNAME, ".logRsegmented.txt", sep=""), +# allelecounts_file=NULL) +# +# # Save refit suggestions for a future rerun +# cnfit_to_refit_suggestions(samplename=TUMOURNAME, +# subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""), +# rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), +# gamma_param=PLATFORM_GAMMA) diff --git a/inst/example/battenberg_wgs.R b/inst/example/battenberg_wgs.R index 97daeaa..8fc2cdc 100644 --- a/inst/example/battenberg_wgs.R +++ b/inst/example/battenberg_wgs.R @@ -6,17 +6,20 @@ 
TUMOURBAM = toString(args[4]) IS.MALE = as.logical(args[5]) RUN_DIR = toString(args[6]) +SKIP_ALLELECOUNTING = F +SKIP_PREPROCESSING = F +SKIP_PHASING = F + library(Battenberg) -library(doParallel) ############################################################################### -# 2017-10-03 -# A pure R Battenberg v2.2.7 WGS pipeline implementation. +# 2017-11-05 +# A pure R Battenberg v2.2.8 WGS pipeline implementation. # sd11@sanger.ac.uk ############################################################################### # Parallelism parameters -NTHREADS = 6 +NTHREADS = 8 # General static IMPUTEINFOFILE = "/lustre/scratch116/casm/team113/sd11/reference/GenomeFiles/battenberg_impute_v3/impute_info.txt" @@ -29,6 +32,8 @@ IMPUTE_EXE = "impute2" PLATFORM_GAMMA = 1 PHASING_GAMMA = 1 SEGMENTATION_GAMMA = 10 +SEGMENTATION_KMIN = 3 +PHASING_KMIN = 1 CLONALITY_DIST_METRIC = 0 ASCAT_DIST_METRIC = 1 MIN_PLOIDY = 1.6 @@ -45,168 +50,208 @@ CALC_SEG_BAF_OPTION = 1 ALLELECOUNTER = "alleleCounter" PROBLEMLOCI = "/lustre/scratch116/casm/team113/sd11/reference/GenomeFiles/battenberg_probloci/probloci_270415.txt.gz" -chrom_names = get.chrom.names(IMPUTEINFOFILE, IS.MALE) - -# Setup for parallel computing -clp = makeCluster(NTHREADS) -registerDoParallel(clp) - -# Obtain allele counts for 1000 Genomes locations for both tumour and normal -foreach(i=1:length(chrom_names), .export=c("getAlleleCounts")) %dopar% { - getAlleleCounts(bam.file=TUMOURBAM, - output.file=paste(TUMOURNAME,"_alleleFrequencies_chr", i, ".txt", sep=""), - g1000.loci=paste(G1000PREFIX_AC, i, ".txt", sep=""), - min.base.qual=MIN_BASE_QUAL, - min.map.qual=MIN_MAP_QUAL, - allelecounter.exe=ALLELECOUNTER) - - getAlleleCounts(bam.file=NORMALBAM, - output.file=paste(NORMALNAME,"_alleleFrequencies_chr", i, ".txt", sep=""), - g1000.loci=paste(G1000PREFIX_AC, i, ".txt", sep=""), - min.base.qual=MIN_BASE_QUAL, - min.map.qual=MIN_MAP_QUAL, - allelecounter.exe=ALLELECOUNTER) - -} -# Obtain BAF and LogR from the raw allele counts -getBAFsAndLogRs(tumourAlleleCountsFile.prefix=paste(TUMOURNAME,"_alleleFrequencies_chr", sep=""), - normalAlleleCountsFile.prefix=paste(NORMALNAME,"_alleleFrequencies_chr", sep=""), - figuresFile.prefix=paste(TUMOURNAME, "_", sep=''), - BAFnormalFile=paste(TUMOURNAME,"_normalBAF.tab", sep=""), - BAFmutantFile=paste(TUMOURNAME,"_mutantBAF.tab", sep=""), - logRnormalFile=paste(TUMOURNAME,"_normalLogR.tab", sep=""), - logRmutantFile=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), - combinedAlleleCountsFile=paste(TUMOURNAME,"_alleleCounts.tab", sep=""), - chr_names=chrom_names, - g1000file.prefix=G1000PREFIX, - minCounts=MIN_NORMAL_DEPTH, - samplename=TUMOURNAME) -# Perform GC correction -gc.correct.wgs(Tumour_LogR_file=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), - outfile=paste(TUMOURNAME,"_mutantLogR_gcCorrected.tab", sep=""), - correlations_outfile=paste(TUMOURNAME, "_GCwindowCorrelations.txt", sep=""), - gc_content_file_prefix=GCCORRECTPREFIX, - chrom_names=chrom_names) - - -# These steps are independent and can be run in parallel. This could be done through multi-threading or splitting these up into separate jobs on a cluster.
-foreach(chrom=1:length(chrom_names), .export=c("generate.impute.input.wgs","run.impute","combine.impute.output","GetChromosomeBAFs","plot.haplotype.data")) %dopar% { - print(chrom) - # Transform the allele counts into something that impute understands - generate.impute.input.wgs(chrom=chrom, - tumour.allele.counts.file=paste(TUMOURNAME,"_alleleFrequencies_chr", chrom, ".txt", sep=""), - normal.allele.counts.file=paste(NORMALNAME,"_alleleFrequencies_chr", chrom, ".txt", sep=""), - output.file=paste(TUMOURNAME, "_impute_input_chr", chrom, ".txt", sep=""), - imputeinfofile=IMPUTEINFOFILE, - is.male=IS.MALE, - problemLociFile=PROBLEMLOCI, - useLociFile=NA) - - # Run impute on the files - run.impute(inputfile=paste(TUMOURNAME, "_impute_input_chr", chrom, ".txt", sep=""), - outputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""), - is.male=IS.MALE, - imputeinfofile=IMPUTEINFOFILE, - impute.exe=IMPUTE_EXE, - region.size=5000000, - chrom=chrom) - - # As impute runs in windows across a chromosome we need to assemble the output - combine.impute.output(inputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""), - outputfile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), - is.male=IS.MALE, - imputeinfofile=IMPUTEINFOFILE, - region.size=5000000, - chrom=chrom) - - # Transform the impute output into haplotyped BAFs - GetChromosomeBAFs(chrom=chrom, - SNP_file=paste(TUMOURNAME, "_alleleFrequencies_chr", chrom, ".txt", sep=""), - haplotypeFile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), - samplename=TUMOURNAME, - outfile=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - chr_names=chrom_names, - minCounts=MIN_NORMAL_DEPTH) - - # Plot what we have until this point - plot.haplotype.data(haplotyped.baf.file=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - imageFileName=paste(TUMOURNAME,"_chr",chrom,"_heterozygousData.png",sep=""), - samplename=TUMOURNAME, - chrom=chrom, - chr_names=chrom_names) - - # Cleanup temp Impute output - unlink(paste(TUMOURNAME, "_impute_output_chr", chrom, "*K.txt*", sep="")) -} - -# Kill the threads as from here its all single core -stopCluster(clp) - -# Combine all the BAF output into a single file -combine.baf.files(inputfile.prefix=paste(TUMOURNAME, "_chr", sep=""), - inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", - outputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - no.chrs=length(chrom_names)) - -# Segment the phased and haplotyped BAF data -segment.baf.phased(samplename=TUMOURNAME, - inputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - outputfile=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), - gamma=SEGMENTATION_GAMMA, - phasegamma=PHASING_GAMMA, - kmin=3, - phasekmin=1, - calc_seg_baf_option=CALC_SEG_BAF_OPTION) +# Change to work directory and load the chromosome information +setwd(RUN_DIR) -# Fit a clonal copy number profile -fit.copy.number(samplename=TUMOURNAME, - outputfile.prefix=paste(TUMOURNAME, "_", sep=""), - inputfile.baf.segmented=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), - inputfile.baf=paste(TUMOURNAME,"_mutantBAF.tab", sep=""), - inputfile.logr=paste(TUMOURNAME,"_mutantLogR_gcCorrected.tab", sep=""), - dist_choice=CLONALITY_DIST_METRIC, - ascat_dist_choice=ASCAT_DIST_METRIC, - min.ploidy=MIN_PLOIDY, - max.ploidy=MAX_PLOIDY, - min.rho=MIN_RHO, - min.goodness=MIN_GOODNESS_OF_FIT, - 
uninformative_BAF_threshold=BALANCED_THRESHOLD, - gamma_param=PLATFORM_GAMMA, - use_preset_rho_psi=F, - preset_rho=NA, - preset_psi=NA, - read_depth=30) +battenberg(tumourname=TUMOURNAME, + normalname=NORMALNAME, + tumour_data_file=TUMOURBAM, + normal_data_file=NORMALBAM, + ismale=IS.MALE, + imputeinfofile=IMPUTEINFOFILE, + g1000prefix=G1000PREFIX, + g1000allelesprefix=G1000PREFIX_AC, + gccorrectprefix=GCCORRECTPREFIX, + problemloci=PROBLEMLOCI, + data_type="wgs", + impute_exe=IMPUTE_EXE, + allelecounter_exe=ALLELECOUNTER, + nthreads=NTHREADS, + platform_gamma=PLATFORM_GAMMA, + phasing_gamma=PHASING_GAMMA, + segmentation_gamma=SEGMENTATION_GAMMA, + segmentation_kmin=SEGMENTATION_KMIN, + phasing_kmin=PHASING_KMIN, + clonality_dist_metric=CLONALITY_DIST_METRIC, + ascat_dist_metric=ASCAT_DIST_METRIC, + min_ploidy=MIN_PLOIDY, + max_ploidy=MAX_PLOIDY, + min_rho=MIN_RHO, + min_goodness=MIN_GOODNESS_OF_FIT, + uninformative_BAF_threshold=BALANCED_THRESHOLD, + min_normal_depth=MIN_NORMAL_DEPTH, + min_base_qual=MIN_BASE_QUAL, + min_map_qual=MIN_MAP_QUAL, + calc_seg_baf_option=CALC_SEG_BAF_OPTION, + skip_allele_counting=SKIP_ALLELECOUNTING, + skip_preprocessing=SKIP_PREPROCESSING, + skip_phasing=SKIP_PHASING) -# Go over all segments, determine which segements are a mixture of two states and fit a second CN state -callSubclones(sample.name=TUMOURNAME, - baf.segmented.file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), - logr.file=paste(TUMOURNAME,"_mutantLogR_gcCorrected.tab", sep=""), - rho.psi.file=paste(TUMOURNAME, "_rho_and_psi.txt",sep=""), - output.file=paste(TUMOURNAME,"_subclones.txt", sep=""), - output.figures.prefix=paste(TUMOURNAME,"_subclones_chr", sep=""), - output.gw.figures.prefix=paste(TUMOURNAME,"_BattenbergProfile", sep=""), - masking_output_file=paste(TUMOURNAME, "_segment_masking_details.txt", sep=""), - sv_breakpoints_file="NA", - chr_names=chrom_names, - gamma=PLATFORM_GAMMA, - segmentation.gamma=NA, - siglevel=0.05, - maxdist=0.01, - noperms=1000, - calc_seg_baf_option=CALC_SEG_BAF_OPTION) -# Make some post-hoc plots -make_posthoc_plots(samplename=TUMOURNAME, - logr_file=paste(TUMOURNAME, "_mutantLogR_gcCorrected.tab", sep=""), - subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""), - rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), - bafsegmented_file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), - logrsegmented_file=paste(TUMOURNAME, ".logRsegmented.txt", sep=""), - allelecounts_file=paste(TUMOURNAME, "_alleleCounts.tab", sep="")) -# Save refit suggestions for a future rerun -cnfit_to_refit_suggestions(samplename=TUMOURNAME, - subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""), - rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), - gamma_param=PLATFORM_GAMMA) +# chrom_names = get.chrom.names(IMPUTEINFOFILE, IS.MALE) +# +# # Setup for parallel computing +# clp = makeCluster(NTHREADS) +# registerDoParallel(clp) +# +# # Obtain allele counts for 1000 Genomes locations for both tumour and normal +# foreach(i=1:length(chrom_names), .export=c("getAlleleCounts")) %dopar% { +# getAlleleCounts(bam.file=TUMOURBAM, +# output.file=paste(TUMOURNAME,"_alleleFrequencies_chr", i, ".txt", sep=""), +# g1000.loci=paste(G1000PREFIX_AC, i, ".txt", sep=""), +# min.base.qual=MIN_BASE_QUAL, +# min.map.qual=MIN_MAP_QUAL, +# allelecounter.exe=ALLELECOUNTER) +# +# getAlleleCounts(bam.file=NORMALBAM, +# output.file=paste(NORMALNAME,"_alleleFrequencies_chr", i, ".txt", sep=""), +# g1000.loci=paste(G1000PREFIX_AC, i, ".txt", sep=""), +# min.base.qual=MIN_BASE_QUAL, +#
min.map.qual=MIN_MAP_QUAL, +# allelecounter.exe=ALLELECOUNTER) +# +# } +# # Obtain BAF and LogR from the raw allele counts +# getBAFsAndLogRs(tumourAlleleCountsFile.prefix=paste(TUMOURNAME,"_alleleFrequencies_chr", sep=""), +# normalAlleleCountsFile.prefix=paste(NORMALNAME,"_alleleFrequencies_chr", sep=""), +# figuresFile.prefix=paste(TUMOURNAME, "_", sep=''), +# BAFnormalFile=paste(TUMOURNAME,"_normalBAF.tab", sep=""), +# BAFmutantFile=paste(TUMOURNAME,"_mutantBAF.tab", sep=""), +# logRnormalFile=paste(TUMOURNAME,"_normalLogR.tab", sep=""), +# logRmutantFile=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), +# combinedAlleleCountsFile=paste(TUMOURNAME,"_alleleCounts.tab", sep=""), +# chr_names=chrom_names, +# g1000file.prefix=G1000PREFIX, +# minCounts=MIN_NORMAL_DEPTH, +# samplename=TUMOURNAME) +# # Perform GC correction +# gc.correct.wgs(Tumour_LogR_file=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), +# outfile=paste(TUMOURNAME,"_mutantLogR_gcCorrected.tab", sep=""), +# correlations_outfile=paste(TUMOURNAME, "_GCwindowCorrelations.txt", sep=""), +# gc_content_file_prefix=GCCORRECTPREFIX, +# chrom_names=chrom_names) +# +# +# # These steps are independent and can be run in parallel. This could be done through multi-threading or splitting these up into separate jobs on a cluster. +# foreach(chrom=1:length(chrom_names), .export=c("generate.impute.input.wgs","run.impute","combine.impute.output","GetChromosomeBAFs","plot.haplotype.data")) %dopar% { +# print(chrom) +# # Transform the allele counts into something that impute understands +# generate.impute.input.wgs(chrom=chrom, +# tumour.allele.counts.file=paste(TUMOURNAME,"_alleleFrequencies_chr", chrom, ".txt", sep=""), +# normal.allele.counts.file=paste(NORMALNAME,"_alleleFrequencies_chr", chrom, ".txt", sep=""), +# output.file=paste(TUMOURNAME, "_impute_input_chr", chrom, ".txt", sep=""), +# imputeinfofile=IMPUTEINFOFILE, +# is.male=IS.MALE, +# problemLociFile=PROBLEMLOCI, +# useLociFile=NA) +# +# # Run impute on the files +# run.impute(inputfile=paste(TUMOURNAME, "_impute_input_chr", chrom, ".txt", sep=""), +# outputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""), +# is.male=IS.MALE, +# imputeinfofile=IMPUTEINFOFILE, +# impute.exe=IMPUTE_EXE, +# region.size=5000000, +# chrom=chrom) +# +# # As impute runs in windows across a chromosome we need to assemble the output +# combine.impute.output(inputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""), +# outputfile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), +# is.male=IS.MALE, +# imputeinfofile=IMPUTEINFOFILE, +# region.size=5000000, +# chrom=chrom) +# +# # Transform the impute output into haplotyped BAFs +# GetChromosomeBAFs(chrom=chrom, +# SNP_file=paste(TUMOURNAME, "_alleleFrequencies_chr", chrom, ".txt", sep=""), +# haplotypeFile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""), +# samplename=TUMOURNAME, +# outfile=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), +# chr_names=chrom_names, +# minCounts=MIN_NORMAL_DEPTH) +# +# # Plot what we have until this point +# plot.haplotype.data(haplotyped.baf.file=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""), +# imageFileName=paste(TUMOURNAME,"_chr",chrom,"_heterozygousData.png",sep=""), +# samplename=TUMOURNAME, +# chrom=chrom, +# chr_names=chrom_names) +# +# # Cleanup temp Impute output +# unlink(paste(TUMOURNAME, "_impute_output_chr", chrom, "*K.txt*", sep="")) +# } +# +# # 
Kill the threads as from here its all single core +# stopCluster(clp) +# +# # Combine all the BAF output into a single file +# combine.baf.files(inputfile.prefix=paste(TUMOURNAME, "_chr", sep=""), +# inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", +# outputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), +# no.chrs=length(chrom_names)) +# +# # Segment the phased and haplotyped BAF data +# segment.baf.phased(samplename=TUMOURNAME, +# inputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), +# outputfile=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), +# gamma=SEGMENTATION_GAMMA, +# phasegamma=PHASING_GAMMA, +# kmin=3, +# phasekmin=1, +# calc_seg_baf_option=CALC_SEG_BAF_OPTION) +# +# # Fit a clonal copy number profile +# fit.copy.number(samplename=TUMOURNAME, +# outputfile.prefix=paste(TUMOURNAME, "_", sep=""), +# inputfile.baf.segmented=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), +# inputfile.baf=paste(TUMOURNAME,"_mutantBAF.tab", sep=""), +# inputfile.logr=paste(TUMOURNAME,"_mutantLogR_gcCorrected.tab", sep=""), +# dist_choice=CLONALITY_DIST_METRIC, +# ascat_dist_choice=ASCAT_DIST_METRIC, +# min.ploidy=MIN_PLOIDY, +# max.ploidy=MAX_PLOIDY, +# min.rho=MIN_RHO, +# min.goodness=MIN_GOODNESS_OF_FIT, +# uninformative_BAF_threshold=BALANCED_THRESHOLD, +# gamma_param=PLATFORM_GAMMA, +# use_preset_rho_psi=F, +# preset_rho=NA, +# preset_psi=NA, +# read_depth=30) +# +# # Go over all segments, determine which segements are a mixture of two states and fit a second CN state +# callSubclones(sample.name=TUMOURNAME, +# baf.segmented.file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), +# logr.file=paste(TUMOURNAME,"_mutantLogR_gcCorrected.tab", sep=""), +# rho.psi.file=paste(TUMOURNAME, "_rho_and_psi.txt",sep=""), +# output.file=paste(TUMOURNAME,"_subclones.txt", sep=""), +# output.figures.prefix=paste(TUMOURNAME,"_subclones_chr", sep=""), +# output.gw.figures.prefix=paste(TUMOURNAME,"_BattenbergProfile", sep=""), +# masking_output_file=paste(TUMOURNAME, "_segment_masking_details.txt", sep=""), +# sv_breakpoints_file="NA", +# chr_names=chrom_names, +# gamma=PLATFORM_GAMMA, +# segmentation.gamma=NA, +# siglevel=0.05, +# maxdist=0.01, +# noperms=1000, +# calc_seg_baf_option=CALC_SEG_BAF_OPTION) +# +# # Make some post-hoc plots +# make_posthoc_plots(samplename=TUMOURNAME, +# logr_file=paste(TUMOURNAME, "_mutantLogR_gcCorrected.tab", sep=""), +# subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""), +# rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), +# bafsegmented_file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), +# logrsegmented_file=paste(TUMOURNAME, ".logRsegmented.txt", sep=""), +# allelecounts_file=paste(TUMOURNAME, "_alleleCounts.tab", sep="")) +# +# # Save refit suggestions for a future rerun +# cnfit_to_refit_suggestions(samplename=TUMOURNAME, +# subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""), +# rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), +# gamma_param=PLATFORM_GAMMA) +# diff --git a/man/battenberg.Rd b/man/battenberg.Rd new file mode 100644 index 0000000..a8d75d9 --- /dev/null +++ b/man/battenberg.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/battenberg.R +\name{battenberg} +\alias{battenberg} +\title{Run the Battenberg pipeline} +\usage{ +battenberg(tumourname, normalname, tumour_data_file, normal_data_file, + imputeinfofile, g1000prefix, problemloci, gccorrectprefix = NA, + g1000allelesprefix = NA, ismale = NA, data_type = "wgs", + 
impute_exe = "impute2", allelecounter_exe = "alleleCounter", + nthreads = 8, platform_gamma = 1, phasing_gamma = 1, + segmentation_gamma = 10, segmentation_kmin = 3, phasing_kmin = 1, + clonality_dist_metric = 0, ascat_dist_metric = 1, min_ploidy = 1.6, + max_ploidy = 4.8, min_rho = 0.1, min_goodness = 0.63, + uninformative_BAF_threshold = 0.51, min_normal_depth = 10, + min_base_qual = 20, min_map_qual = 35, calc_seg_baf_option = 1, + skip_allele_counting = F, skip_preprocessing = F, skip_phasing = F, + snp6_reference_info_file = NA, + apt.probeset.genotype.exe = "apt-probeset-genotype", + apt.probeset.summarize.exe = "apt-probeset-summarize", + norm.geno.clust.exe = "normalize_affy_geno_cluster.pl", + birdseed_report_file = "birdseed.report.txt", heterozygousFilter = "none") +} +\arguments{ +\item{tumourname}{Tumour identifier, this is used as a prefix for the output files. If allele counts are supplied separately, they are expected to have this identifier as prefix.} + +\item{normalname}{Matched normal identifier, this is used as a prefix for the output files. If allele counts are supplied separately, they are expected to have this identifier as prefix.} + +\item{tumour_data_file}{A BAM or CEL file for the tumour} + +\item{normal_data_file}{A BAM or CEL file for the normal} + +\item{imputeinfofile}{Full path to a Battenberg impute info file with pointers to Impute2 reference data} + +\item{g1000prefix}{Full prefix path to 1000 Genomes SNP loci data, as part of the Battenberg reference data} + +\item{problemloci}{Full path to a problem loci file that contains SNP loci that should be filtered out} + +\item{gccorrectprefix}{Full prefix path to GC content files, as part of the Battenberg reference data, not required for SNP6 data (Default: NA)} + +\item{g1000allelesprefix}{Full prefix path to 1000 Genomes SNP alleles data, as part of the Battenberg reference data, not required for SNP6 data (Default: NA)} + +\item{ismale}{A boolean set to TRUE if the donor is male, set to FALSE if female, not required for SNP6 data (Default: NA)} + +\item{data_type}{String that contains either wgs or snp6 depending on the supplied input data (Default: wgs)} + +\item{impute_exe}{Pointer to the Impute2 executable (Default: impute2, i.e. expected in $PATH)} + +\item{allelecounter_exe}{Pointer to the alleleCounter executable (Default: alleleCounter, i.e. expected in $PATH)} + +\item{nthreads}{The number of concurrent processes to use while running the Battenberg pipeline (Default: 8)} + +\item{platform_gamma}{Platform scaling factor, suggestions are set to 1 for wgs and to 0.55 for snp6 (Default: 1)} + +\item{phasing_gamma}{Gamma parameter used when correcting phasing mistakes (Default: 1)} + +\item{segmentation_gamma}{The gamma parameter controls the size of the penalty of starting a new segment during segmentation. 
It is therefore the key parameter for controlling the number of segments (Default: 10)} + +\item{segmentation_kmin}{Kmin represents the minimum number of probes/SNPs that a segment should consist of (Default: 3)} + +\item{phasing_kmin}{Kmin used when correcting for phasing mistakes (Default: 1)} + +\item{clonality_dist_metric}{Distance metric to use when choosing purity/ploidy combinations (Default: 0)} + +\item{ascat_dist_metric}{Distance metric to use when choosing purity/ploidy combinations (Default: 1)} + +\item{min_ploidy}{Minimum ploidy to be considered (Default: 1.6)} + +\item{max_ploidy}{Maximum ploidy to be considered (Default: 4.8)} + +\item{min_rho}{Minimum purity to be considered (Default: 0.1)} + +\item{min_goodness}{Minimum goodness of fit required for a purity/ploidy combination to be accepted as a solution (Default: 0.63)} + +\item{uninformative_BAF_threshold}{The threshold beyond which BAF becomes uninformative (Default: 0.51)} + +\item{min_normal_depth}{Minimum depth required in the matched normal for a SNP to be considered as part of the wgs analysis (Default: 10)} + +\item{min_base_qual}{Minimum base quality required for a read to be counted when allele counting (Default: 20)} + +\item{min_map_qual}{Minimum mapping quality required for a read to be counted when allele counting (Default: 35)} + +\item{calc_seg_baf_option}{Sets the way BAF is calculated per segment: 1=mean, 2=median (Default: 1)} + +\item{skip_allele_counting}{Provide TRUE when allele counting can be skipped (i.e. it has already been done) (Default: FALSE)} + +\item{skip_preprocessing}{Provide TRUE when preprocessing is already complete (Default: FALSE)} + +\item{skip_phasing}{Provide TRUE when phasing is already complete (Default: FALSE)} + +\item{snp6_reference_info_file}{Reference files for the SNP6 pipeline only (Default: NA)} + +\item{apt.probeset.genotype.exe}{Helper tool for extracting data from CEL files, SNP6 pipeline only (Default: apt-probeset-genotype)} + +\item{apt.probeset.summarize.exe}{Helper tool for extracting data from CEL files, SNP6 pipeline only (Default: apt-probeset-summarize)} + +\item{norm.geno.clust.exe}{Helper tool for extracting data from CEL files, SNP6 pipeline only (Default: normalize_affy_geno_cluster.pl)} + +\item{birdseed_report_file}{Sex inference output file, SNP6 pipeline only (Default: birdseed.report.txt)} + +\item{heterozygousFilter}{Legacy option to set a heterozygous SNP filter, SNP6 pipeline only (Default: "none")} +} +\description{ +Run the Battenberg pipeline +} +\author{ +sd11 +} diff --git a/man/getBAFsAndLogRs.Rd b/man/getBAFsAndLogRs.Rd index 2ffea31..1e36064 100644 --- a/man/getBAFsAndLogRs.Rd +++ b/man/getBAFsAndLogRs.Rd @@ -34,7 +34,7 @@ getBAFsAndLogRs(tumourAlleleCountsFile.prefix, normalAlleleCountsFile.prefix, \item{samplename}{String, name of the sample (optional, default=sample1).} -\item{seed}{A seed to be set} +\item{seed}{A seed to be set when randomising the alleles.} } \description{ Obtain BAF and LogR from the allele counts diff --git a/man/prepare_snp6.Rd b/man/prepare_snp6.Rd new file mode 100644 index 0000000..dfa23bc --- /dev/null +++ b/man/prepare_snp6.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prepare_SNP6.R +\name{prepare_snp6} +\alias{prepare_snp6} +\title{Prepare SNP6 data for haplotype construction} +\usage{ +prepare_snp6(tumour_cel_file, normal_cel_file, tumourname, chrom_names, + snp6_reference_info_file, + apt.probeset.genotype.exe = "apt-probeset-genotype", +
apt.probeset.summarize.exe = "apt-probeset-summarize", + norm.geno.clust.exe = "normalize_affy_geno_cluster.pl", + birdseed_report_file = "birdseed.report.txt") +} +\arguments{ +\item{tumour_cel_file}{Full path to a CEL file containing the tumour raw data} + +\item{normal_cel_file}{Full path to a CEL file containing the normal raw data} + +\item{tumourname}{Identifier to be used for tumour output files} + +\item{chrom_names}{A vector containing the names of chromosomes to be included} + +\item{snp6_reference_info_file}{Full path to the SNP6 reference info file} + +\item{apt.probeset.genotype.exe}{Full path to the apt.probeset.genotype executable (Default: expected in $PATH)} + +\item{apt.probeset.summarize.exe}{Full path to the apt.probeset.summarize executable (Default: expected in $PATH)} + +\item{norm.geno.clust.exe}{Full path to the norm.geno.clust.exe executable (Default: expected in $PATH)} + +\item{birdseed_report_file}{Name of the birdseed output file. This is a temporary output file of one of the internally called functions and its name cannot be changed, so do not modify this parameter. (Default: birdseed.report.txt)} +} +\description{ +This function performs part of the Battenberg SNP6 pipeline: extracting BAF and logR from the CEL files +and performing GC content correction. +} +\author{ +sd11 +} diff --git a/man/prepare_wgs.Rd b/man/prepare_wgs.Rd new file mode 100644 index 0000000..638d030 --- /dev/null +++ b/man/prepare_wgs.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prepare_wgs.R +\name{prepare_wgs} +\alias{prepare_wgs} +\title{Prepare WGS data for haplotype construction} +\usage{ +prepare_wgs(chrom_names, tumourbam, normalbam, tumourname, normalname, + g1000allelesprefix, g1000prefix, gccorrectprefix, min_base_qual, min_map_qual, + allelecounter_exe, min_normal_depth, nthreads, skip_allele_counting) +} +\arguments{ +\item{chrom_names}{A vector containing the names of chromosomes to be included} + +\item{tumourbam}{Full path to the tumour BAM file} + +\item{normalbam}{Full path to the normal BAM file} + +\item{tumourname}{Identifier to be used for tumour output files} + +\item{normalname}{Identifier to be used for normal output files} + +\item{g1000allelesprefix}{Prefix path to the 1000 Genomes alleles reference files} + +\item{g1000prefix}{Prefix path to the 1000 Genomes SNP reference files} + +\item{gccorrectprefix}{Prefix path to GC content reference data} + +\item{min_base_qual}{Minimum base quality required for a read to be counted} + +\item{min_map_qual}{Minimum mapping quality required for a read to be counted} + +\item{allelecounter_exe}{Path to the allele counter executable (can be found in $PATH)} + +\item{min_normal_depth}{Minimum depth required in the normal for a SNP to be included} + +\item{nthreads}{The number of parallel processes to run} + +\item{skip_allele_counting}{Flag, set to TRUE if allele counting is already complete (files are expected in the working directory on disk)} +} +\description{ +This function performs part of the Battenberg WGS pipeline: counting alleles, constructing BAF and logR +and performing GC content correction.
+} +\author{ +sd11 +} diff --git a/man/run_haplotyping.Rd b/man/run_haplotyping.Rd new file mode 100644 index 0000000..3e727eb --- /dev/null +++ b/man/run_haplotyping.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/impute.R +\name{run_haplotyping} +\alias{run_haplotyping} +\title{Construct haplotypes for a chromosome} +\usage{ +run_haplotyping(chrom, tumourname, normalname, ismale, imputeinfofile, + problemloci, impute_exe, min_normal_depth, chrom_names, + snp6_reference_info_file = NA, heterozygousFilter = NA) +} +\arguments{ +\item{chrom}{The chromosome for which to reconstruct haplotypes} + +\item{tumourname}{Identifier of the tumour, used to match data files on disk} + +\item{normalname}{Identifier of the normal, used to match data files on disk} + +\item{ismale}{Boolean, set to TRUE if the sample is male} + +\item{imputeinfofile}{Full path to the imputeinfo reference file} + +\item{problemloci}{Full path to the problematic loci reference file} + +\item{impute_exe}{Path to the impute executable (can be found if it is in $PATH)} + +\item{min_normal_depth}{Minimum depth in the matched normal required for a SNP to be used} + +\item{chrom_names}{A vector containing the names of chromosomes to be included} + +\item{snp6_reference_info_file}{SNP6-only parameter (Default: NA)} + +\item{heterozygousFilter}{SNP6-only parameter (Default: NA)} +} +\description{ +This function takes preprocessed data and performs haplotype reconstruction. +} +\author{ +sd11 +}
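
To make the new entry point concrete, the following is a minimal sketch of how battenberg() might be invoked for a WGS tumour/normal pair once the package is installed. The sample identifiers, BAM locations and reference paths (/path/to/...) are placeholders rather than files shipped with this change; every other argument is left at the defaults documented in man/battenberg.Rd.

library(Battenberg)

# Hypothetical sample identifiers and placeholder paths; substitute real data and
# the locations of the Battenberg reference bundle before running.
battenberg(tumourname="Tumour1",
           normalname="Normal1",
           tumour_data_file="/path/to/Tumour1.bam",
           normal_data_file="/path/to/Normal1.bam",
           ismale=TRUE,
           imputeinfofile="/path/to/impute_info.txt",
           g1000prefix="/path/to/1000genomes_loci_chr",           # 1000 Genomes SNP loci files
           g1000allelesprefix="/path/to/1000genomes_alleles_chr", # 1000 Genomes SNP alleles files
           gccorrectprefix="/path/to/1000_genomes_GC_corr_chr_",
           problemloci="/path/to/probloci_270415.txt.gz",
           data_type="wgs",
           impute_exe="impute2",
           allelecounter_exe="alleleCounter",
           nthreads=8)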
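
The new SKIP_ALLELECOUNTING, SKIP_PREPROCESSING and SKIP_PHASING switches in battenberg_wgs.R map onto the skip_allele_counting, skip_preprocessing and skip_phasing arguments. One plausible use, sketched below with the same placeholder inputs as above, is restarting in a working directory where allele counting and phasing have already completed, so that only the downstream segmentation and copy number fitting are redone.

library(Battenberg)

# Placeholder inputs from the previous sketch; intermediate files from the earlier
# run are assumed to be present in the current working directory.
shared_args = list(tumourname="Tumour1",
                   normalname="Normal1",
                   tumour_data_file="/path/to/Tumour1.bam",
                   normal_data_file="/path/to/Normal1.bam",
                   ismale=TRUE,
                   imputeinfofile="/path/to/impute_info.txt",
                   g1000prefix="/path/to/1000genomes_loci_chr",
                   g1000allelesprefix="/path/to/1000genomes_alleles_chr",
                   gccorrectprefix="/path/to/1000_genomes_GC_corr_chr_",
                   problemloci="/path/to/probloci_270415.txt.gz",
                   data_type="wgs")

# Skip the completed stages and rerun only the fitting steps
do.call(battenberg, c(shared_args,
                      list(skip_allele_counting=TRUE,
                           skip_preprocessing=TRUE,
                           skip_phasing=TRUE)))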
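
For SNP6 input the same wrapper is used with data_type="snp6" and the array-specific arguments documented in man/battenberg.Rd; according to that documentation, gccorrectprefix, g1000allelesprefix and ismale can stay at NA because GC correction uses the SNP6 reference info file and the donor sex is inferred from the birdseed report. The sketch below follows that description; all paths are again placeholders and platform_gamma is set to 0.55 as suggested in the parameter documentation.

library(Battenberg)

# Placeholder CEL files and SNP6 reference data; the apt-* helper tools are
# assumed to be available in $PATH, per the documented defaults.
battenberg(tumourname="Tumour2",
           normalname="Normal2",
           tumour_data_file="/path/to/Tumour2.CEL",
           normal_data_file="/path/to/Normal2.CEL",
           imputeinfofile="/path/to/impute_info.txt",
           g1000prefix="/path/to/1000genomes_loci_chr",
           problemloci="/path/to/probloci_270415.txt.gz",
           data_type="snp6",
           snp6_reference_info_file="/path/to/snp6_ref_info_file.txt",
           birdseed_report_file="birdseed.report.txt",
           platform_gamma=0.55,
           heterozygousFilter="none")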
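
Finally, the per-chromosome phasing loop that the example scripts used to spell out is now wrapped by the exported run_haplotyping() function, and the parallel/doParallel/foreach imports added to the NAMESPACE are what the wrapper uses to run it concurrently. Assuming prepare_wgs() has already produced the allele counts and BAF/logR files for a sample in the working directory, a manual parallel phasing pass might look roughly like the sketch below; reference paths and sample names are placeholders.

library(Battenberg)
library(parallel)
library(doParallel)
library(foreach)

# Placeholder inputs; prepare_wgs() is assumed to have been run for this sample.
tumourname = "Tumour1"
normalname = "Normal1"
ismale = TRUE
imputeinfofile = "/path/to/impute_info.txt"
problemloci = "/path/to/probloci_270415.txt.gz"
nthreads = 8

chrom_names = get.chrom.names(imputeinfofile, ismale)

# Start a worker pool, phase every chromosome independently, then shut the pool down
clp = makeCluster(nthreads)
registerDoParallel(clp)
foreach(chrom=1:length(chrom_names), .packages="Battenberg") %dopar% {
  run_haplotyping(chrom=chrom,
                  tumourname=tumourname,
                  normalname=normalname,
                  ismale=ismale,
                  imputeinfofile=imputeinfofile,
                  problemloci=problemloci,
                  impute_exe="impute2",
                  min_normal_depth=10,
                  chrom_names=chrom_names)
}
stopCluster(clp)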