From 8c36f4718cf4e3775d716b10e749cf85074a7f8c Mon Sep 17 00:00:00 2001 From: Thierry Gosselin Date: Tue, 29 Nov 2016 17:40:44 -0500 Subject: [PATCH] **v.0.4.6** * I'm pleased to announce that `stackr` parallel mode now works with **Windows**! Nothing to install: just choose the number of CPUs and the rest is done automatically. * `haplo2colony` is deprecated. Use the new function called `write_colony`! * `write_colony`: works similarly to the deprecated function `haplo2colony`, * with the major advantage that it's no longer restricted to the STACKS haplotypes file. * The function uses the `tidy_genomic_data` module to import files, so you can choose any of the 10 input file formats supported by `stackr`! * other benefits include the possibility to efficiently test MAF, snp.ld, the haplotypes/snp approach, a whitelist of markers, a blacklist of individuals, a blacklist of genotypes, etc. with the built-in arguments. * the function **keeps only markers in common** between populations/groups and **removes monomorphic markers**. * **Note:** the function has several *defaults* and `COLONY` uses a complicated file format, so please make sure to read the function documentation and the `COLONY` manual. --- DESCRIPTION | 4 +- NAMESPACE | 11 +- R/.DS_Store | Bin 6148 -> 6148 bytes R/write_colony.R | 649 ++++++++++++++++++++++++++++++++++++++++++++ README.Rmd | 26 +- README.md | 17 +- inst/CITATION | 2 +- man/write_colony.Rd | 320 ++++++++++++++++++++++ 8 files changed, 1018 insertions(+), 11 deletions(-) create mode 100644 R/write_colony.R create mode 100644 man/write_colony.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c3e1244..8af42b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: stackr Type: Package Title: GBS/RADseq Data Exploration, Manipulation and Visualization using R -Version: 0.4.5 -Date: 2016-11-18 +Version: 0.4.6 +Date: 2016-11-29 Encoding: UTF-8 Authors@R: c( person("Thierry", "Gosselin", email = "thierrygosselin@icloud.com", role = c("aut", "cre")), diff --git a/NAMESPACE b/NAMESPACE index 801a5bc..303d6c7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,7 +27,6 @@ export(filter_snp_number) export(fis_summary) export(genomic_converter) export(genotypes_summary) -export(haplo2colony) export(ibdg_fh) export(individuals2strata) export(keep_common_markers) @@ -79,6 +78,7 @@ export(whitelist_loci_snp) export(whitelist_snp_vcf) export(write_arlequin) export(write_betadiv) +export(write_colony) export(write_genepop) export(write_genind) export(write_genlight) @@ -90,10 +90,8 @@ export(write_vcf) import(adegenet) import(dplyr) import(ggplot2) -import(lazyeval) import(parallel) import(readr) -import(reshape2) import(strataG) import(stringi) importFrom(adegenet,chromosome) @@ -116,6 +114,7 @@ importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) importFrom(dplyr,coalesce) importFrom(dplyr,contains) +importFrom(dplyr,count) importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,filter_) @@ -144,8 +143,12 @@ importFrom(dplyr,ungroup) importFrom(lazyeval,interp) importFrom(magrittr,"%>%") importFrom(methods,new) +importFrom(parallel,clusterExport) importFrom(parallel,detectCores) +importFrom(parallel,makeCluster) importFrom(parallel,mclapply) +importFrom(parallel,parLapply) +importFrom(parallel,stopCluster) importFrom(purrr,detect_index) importFrom(purrr,discard) importFrom(purrr,flatten) @@ -158,6 +161,7 @@ importFrom(readr,read_delim) importFrom(readr,read_lines) importFrom(readr,read_tsv) importFrom(readr,write_delim) +importFrom(readr,write_file) 
importFrom(readr,write_tsv) importFrom(reshape2,dcast) importFrom(reshape2,melt) @@ -202,6 +206,7 @@ importFrom(tidyr,unite_) importFrom(utils,combn) importFrom(utils,count.fields) importFrom(utils,packageVersion) +importFrom(utils,sessionInfo) importFrom(utils,write.table) importFrom(vcfR,extract.gt) importFrom(vcfR,read.vcfR) diff --git a/R/.DS_Store b/R/.DS_Store index 6481cd87c17f2e79436091e9596a987f630f7fa4..1ce8379f50bf6c4e86a03b83a2a50bdd323411c0 100644 GIT binary patch delta 35 qcmZoMXffCj&9-?u8w(>ZA454q5kn?J2}3GF{NxNa{mrx4GX(&yZV7S# delta 35 qcmZoMXffCj&Bn{ekin41P{5GGkk4Q=Ifq?;b2J+Z% + stringi::stri_replace_all_fixed( + str = ., + pattern = c("-", " ", ":"), + replacement = c("", "@", ""), + vectorize_all = FALSE + ) %>% + stringi::stri_sub(str = ., from = 1, to = 13) + ) + + if (!is.null(imputation.method)) { + filename.imp <- stringi::stri_replace_all_fixed( + str = filename, + pattern = "stackr_colony_", + replacement = "stackr_colony_imputed_", + vectorize_all = FALSE + ) + } + } else { + if (!is.null(imputation.method)) { + filename.imp <- stringi::stri_join(filename, "_imputed") + } + } + + # Import---------------------------------------------------------------------- + data.type <- detect_genomic_format(data = data) + message(paste("File type: ", data.type)) + + # Strata argument required for VCF and haplotypes files + if (data.type == "vcf.file" & is.null(strata)) stop("strata argument is required") + + message("Importing data...") + if (data.type == "tbl_df") { + input <- stackr::tidy_wide(data = data, import.metadata = TRUE) + # For long tidy format, switch LOCUS to MARKERS column name, if found MARKERS not found + if (tibble::has_name(input, "LOCUS") && !tibble::has_name(input, "MARKERS")) { + input <- dplyr::rename(.data = input, MARKERS = LOCUS) + } + } else { + input <- suppressMessages( + stackr::tidy_genomic_data( + data = data, + vcf.metadata = FALSE, + blacklist.id = blacklist.id, + blacklist.genotype = blacklist.genotype, + whitelist.markers = whitelist.markers, + monomorphic.out = TRUE, + max.marker = max.marker, + snp.ld = snp.ld, + common.markers = TRUE, + maf.thresholds = maf.thresholds, + maf.pop.num.threshold = maf.pop.num.threshold, + maf.approach = maf.approach, + maf.operator = maf.operator, + strata = strata, + pop.levels = pop.levels, + pop.labels = pop.labels, + pop.select = pop.select, + filename = NULL, + verbose = FALSE + ) + ) + } + + input$GT <- stringi::stri_replace_all_fixed( + str = input$GT, + pattern = c("/", ":", "_", "-", "."), + replacement = c("", "", "", "", ""), + vectorize_all = FALSE + ) + pop.levels <- levels(input$POP_ID) + pop.labels <- pop.levels + + # Subsampling markers -------------------------------------------------------- + if (!is.null(sample.markers)) { + message(stringi::stri_join("Randomly subsampling ", sample.markers, " markers...")) + # sample.markers <- 500 # test + markers.list <- dplyr::distinct(input, MARKERS) %>% + dplyr::sample_n(tbl = ., size = sample.markers, replace = FALSE) + + input <- suppressWarnings(dplyr::semi_join(input, markers.list, by = "MARKERS")) + markers.list <- NULL + } + + # Generate colony without imputations ---------------------------------------- + message("Generating COLONY file without imputations...\n") + stackr_colony(data = input, filename = filename) + + # Imputations----------------------------------------------------------------- + if (!is.null(imputation.method)) { + message("\nMap-independent imputations...\n\n") + input.imp <- stackr::stackr_imputations_module( + data = 
dplyr::select(.data = input, MARKERS, POP_ID, INDIVIDUALS, GT), + imputation.method = imputation.method, + impute = impute, + imputations.group = imputations.group, + num.tree = num.tree, + iteration.rf = iteration.rf, + split.number = split.number, + verbose = verbose, + parallel.core = parallel.core, + filename = NULL + ) + message("\n\nGenerating COLONY file WITH imputations...\n") + stackr_colony(data = input.imp, filename = filename.imp) + + } # End imputations + + message("COLONY file(s) written in the working directory") + res <- "COLONY file(s) written in the working directory" + # Imputations: colony with imputed haplotypes using Random Forest ------------ + timing <- proc.time() - timing + message(stringi::stri_join("Computation time: ", round(timing[[3]]), " sec")) + cat("############################## completed ##############################\n") + return(res) +} + +# stackr_colony internal function ---------------------------------------------- +# @keywords internal +# @param data the tidy data to transform into a colony file +# @param filename name of the file written in the directory +# @param ... other argument passed to the remaining part of the code + +stackr_colony <- function( + data, + filename, + allele.freq = NULL, + inbreeding = 0, + mating.sys.males = 0, + mating.sys.females = 0, + clone = 0, + run.length =2, + analysis = 1, + allelic.dropout = 0, + error.rate = 0.02, + print.all.colony.opt = FALSE, + ...) { + # data <- input #test + + # Separate the alleles ------------------------------------------------------- + input <- dplyr::select(.data = data, MARKERS, POP_ID, INDIVIDUALS, GT) %>% + dplyr::mutate( + A1 = stringi::stri_sub(GT, 1, 3), + A2 = stringi::stri_sub(GT, 4,6) + ) %>% + dplyr::select(-GT) %>% + tidyr::gather(data = ., key = ALLELE_GROUP, value = ALLELES, -c(MARKERS, INDIVIDUALS, POP_ID)) %>% + dplyr::mutate(ALLELES = as.numeric(ALLELES)) %>% + dplyr::arrange(MARKERS, POP_ID, INDIVIDUALS) + + # Allele frequency per locus-------------------------------------------------- + if (!is.null(allele.freq)) { + message("Computing allele frequency") + if (allele.freq != "overall") { + input.alleles <- dplyr::filter(.data = input, POP_ID %in% allele.freq) %>% + dplyr::filter(ALLELES != 0) + } else { + input.alleles <- dplyr::filter(.data = input, ALLELES != 0) + } + + allele.per.locus <- dplyr::distinct(input.alleles, MARKERS, ALLELES) %>% + dplyr::count(x = ., MARKERS) %>% + dplyr::arrange(MARKERS) %>% + dplyr::select(n) %>% + purrr::flatten_chr(.) + + # If someone finds a beter and faster wayto do all this, I'm all in! + freq <- input.alleles %>% + dplyr::group_by(MARKERS, ALLELES) %>% + dplyr::tally(.) %>% + dplyr::ungroup() %>% + dplyr::group_by(MARKERS) %>% + dplyr::mutate(FREQ = round(n/sum(n), 2)) %>% + dplyr::select(MARKERS, ALLELES, FREQ) %>% + dplyr::arrange(MARKERS) %>% + dplyr::mutate(GROUP = seq(1, n(), by = 1)) %>% + dplyr::mutate_all(.tbl = ., .funs = as.character) %>% + tidyr::gather(data = ., key = ALLELES_FREQ, value = VALUE, -c(MARKERS, GROUP)) %>% + dplyr::mutate(ALLELES_FREQ = factor(ALLELES_FREQ, levels = c("ALLELES", "FREQ"), ordered = TRUE)) %>% + dplyr::group_by(MARKERS, ALLELES_FREQ) %>% + tidyr::spread(data = ., key = GROUP, value = VALUE) %>% + dplyr::ungroup(.) 
%>% + tidyr::unite(data = ., col = INFO, -c(MARKERS, ALLELES_FREQ), sep = " ") %>% + dplyr::mutate( + INFO = stringi::stri_replace_all_regex(str = INFO, pattern = "NA", replacement = "", vectorize_all = FALSE), + INFO = stringi::stri_trim_right(str = INFO, pattern = "\\P{Wspace}") + ) %>% + dplyr::select(-MARKERS, -ALLELES_FREQ) + input.alleles <- NULL + } + + markers.name <- tibble::as_data_frame(t(dplyr::distinct(input, MARKERS) %>% dplyr::arrange(MARKERS))) + marker.num <- ncol(markers.name) + + input <- tidyr::unite(data = input, col = MARKERS.ALLELE_GROUP, MARKERS, ALLELE_GROUP, sep = ".") %>% + dplyr::group_by(POP_ID, INDIVIDUALS) %>% + tidyr::spread(data = ., key = MARKERS.ALLELE_GROUP, value = ALLELES) %>% + dplyr::arrange(POP_ID, INDIVIDUALS) %>% + dplyr::ungroup(.) %>% + dplyr::select(-POP_ID) %>% + dplyr::mutate_all(.tbl = ., .funs = as.character) + + # Line 1 = Dataset name------------------------------------------------------- + dataset.opt <- "`My first COLONY run` ! Dataset name\n" + readr::write_file(x = dataset.opt, path = filename, append = FALSE) + + # Line 2 = Output filename---------------------------------------------------- + colony.output.filename <- stringi::stri_replace_all_fixed( + filename, + pattern = ".dat", + replacement = "") + colony.output.filename <- paste(colony.output.filename, " ! Output file name\n") + readr::write_file(x = colony.output.filename, path = filename, append = TRUE) + + # Line 3 = Offspring number--------------------------------------------------- + off.num.opt <- paste(nrow(input), " ! Number of offspring in the sample\n", sep = "") + readr::write_file(x = off.num.opt, path = filename, append = TRUE) + + # Line 4 = Number of loci----------------------------------------------------- + marker.num.opt <- paste(marker.num, " ! Number of loci\n", sep = "") + readr::write_file(x = marker.num.opt, path = filename, append = TRUE) + + # Line 5 = Seed random number generator ------------------------------------- + seed.opt <- "1234 ! Seed for random number generator\n" + readr::write_file(x = seed.opt, path = filename, append = TRUE) + + # Line 6 = Updating allele frequency------------------------------------------ + update.allele.freq.opt <- "0 ! 0/1=Not updating/updating allele frequency\n" + readr::write_file(x = update.allele.freq.opt, path = filename, append = TRUE) + + # Line 7 = Dioecious/Monoecious species--------------------------------------- + dioecious.opt <- "2 ! 2/1=Dioecious/Monoecious species\n" + readr::write_file(x = dioecious.opt, path = filename, append = TRUE) + + # Line 8 = inbreeding--------------------------------------------------------- + inbreeding.opt <- paste(inbreeding, " ! 0/1=No inbreeding/inbreeding\n", sep = "") + readr::write_file(x = inbreeding.opt, path = filename, append = TRUE) + + # Line 9 = Ploidy------------------------------------------------------------ + ploidy.opt <- "0 ! 0/1=Diploid species/HaploDiploid species\n" + readr::write_file(x = ploidy.opt, path = filename, append = TRUE) + + # Line 10 = Mating system (polygamous: 0, monogamous: 1).--------------------- + mating.opt <- paste(mating.sys.males," ", mating.sys.females, " ! 0/1=Polygamy/Monogamy for males & females\n", sep = "") + readr::write_file(x = mating.opt, path = filename, append = TRUE) + + # Line 11 = Clone inference--------------------------------------------------- + clone.opt <- paste(clone, " ! 
0/1=Clone inference =No/Yes\n", sep = "") + readr::write_file(x = clone.opt, path = filename, append = TRUE) + + # Line 12 = Sibship size scaling---------------------------------------------- + sib.size.scal.opt <- "1 ! 0/1=Full sibship size scaling =No/Yes\n" + readr::write_file(x = sib.size.scal.opt, path = filename, append = TRUE) + + # Line 13 = Sibship prior indicator (Integer), average paternal sibship size (Real, optional), average maternal sibship size (Real, optional) + sib.prior.opt <- "0 0 0 ! 0, 1, 2, 3 = No, weak, medium, strong sibship size prior; mean paternal & maternal sibship size\n" + readr::write_file(x = sib.prior.opt, path = filename, append = TRUE) + + # Line 14 = Population allele frequency indicator----------------------------- + if (is.null(allele.freq)) { + allele.freq.ind.opt <- "0 ! 0/1=Unknown/Known population allele frequency\n" + } else { + allele.freq.ind.opt <- "1 ! 0/1=Unknown/Known population allele frequency\n" + } + readr::write_file(x = allele.freq.ind.opt, path = filename, append = TRUE) + + + # Line 15 and more------------------------------------------------------------ + # Numbers of alleles per locus (Integer, optional). + # Required when the Population allele frequency indicator is set to 1 + # Alleles and their frequencies per locus (Integer, Real, optional) + # If someone finds a beter to do all this, I'm all in! + + if (!is.null(allele.freq)) { + readr::write_file(x = paste0(paste0(allele.per.locus, collapse = " "), " !Number of alleles per locus\n"), path = filename, append = TRUE) + utils::write.table(x = freq, file = filename, append = TRUE, quote = FALSE, row.names = FALSE, col.names = FALSE) + } + + # Number of runs-------------------------------------------------------------- + num.run.opt <- "1 ! Number of runs\n" + readr::write_file(x = num.run.opt, path = filename, append = TRUE) + + # Length of run -------------------------------------------------------------- + # give a value of 1, 2, 3, 4 to indicate short, medium, long, very long run + run.length.opt <- paste(run.length, " ! Length of run\n", sep = "") + readr::write_file(x = run.length.opt, path = filename, append = TRUE) + + # Monitor method (Time in second)--------------------------------------------- + monitor.met.opt <- "0 ! 0/1=Monitor method by Iterate\n" + readr::write_file(x = monitor.met.opt, path = filename, append = TRUE) + + + # Monitor interval (Time in second)------------------------------------------- + monitor.int.opt <- "10000 ! Monitor interval in Iterate\n" + readr::write_file(x = monitor.int.opt, path = filename, append = TRUE) + + # WindowsGUI/DOS, 0 when running Colony in DOS mode or on other platforms----- + windows.gui.opt <- "0 ! non-Windows version\n" + readr::write_file(x = windows.gui.opt, path = filename, append = TRUE) + + # Analysis method : ---------------------------------------------------------- + # 0, 1 or 2 for Pairwise-Likelihood score (PLS), full likelihood method (FL), + # or the FL and PLS combined method (FPLS). More on these methods are explained + # above in the Windows GUI data input section. + analysis.opt <- paste(analysis, " ! Analysis 0 (Pairwise-Likelihood Score), 1 (Full Likelihood), 2 (combined Pairwise-Likelihood Score and Full Likelihood)\n", sep = "") + readr::write_file(x = analysis.opt, path = filename, append = TRUE) + + # Precision------------------------------------------------------------------- + precision.opt <- "3 ! 
1/2/3=low/medium/high Precision for Full likelihood\n" + readr::write_file(x = precision.opt, path = filename, append = TRUE) + + # Marker IDs/Names------------------------------------------------------------ + # snp.id <- seq(from = 1, to = marker.number, by = 1) + # markers.name.opt <- as.character(c(markers.name, " ! Marker IDs\n")) + # readr::write_file(x = markers.name.opt, path = filename, append = TRUE) + markers.name.opt <- stringi::stri_join(markers.name, collapse = " ") + markers.name.opt <- stringi::stri_join(markers.name.opt, "! Marker IDs\n") + readr::write_file(x = markers.name.opt, path = filename, append = TRUE) + + # Marker types --------------------------------------------------------------- + # marker.type (codominant/dominant) + marker.type.opt <- stringi::stri_join(rep(0, marker.num), collapse = " ") + marker.type.opt <- stringi::stri_join(marker.type.opt, " ! Marker types, 0/1=Codominant/Dominant\n") + readr::write_file(x = marker.type.opt, path = filename, append = TRUE) + + # Allelic dropout rates------------------------------------------------------- + dropout <- stringi::stri_join(rep(allelic.dropout, marker.num), collapse = " ") + dropout <- stringi::stri_join(dropout, " ! Allelic dropout rate at each locus\n") + readr::write_file(x = dropout, path = filename, append = TRUE) + + # Error rates----------------------------------------------------------------- + error <- stringi::stri_join(rep(error.rate, marker.num), collapse = " ") + error <- stringi::stri_join(error, " ! False allele rate\n\n") + readr::write_file(x = error, path = filename, append = TRUE) + + # Offspring IDs and genotype-------------------------------------------------- + utils::write.table(x = input, file = filename, sep = " ", append = TRUE, col.names = FALSE, row.names = FALSE, quote = FALSE) + + # Probabilities that the father and mother of an offspring are included------- + # in the candidate males and females. The two numbers must be provided even if + # there are no candidate males or/and females. + prob.opt <- "\n\n0 0 ! Prob. of dad/mum included in the candidates\n" + readr::write_file(x = prob.opt, path = filename, append = TRUE) + + # Numbers of candidate males and females-------------------------------------- + candidate.opt <- "0 0 ! Numbers of candidate males & females\n" + readr::write_file(x = candidate.opt, path = filename, append = TRUE) + + # PRINT ALL REMAINING COLONY OPTIONS ----------------------------------------- + if (print.all.colony.opt) { + message("Printing all COLONY options...") + # Candidate male IDs/names and genotypes------------------------------------ + candidate.male.id.geno.opt <- "!Candidate male ID and genotypes\n" + readr::write_file(x = candidate.male.id.geno.opt, path = filename, append = TRUE) + # Candidate female IDs/names and genotypes ---------------------------------- + candidate.female.id.geno.opt <- "!Candidate female ID and genotypes\n" + readr::write_file(x = candidate.female.id.geno.opt, path = filename, append = TRUE) + } + + # Number of offspring with known paternity------------------------------------ + known.paternity.opt <- "0 ! Number of offspring with known father\n" + readr::write_file(x = known.paternity.opt, path = filename, append = TRUE) + + # Known offspring-father dyad------------------------------------------------- + if (print.all.colony.opt) { + known.father.dyad.opt <- "! 
Offspring ID and known father ID (Known offspring-father dyad)\n" + readr::write_file(x = known.father.dyad.opt, path = filename, append = TRUE) + } + + # Number of offspring with known maternity------------------------------------ + known.maternity.opt <- "0 ! Number of offspring with known mother\n" + readr::write_file(x = known.maternity.opt, path = filename, append = TRUE) + + # Known offspring-mother dyad------------------------------------------------- + if (print.all.colony.opt) { + known.mother.dyad.opt <- "! Offspring ID and known mother ID (Known offspring-mother dyad)\n" + readr::write_file(x = known.mother.dyad.opt, path = filename, append = TRUE) + } + + # Number of known paternal sibships (Integer)--------------------------------- + known.paternal.sibships.opt <- "0 ! Number of known paternal sibships\n" + readr::write_file(x = known.paternal.sibships.opt, path = filename, append = TRUE) + + + # Paternal sibship size and members (Integer, String, optional).-------------- + if (print.all.colony.opt) { + known.paternal.sibships.size.opt <- "! Paternal sibship size and members\n" + readr::write_file(x = known.paternal.sibships.size.opt, path = filename, append = TRUE) + } + + # Number of known maternal sibships (Integer)--------------------------------- + known.maternal.sibships.opt <- "0 ! Number of known maternal sibships\n" + readr::write_file(x = known.maternal.sibships.opt, path = filename, append = TRUE) + + + # Maternal sibship size and members (Integer, String, optional).-------------- + if (print.all.colony.opt) { + known.maternal.sibships.size.opt <- "! Maternal sibship size and members\n" + readr::write_file(x = known.maternal.sibships.size.opt, path = filename, append = TRUE) + } + + # Number of offspring with known excluded paternity (Integer). --------------- + offspring.known.excl.paternity.opt <- "0 ! Number of offspring with known excluded fathers\n" + readr::write_file(x = offspring.known.excl.paternity.opt, path = filename, append = TRUE) + + + # Excluded paternity --------------------------------------------------------- + if (print.all.colony.opt) { + excl.paternity.opt <- "! Offspring ID, number of excluded fathers, and excluded father IDs\n" + readr::write_file(x = excl.paternity.opt, path = filename, append = TRUE) + } + + # Number of offspring with known excluded maternity (Integer). --------------- + offspring.known.excl.maternity.opt <- "0 ! Number of offspring with known excluded mothers\n" + readr::write_file(x = offspring.known.excl.maternity.opt, path = filename, append = TRUE) + + + # Excluded maternity---------------------------------------------------------- + if (print.all.colony.opt) { + excl.maternity.opt <- "! Offspring ID, number of excluded mothers, and excluded father IDs\n" + readr::write_file(x = excl.maternity.opt, path = filename, append = TRUE) + } + + # Number of offspring with known excluded paternal sibships------------------- + offspring.known.excl.paternal.sibships.opt <- "0 ! Number of offspring with known excluded paternal sibships\n" + readr::write_file(x = offspring.known.excl.paternal.sibships.opt, path = filename, append = TRUE) + + # Excluded paternal siblings-------------------------------------------------- + if (print.all.colony.opt) { + excluded.paternal.siblings.opt <- "! 
Excluded paternal siblings\n" + readr::write_file(x = excluded.paternal.siblings.opt, path = filename, append = TRUE) + } + + # Number of offspring with known excluded maternal sibships------------------- + offspring.known.excl.maternal.sibships.opt <- "0 ! Number of offspring with known excluded maternal sibships\n" + readr::write_file(x = offspring.known.excl.maternal.sibships.opt, path = filename, append = TRUE) + + # Excluded maternal siblings-------------------------------------------------- + if (print.all.colony.opt) { + excluded.maternal.siblings.opt <- "! Excluded maternal siblings\n" + readr::write_file(x = excluded.maternal.siblings.opt, path = filename, append = TRUE) + } +}# End stackr_colony + + + + + diff --git a/README.Rmd b/README.Rmd index 3788830..7e8f4f8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -57,14 +57,14 @@ library(stackr) # to load | Caracteristics | Description | |:-------------------|:--------------------------------------------------------| | **Import** | Various supported genomic file formats in `tidy_genomic_format` and/or their separate module:
[VCF](https://samtools.github.io/hts-specs/) (Danecek et al., 2011)
[PLINK tped/tfam](http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr) (Purcell et al., 2007)
[genind](https://github.com/thibautjombart/adegenet) (Jombart et al., 2010; Jombart and Ahmed, 2011)
[genlight](https://github.com/thibautjombart/adegenet) (Jombart et al., 2010; Jombart and Ahmed, 2011), also in `tidy_genlight`
[strataG gtypes](https://github.com/EricArcher/strataG) (Archer et al., 2016)
[Genepop](http://genepop.curtin.edu.au) (Raymond and Rousset, 1995; Rousset, 2008), also in `tidy_genepop`
[STACKS haplotype file](http://catchenlab.life.illinois.edu/stacks/) (Catchen et al., 2011, 2013)
[hierfstat](https://github.com/jgx65/hierfstat) (Goudet, 2005), also in `tidy_fstat`
Dataframes of genotypes in wide or long/tidy format, also in `tidy_wide`| -| **Output** |Export genomic data out of **stackr** using `genomic_converter` or the separate modules (12 formats):
`write_vcf`: [VCF](https://samtools.github.io/hts-specs/) (Danecek et al., 2011)
`write_plink`: [PLINK tped/tfam](http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr) (Purcell et al., 2007)
`write_genind`: [adegenet genind and genlight](https://github.com/thibautjombart/adegenet) (Jombart et al., 2010; Jombart and Ahmed, 2011)
`write_genlight`: [genlight](https://github.com/thibautjombart/adegenet) (Jombart et al., 2010; Jombart and Ahmed, 2011)
`write_gtypes`: [strataG gtypes](https://github.com/EricArcher/strataG)
`write_genepop`: [Genepop](http://genepop.curtin.edu.au) (Raymond and Rousset, 1995; Rousset, 2008)
[STACKS haplotype file](http://catchenlab.life.illinois.edu/stacks/) (Catchen et al., 2011, 2013)
`write_betadiv`: [betadiv](http://adn.biol.umontreal.ca/~numericalecology/Rcode/) (Lamy, 2015)
`vcf2dadi`: [δaδi](http://gutengroup.mcb.arizona.edu/software/) (Gutenkunst et al., 2009)
`write_structure`: [structure](http://pritchardlab.stanford.edu/structure.html) (Pritchard et al., 2000)
`write_arlequin`: [Arlequin](http://cmpg.unibe.ch/software/arlequin35/) (Excoffier et al. 2005)
`write_hierfstat`: [hierfstat](https://github.com/jgx65/hierfstat) (Goudet, 2005)
Dataframes of genotypes in wide or long/tidy format| +| **Output** |Export genomic data out of **stackr** using `genomic_converter` or the separate modules (13 formats):
`write_vcf`: [VCF](https://samtools.github.io/hts-specs/) (Danecek et al., 2011)
`write_plink`: [PLINK tped/tfam](http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr) (Purcell et al., 2007)
`write_genind`: [adegenet genind and genlight](https://github.com/thibautjombart/adegenet) (Jombart et al., 2010; Jombart and Ahmed, 2011)
`write_genlight`: [genlight](https://github.com/thibautjombart/adegenet) (Jombart et al., 2010; Jombart and Ahmed, 2011)
`write_gtypes`: [strataG gtypes](https://github.com/EricArcher/strataG)
`write_colony`: [COLONY](https://www.zsl.org/science/software/colony) (Jones and Wang, 2010; Wang, 2012)
`write_genepop`: [Genepop](http://genepop.curtin.edu.au) (Raymond and Rousset, 1995; Rousset, 2008)
[STACKS haplotype file](http://catchenlab.life.illinois.edu/stacks/) (Catchen et al., 2011, 2013)
`write_betadiv`: [betadiv](http://adn.biol.umontreal.ca/~numericalecology/Rcode/) (Lamy, 2015)
`vcf2dadi`: [δaδi](http://gutengroup.mcb.arizona.edu/software/) (Gutenkunst et al., 2009)
`write_structure`: [structure](http://pritchardlab.stanford.edu/structure.html) (Pritchard et al., 2000)
`write_arlequin`: [Arlequin](http://cmpg.unibe.ch/software/arlequin35/) (Excoffier et al. 2005)
`write_hierfstat`: [hierfstat](https://github.com/jgx65/hierfstat) (Goudet, 2005)
Dataframes of genotypes in wide or long/tidy format| |**Conversion function**| `genomic_converter` import/export genomic formats mentioned above. The function is also integrated with usefull filters, blacklist and whitelist.| |**Outliers detection**|`detect_duplicate_genomes`: Detect and remove duplicate individuals from your dataset
`detect_mixed_genomes`: Detect and remove potentially mixed individuals
`summary_haplotype` and `filter_snp_number`: Discard of outlier markers with *de novo* assembly artifact (e.g. markers with an extreme number of SNP per haplotype or with irregular number of alleles)| |**Pattern of missingness**|`missing_visualization`: Visualize patterns of missing data. Find patterns associated with different variables of your study (lanes, chips, sequencers, populations, sample sites, reads/samples, homozygosity, etc)| |**Filters**| Alleles, genotypes, markers, individuals and populations can be filtered and/or selected in several ways:

`filter_coverage`: Discard markers based on read depth (coverage) of alleles and genotypes
`filter_genotype_likelihood`: Discard markers based on genotype likelihood
`filter_individual`: Discard markers based on proportion of genotyped individuals
`filter_population`: Discard markers based on proportion of genotyped populations
`filter_het`: Discard markers based on the observed heterozygosity (Het obs)
`filter_fis`: Discard markers based on the inbreeding coefficient (Fis)
`filter_hw`: Discard markers based on the Hardy-Weinberg Equilibrium expectations (HWE)
`filter_maf`: Discard markers based on local or global (overall) minor allele frequency| |**Imputations**|**Map-independent** imputations of missing genotypes.
Using **Random Forest** or the most frequent category.
Imputations can be conducted **overall samples** or **by populations**.

Imputations are integrated in several functions and in a separate module: `stackr_imputations_module`| |**[ggplot2](http://ggplot2.org)-based plotting**|Visualize distribution of important metric and statistics and create publication-ready figures| -|**Parallel**|Codes designed and optimized for fast computations running imputations, iterations, etc. in parallel| +|**Parallel**|Codes designed and optimized for fast computations running imputations, iterations, etc. in parallel. Works on all OS: Linux, Mac and now Windows!| [More in stackr workflow below](https://github.com/thierrygosselin/stackr#stackr-workflow) @@ -120,6 +120,28 @@ citation("stackr") ## New features Change log, version, new features and bug history now lives in the [NEWS.md file](https://github.com/thierrygosselin/stackr/blob/master/NEWS.md) +**v.0.4.6** + +* I'm pleased to announce that `stackr` parallel mode now works with **Windows**! +Nothing to install: just choose the number of CPUs and +the rest is done automatically. +* `haplo2colony` is deprecated. Use the new function called `write_colony`! +* `write_colony`: works similarly to the deprecated function `haplo2colony`, + * with the major advantage that it's no longer restricted to the STACKS + haplotypes file. + * The function uses the `tidy_genomic_data` module + to import files, so you can choose any of the 10 input file formats supported + by `stackr`! + * other benefits include the possibility to efficiently test MAF, + snp.ld, the haplotypes/snp approach, a whitelist of markers, + a blacklist of individuals, a blacklist of genotypes, etc. with the built-in + arguments. + * the function **keeps only markers in common** between populations/groups + and **removes monomorphic markers**. + * **Note:** the function has several *defaults* and `COLONY` uses + a complicated file format, so please make sure to read the function + documentation and the `COLONY` manual. + **v.0.4.5** diff --git a/README.md b/README.md index 3a6a7c5..4329d81 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![Travis-CI Build Status](https://travis-ci.org/thierrygosselin/stackr.svg?branch=master)](https://travis-ci.org/thierrygosselin/stackr) [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/thierrygosselin/stackr?branch=master&svg=true)](https://ci.appveyor.com/project/thierrygosselin/stackr) [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/stackr)](http://cran.r-project.org/package=stackr) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) [![DOI](https://zenodo.org/badge/14548/thierrygosselin/stackr.svg)](https://zenodo.org/badge/latestdoi/14548/thierrygosselin/stackr) -[![packageversion](https://img.shields.io/badge/Package%20version-0.4.5-orange.svg)](commits/master) [![Last-changedate](https://img.shields.io/badge/last%20change-2016--11--27-brightgreen.svg)](/commits/master) +[![packageversion](https://img.shields.io/badge/Package%20version-0.4.6-orange.svg)](commits/master) [![Last-changedate](https://img.shields.io/badge/last%20change-2016--11--29-brightgreen.svg)](/commits/master) ------------------------------------------------------------------------ @@ -40,7 +40,7 @@ library(stackr) # to load Output -Export genomic data out of stackr using genomic_converter or the separate modules (12 formats):
write_vcf: VCF (Danecek et al., 2011)
write_plink: PLINK tped/tfam (Purcell et al., 2007)
write_genind: adegenet genind and genlight (Jombart et al., 2010; Jombart and Ahmed, 2011)
write_genlight: genlight (Jombart et al., 2010; Jombart and Ahmed, 2011)
write_gtypes: strataG gtypes
write_genepop: Genepop (Raymond and Rousset, 1995; Rousset, 2008)
STACKS haplotype file (Catchen et al., 2011, 2013)
write_betadiv: betadiv (Lamy, 2015)
vcf2dadi: δaδi (Gutenkunst et al., 2009)
write_structure: structure (Pritchard et al., 2000)
write_arlequin: Arlequin (Excoffier et al. 2005)
write_hierfstat: hierfstat (Goudet, 2005)
Dataframes of genotypes in wide or long/tidy format +Export genomic data out of stackr using genomic_converter or the separate modules (13 formats):
write_vcf: VCF (Danecek et al., 2011)
write_plink: PLINK tped/tfam (Purcell et al., 2007)
write_genind: adegenet genind and genlight (Jombart et al., 2010; Jombart and Ahmed, 2011)
write_genlight: genlight (Jombart et al., 2010; Jombart and Ahmed, 2011)
write_gtypes: strataG gtypes
write_colony: COLONY (Jones and Wang, 2010; Wang, 2012)
write_genepop: Genepop (Raymond and Rousset, 1995; Rousset, 2008)
STACKS haplotype file (Catchen et al., 2011, 2013)
write_betadiv: betadiv (Lamy, 2015)
vcf2dadi: δaδi (Gutenkunst et al., 2009)
write_structure: structure (Pritchard et al., 2000)
write_arlequin: Arlequin (Excoffier et al. 2005)
write_hierfstat: hierfstat (Goudet, 2005)
Dataframes of genotypes in wide or long/tidy format Conversion function @@ -68,7 +68,7 @@ library(stackr) # to load Parallel -Codes designed and optimized for fast computations running imputations, iterations, etc. in parallel +Codes designed and optimized for fast computations running imputations, iterations, etc. in parallel. Works on all OS: Linux, Mac and now Windows! @@ -127,6 +127,17 @@ New features Change log, version, new features and bug history now lives in the [NEWS.md file](https://github.com/thierrygosselin/stackr/blob/master/NEWS.md) +**v.0.4.6** + +- I'm pleased to announce that `stackr` parallel mode now works with **Windows**! Nothing to install: just choose the number of CPUs and the rest is done automatically. +- `haplo2colony` is deprecated. Use the new function called `write_colony`! +- `write_colony`: works similarly to the deprecated function `haplo2colony`, + - with the major advantage that it's no longer restricted to the STACKS haplotypes file. + - The function uses the `tidy_genomic_data` module to import files, so you can choose any of the 10 input file formats supported by `stackr`! + - other benefits include the possibility to efficiently test MAF, snp.ld, the haplotypes/snp approach, a whitelist of markers, a blacklist of individuals, a blacklist of genotypes, etc. with the built-in arguments. + - the function **keeps only markers in common** between populations/groups and **removes monomorphic markers**. + - **Note:** the function has several *defaults* and `COLONY` uses a complicated file format, so please make sure to read the function documentation and the `COLONY` manual. + **v.0.4.5** - temporary fix to `tidy_genomic_data` to read unconventional Tassel VCF diff --git a/inst/CITATION b/inst/CITATION index bca4d7e..df0c8c3 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -7,5 +7,5 @@ year = "2016", author = personList(as.person("Thierry Gosselin"), as.person("Louis Bernatchez")), url = "https://github.com/thierrygosselin/stackr", doi = "http://dx.doi.org/10.5281/zenodo.154432", -textVersion = paste("Thierry Gosselin, Louis Bernatchez, (2016). stackr: GBS/RAD Data Exploration, Manipulation and Visualization using R. R package version 0.4.5 https://github.com/thierrygosselin/stackr. doi : 10.5281/zenodo.154432") +textVersion = paste("Thierry Gosselin, Louis Bernatchez, (2016). stackr: GBS/RAD Data Exploration, Manipulation and Visualization using R. R package version 0.4.6 https://github.com/thierrygosselin/stackr. 
doi : 10.5281/zenodo.154432") ) diff --git a/man/write_colony.Rd b/man/write_colony.Rd new file mode 100644 index 0000000..c2e2754 --- /dev/null +++ b/man/write_colony.Rd @@ -0,0 +1,320 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_colony.R +\name{write_colony} +\alias{write_colony} +\title{Write a \code{COLONY} input file from 9 genomic formats} +\usage{ +write_colony(data, strata = NULL, pop.levels = NULL, pop.labels = NULL, + blacklist.id = NULL, blacklist.genotype = NULL, + whitelist.markers = NULL, monomorphic.out = TRUE, snp.ld = NULL, + common.markers = TRUE, maf.thresholds = NULL, maf.pop.num.threshold = 1, + maf.approach = "SNP", maf.operator = "OR", max.marker = NULL, + sample.markers = NULL, pop.select = NULL, allele.freq = NULL, + inbreeding = 0, mating.sys.males = 0, mating.sys.females = 0, + clone = 0, run.length = 2, analysis = 1, allelic.dropout = 0, + error.rate = 0.02, print.all.colony.opt = FALSE, + imputation.method = NULL, impute = "genotype", + imputations.group = "populations", num.tree = 100, iteration.rf = 10, + split.number = 100, verbose = FALSE, + parallel.core = parallel::detectCores() - 1, filename = NULL) +} +\arguments{ +\item{data}{9 options: biallelic and haplotypic vcf +(to make vcf population ready, see details below), +plink, stacks haplotype file, genind (library(adegenet)), +genlight (library(adegenet)), gtypes (library(strataG)), genepop, +and a data frame in wide format. +\emph{See details} of \code{\link{tidy_genomic_data}}.} + +\item{strata}{(optional/required) Required for VCF and haplotypes files, +optional for the other file formats supported. + +The strata file is a tab delimited file with 2 columns with header: +\code{INDIVIDUALS} and \code{STRATA}. With a +data frame of genotypes the strata is the INDIVIDUALS and POP_ID columns, with +PLINK files, the \code{tfam} first 2 columns are used. +If a \code{strata} file is specified, the strata file will have +precedence. The \code{STRATA} column can be any hierarchical grouping. +To create a strata file see \code{\link[stackr]{individuals2strata}}. +If you have already run +\href{http://catchenlab.life.illinois.edu/stacks/}{stacks} on your data, +the strata file is similar to a stacks `population map file`, make sure you +have the required column names (\code{INDIVIDUALS} and \code{STRATA}). +Default: \code{strata = NULL}.} + +\item{pop.levels}{(optional, string) This refers to the levels in a factor. In this +case, the id of the pop. +Use this argument to have the pop ordered your way instead of the default +alphabetical or numerical order. e.g. \code{pop.levels = c("QUE", "ONT", "ALB")} +instead of the default \code{pop.levels = c("ALB", "ONT", "QUE")}. +White spaces in population names are replaced by underscore. +Default: \code{pop.levels = NULL}.} + +\item{pop.labels}{(optional, string) Use this argument to rename/relabel +your pop or combine your pop. e.g. To combine \code{"QUE"} and \code{"ONT"} +into a new pop called \code{"NEW"}: +(1) First, define the levels for your pop with \code{pop.levels} argument: +\code{pop.levels = c("QUE", "ONT", "ALB")}. +(2) then, use \code{pop.labels} argument: +\code{pop.levels = c("NEW", "NEW", "ALB")}. +To rename \code{"QUE"} to \code{"TAS"}: +\code{pop.labels = c("TAS", "ONT", "ALB")}. +Default: \code{pop.labels = NULL}. If you find this too complicated, there is also the +\code{strata} argument that can do the same thing, see below. 
+White spaces in population names are replaced by underscore.} + +\item{blacklist.id}{(optional) A blacklist with individual ID and +a column header 'INDIVIDUALS'. The blacklist is in the working directory +(e.g. "blacklist.txt"). +Default: \code{blacklist.id = NULL}.} + +\item{blacklist.genotype}{(optional) Useful to erase genotype with below +average quality, e.g. genotype with more than 2 alleles in diploid likely +sequencing errors or genotypes with poor genotype likelihood or coverage. +The blacklist has a minimum of 2 column headers (markers and individuals). +Markers can be 1 column (CHROM or LOCUS or POS), +a combination of 2 (e.g. CHROM and POS or CHROM and LOCUS or LOCUS and POS) or +all 3 (CHROM, LOCUS, POS). The markers columns must be designated: CHROM (character +or integer) and/or LOCUS (integer) and/or POS (integer). The id column designated +INDIVIDUALS (character) columns header. The blacklist must be in the working +directory (e.g. "blacklist.genotype.txt"). For de novo VCF, CHROM column +with 'un' need to be changed to 1. +Default: \code{blacklist.genotype = NULL} for no blacklist of +genotypes to erase.} + +\item{whitelist.markers}{(optional) A whitelist containing CHROM (character +or integer) and/or LOCUS (integer) and/or +POS (integer) columns header. To filter by chromosome and/or locus and/or by snp. +The whitelist is in the working directory (e.g. "whitelist.txt"). +de novo CHROM column with 'un' need to be changed to 1. +In the VCF, the column ID is the LOCUS identification. +Default \code{whitelist.markers = NULL} for no whitelist of markers.} + +\item{monomorphic.out}{(optional) Should the monomorphic +markers present in the dataset be filtered out ? +Default: \code{monomorphic.out = TRUE}.} + +\item{snp.ld}{(optional) \strong{For VCF file only}. +SNP short distance linkage disequilibrium pruning. With anonymous markers from +RADseq/GBS de novo discovery, you can minimize linkage disequilibrium (LD) by +choosing among these 3 options: \code{"random"} selection, \code{"first"} or +\code{"last"} SNP on the same short read/haplotype. For long distance linkage +disequilibrium pruning, see details below. +Default: \code{snp.ld = NULL}.} + +\item{common.markers}{(optional) Logical. Default: \code{common.markers = TRUE}, +will only keep markers in common (genotyped) between all the populations.} + +\item{maf.thresholds}{(string, double, optional) String with +local/populations and global/overall maf thresholds, respectively. +e.g. \code{maf.thresholds = c(0.05, 0.1)} for a local maf threshold +of 0.05 and a global threshold of 0.1. Available for VCF, PLINK and data frame +files. +Default: \code{maf.thresholds = NULL}.} + +\item{maf.pop.num.threshold}{(integer, optional) When maf thresholds are used, +this argument is for the number of pop required to pass the maf thresholds +to keep the locus. Default: \code{maf.pop.num.threshold = 1}} + +\item{maf.approach}{(character, optional). +\code{maf.approach = "haplotype"} : looks at the minimum MAF found on the +read/haplotype. Using this option will discard all the markers/snp on +that read based on the thresholds chosen. This method is only available +for VCF and haplotype files, or tidy data frame from those file types. +\code{maf.approach = "SNP"} : treats all the SNP on the same +haplotype/read as independent. Doesn't work with haplotype file, +but does work for all other file type. 
+Default is \code{maf.approach = "SNP"}.} + +\item{maf.operator}{(character, optional) \code{maf.operator = "AND"} or +default \code{maf.operator = "OR"}. +When filtering over LOCUS or SNP, do you want the local \code{"AND"} +global MAF to pass the thresholds, or ... you want the local \code{"OR"} +global MAF to pass the thresholds, to keep the marker?} + +\item{max.marker}{An optional integer useful to subsample marker number in +large PLINK file. e.g. if the data set +contains 200 000 markers and \code{max.marker = 10000} 10000 markers are +subsampled randomly from the 200000 markers. Use \code{whitelist.markers} to +keep specific markers. +Default: \code{max.marker = NULL}.} + +\item{sample.markers}{(number) \code{COLONY} can take a long time to run, +use a random subsample of your markers to speed test \code{COLONY} +e.g. \code{sample.markers = 500} to use only 500 randomly chosen markers. +Default: \code{sample.markers = NULL}, will use all markers.} + +\item{pop.select}{(string, optional) Selected list of populations for +the analysis. e.g. \code{pop.select = c("QUE", "ONT")} to select \code{QUE} +and \code{ONT} population samples (out of 20 pops). +Default: \code{pop.select = NULL}} + +\item{allele.freq}{(optional, string) Allele frequency can be computed from +a select group. +e.g. \code{allele.freq = "QUE"} or \code{allele.freq = c("QUE", "ONT")}. +Using \code{allele.freq = "overall"} will use all the samples to compute the +allele frequency. +Default: \code{allele.freq = NULL}, will not compute allele frequency.} + +\item{inbreeding}{(boolean) 0/1 no inbreeding/inbreeding. +Default: \code{inbreeding = 0}} + +\item{mating.sys.males}{(boolean) Mating system in males. +0/1 polygyny/monogyny. +Default: \code{mating.sys.males = 0}.} + +\item{mating.sys.females}{(boolean) Mating system in females. +0/1 polygyny/monogyny. +Default: \code{mating.sys.females = 0}.} + +\item{clone}{(boolean) Should clones and duplicated individuals be inferred. +0/1, yes/no. Default: \code{clone = 0}.} + +\item{run.length}{(integer) Length of run. 1 (short), 2 (medium), 3 (long), +4 (very long). Start with short or medium run and consider longer run if your +estimates probability are not stable or really good. +Default: \code{run.length = 2}.} + +\item{analysis}{(integer) Analysis method. +0 (Pairwise-Likelihood Score), 1 (Full Likelihood), +2 (combined Pairwise-Likelihood Score and Full Likelihood). +Default: \code{analysis = 1}.} + +\item{allelic.dropout}{Locus allelic dropout rate. +Default : \code{allelic.dropout = 0}.} + +\item{error.rate}{Locus error rate. +Default:\code{error.rate = 0.02}.} + +\item{print.all.colony.opt}{(logical) Should all \code{COLONY} options be printed in the file. + + +\strong{This require manual curation, for the file to work directly with \code{COLONY}}. +Default = \code{print.all.colony.opt = FALSE}.} + +\item{imputation.method}{(character, optional) +Methods available for map-independent imputations of missing genotype: +(1) \code{"max"} to use the most frequent category for imputations. +(2) \code{"rf"} using Random Forest algorithm. +Default: no imputation \code{imputation.method = NULL}.} + +\item{impute}{(character, optional) Imputation on missing genotype +\code{impute = "genotype"} or alleles \code{impute = "allele"}. +Default: \code{"genotype"}.} + +\item{imputations.group}{(character, optional) \code{"global"} or \code{"populations"}. +Should the imputations be computed globally or by populations. 
If you choose +global, turn the verbose to \code{TRUE}, to see progress. +Default = \code{"populations"}.} + +\item{num.tree}{(integer, optional) The number of trees to grow in Random Forest. +Default: \code{num.tree = 100}.} + +\item{iteration.rf}{(integer, optional) The number of iterations of missing data algorithm +in Random Forest. +Default: \code{iteration.rf = 10}.} + +\item{split.number}{(integer, optional) Non-negative integer value used to specify +random splitting in Random Forest. +Default: \code{split.number = 100}.} + +\item{verbose}{(optional, logical) When \code{verbose = TRUE} +the function is a little more chatty during execution. +Default: \code{verbose = FALSE}.} + +\item{parallel.core}{(optional) The number of core for OpenMP shared-memory parallel +programming of Random Forest imputations. For more info on how to install the +OpenMP version see \code{\link[randomForestSRC]{randomForestSRC-package}}. +If not selected \code{detectCores()-1} is used as default.} + +\item{filename}{Name of the acronym for filenaming in the working directory.} +} +\value{ +A \code{COLONY} file in your working directory (2 if you selected imputations arguments...) +} +\description{ +Write a \code{COLONY} input file from several genomic formats, +look into \pkg{stackr} \code{\link{tidy_genomic_data}} for supported input +files. +} +\details{ +\strong{It is highly recommended to read (twice!) the user guide distributed with +\code{COLONY} to find out the details for input and output of the software.} + +Not all options are provided here. + +But to ease the process, all the required options to properly run \code{COLONY} +will be printed in the file written in your working directory. +Change the values accordingly and wisely. + + +\strong{Imputations} +The imputations using Random Forest requires more time to compute +and can take several minutes and hours depending on the size of +the dataset and polymorphism of the species used. e.g. with a +low polymorphic taxa, and a data set containing 30\% missing data, +5 000 haplotypes loci and 500 individuals will require 15 min. +For details about imputations, look into \pkg{stackr} \code{\link{stackr_imputations_module}} +} +\note{ +\code{common.markers} argument should be left to the default: \code{common.markers = TRUE}. + + +\code{monomorphic.out} argument should be left to the default: \code{monomorphic.out = TRUE}. + + +\strong{using a VCF with several SNP per haplotypes/read for input?} + +Please consider keeping just 1 SNP per read, \code{COLONY} doesn't like this +kind of linkage... use the argument \code{snp.ld}. +} +\examples{ +\dontrun{ +tidy.vcf <- tidy_genomic_data( +data = "batch_1.vcf", +whitelist.markers = "whitelist.vcf.txt", +snp.ld = NULL, +common.markers = TRUE, +blacklist.id = "blacklist.id.treefrog.tsv", +strata = "strata.treefrog.tsv", +pop.levels = c("PAN", "COS") +) +} +} +\author{ +Thierry Gosselin \email{thierrygosselin@icloud.com} +} +\references{ +Catchen JM, Amores A, Hohenlohe PA et al. (2011) +Stacks: Building and Genotyping Loci De Novo From Short-Read Sequences. +G3, 1, 171-182. + +Catchen JM, Hohenlohe PA, Bassham S, Amores A, Cresko WA (2013) +Stacks: an analysis tool set for population genomics. +Molecular Ecology, 22, 3124-3140. + +Ishwaran H. and Kogalur U.B. (2015). Random Forests for Survival, + Regression and Classification (RF-SRC), R package version 1.6.1. + +Ishwaran H. and Kogalur U.B. (2007). Random survival forests +for R. R News 7(2), 25-31. + +Ishwaran H., Kogalur U.B., Blackstone E.H. and Lauer M.S. 
(2008). +Random survival forests. Ann. Appl. Statist. 2(3), 841--860. + +Jones OR, Wang J (2010) COLONY: a program for parentage and +sibship inference from multilocus genotype data. +Molecular Ecology Resources, 10, 551–555. + +Wang J (2012) Computationally Efficient Sibship and +Parentage Assignment from Multilocus Marker Data. Genetics, 191, 183–194. +} +\seealso{ +\code{COLONY} is available on Jinliang Wang's website: +\url{http://www.zsl.org/science/software/colony} + +\code{randomForestSRC} is available on +CRAN \url{http://cran.r-project.org/web/packages/randomForestSRC/} +and GitHub \url{https://github.com/ehrlinger/randomForestSRC} +} +
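For readers who want to try the new converter, below is a minimal usage sketch assembled from the \usage and \arguments sections documented above. It is illustrative only: the input VCF and strata file names are borrowed from the example in the help page, the output prefix is a hypothetical placeholder, and the option values shown are documented defaults or documented choices, not recommendations.

# Minimal sketch (not run): write a COLONY input file from a VCF.
# File names are placeholders; see ?write_colony for the full argument list.
library(stackr)
write_colony(
  data = "batch_1.vcf",            # one of the supported input formats
  strata = "strata.treefrog.tsv",  # strata file is required for VCF input
  snp.ld = "first",                # keep 1 SNP per read; COLONY dislikes this kind of linkage
  common.markers = TRUE,           # keep markers genotyped in all populations (default)
  sample.markers = 500,            # random subset of markers to speed-test COLONY
  allele.freq = "overall",         # compute allele frequencies from all samples
  run.length = 2,                  # 1/2/3/4 = short/medium/long/very long run
  imputation.method = NULL,        # no imputation; use "rf" or "max" to impute missing genotypes
  filename = "colony.treefrog"     # name for the COLONY file written in the working directory
)

Starting with a small sample.markers value is a cheap way to confirm that the resulting COLONY input file is accepted by the software before committing to a long, full-marker run.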