diff --git a/src/SnowTools/AlignedContig.h b/src/SnowTools/AlignedContig.h
new file mode 100644
index 000000000..23c6b2d92
--- /dev/null
+++ b/src/SnowTools/AlignedContig.h
@@ -0,0 +1,294 @@
+#ifndef ALIGNED_CONTIG_H
+#define ALIGNED_CONTIG_H
+
+#include <algorithm>
+
+#include "SnowTools/GenomicRegion.h"
+#include "SnowTools/GenomicRegionCollection.h"
+#include "SnowTools/SnowUtils.h"
+
+#include "BamToolsUtils.h"
+#include "BreakPoint.h"
+
+using SnowTools::GRC;
+
+typedef std::vector<BamTools::CigarOp> CigarOpVec;
+typedef std::unordered_map<std::string, size_t> CigarMap;
+
+class AlignedContig;
+typedef std::unordered_map<std::string, AlignedContig> ContigMap;
+typedef std::vector<AlignedContig> AlignedContigVec;
+
+/*! This class contains a single alignment fragment from a contig to
+ * the reference. For a multi-part mapping of a contig to the reference,
+ * an object of this class represents just a single fragment from that alignment.
+ */
+struct AlignmentFragment {
+
+  friend AlignedContig;
+
+  /*! Construct an AlignmentFragment from a BWA alignment
+   * @param const reference to a BamAlignment
+   * @param const reference to a GenomicRegion window where this contig was assembled from
+   */
+  AlignmentFragment(const BamTools::BamAlignment &talign, const SnowTools::GenomicRegion &window, const CigarMap &nmap, const CigarMap &tmap);
+  
+  //! sort AlignmentFragment objects by start position
+  bool operator < (const AlignmentFragment& str) const { return (start < str.start); }
+
+  //! print the AlignmentFragment
+  friend std::ostream& operator<<(std::ostream &out, const AlignmentFragment& c); 
+
+  /*! @function
+   * @abstract Parse an alignment frag for a breakpoint
+   * @param reference to a BreakPoint to be created
+   * @return boolean informing whether there was a remaining indel break
+   */
+  bool parseIndelBreak(BreakPoint &bp);
+
+  /*! @function check if breakpoints match any cigar strings direct from the reads
+   * This is important in case the assembly missed a read (esp normal) that indicates
+   * that there is an indel. Basically this function says that if assembly calls a breakpoint
+   * and it agrees with a read alignment breakpoint, combine the info.
+   * @param const CigarMap reference containing hash with key=chr_breakpos_indeltype, val=tumor count
+   * @param const CigarMap reference containing hash with key=chr_breakpos_indeltype, val=normal count
+   */
+  void indelCigarMatches(const CigarMap &nmap, const CigarMap &tmap);  
+
+  BPVec indel_breaks; /**< indel variants on this alignment */
+
+  CigarOpVec cigar; /**< cigar oriented to assembled orientation */
+
+  private:
+
+  BamTools::BamAlignment align; /**< BWA alignment to reference */
+
+  size_t idx = 0; // index of the cigar where the last indel was taken from 
+
+  int break1 = -1; // 0-based breakpoint 1 on contig 
+  int break2 = -1; /**< 0-based breakpoint 2 on contig */
+  int gbreak1 = -1; /**< 0-based breakpoint 1 on reference chr */
+  int gbreak2 = -1; /**< 0-based breakpoint 1 on reference chr */
+  
+  size_t start; /**< the start position of this alignment on the reference. */
+
+  bool local = false; /**< boolean to note whether this fragment aligns to same location is was assembled from */
+
+  std::string m_name; // name of the entire contig
+  
+  std::string m_seq; // sequence of the entire contig
+
+};
+
+// define a way to order the contigs by start
+/*struct AlignmentOrdering {
+  inline bool operator() (const AlignmentFragment& struct1, const AlignmentFragment& struct2) {
+    return (struct1.start < struct2.start);
+  }
+  };*/
+
+//! vector of AlignmentFragment objects
+typedef std::vector<AlignmentFragment> AlignmentFragmentVector;
+
+/*! Contains the mapping of an aligned contig to the reference genome,
+ * along with pointer to all of the reads aligned to this contig, and a 
+ * store of all of the breakpoints associated with this contig
+ */
+class AlignedContig {
+
+  friend AlignmentFragment;
+
+ public:  
+
+  /*! Constructor which parses an alignment record from BWA (a potentially multi-line SAM record)
+   * @param const reference to a string representing a SAM alignment (contains newlines if multi-part alignment)
+   * @param const pointer to a BamReader, which is used to convert chr ids to strings (e.g. X, Y)
+   * @param const reference to a SnowTools::GenomicRegion window specifying where in the reference this contig was assembled from.
+   */
+  AlignedContig(const std::string &sam, const BamTools::BamReader * reader, const SnowTools::GenomicRegion &twindow, 
+		const CigarMap &nmap, const CigarMap &tmap);
+
+  std::string samrecord; /**< the original SAM record */
+
+  SnowTools::GenomicRegion window; /**< reference window from where this contig was assembled */
+
+  /*! @function Determine if this contig has identical breaks and is better than another.
+   * @param const reference to another AlignedContig
+   * @return bool bool returning true iff this contig has identical info has better MAPQ, or equal MAPQ but longer */
+  bool isWorse(const AlignedContig &ac) const;
+  
+  /*! @function
+    @abstract  Get whether the query is on the reverse strand
+    @param  b  pointer to an alignment
+    @return    boolean true if query is on the reverse strand
+  */
+  void addAlignment(const BamTools::BamAlignment &align, const SnowTools::GenomicRegion &window, 
+		    const CigarMap &nmap, const CigarMap &tmap);
+
+  //! add a discordant cluster that maps to same regions as this contig
+  void addDiscordantCluster(DiscordantCluster dc) { m_dc.push_back(dc); } 
+
+  /*! @function loop through the vector of DiscordantCluster objects
+   * associated with this contig and print
+   */
+  std::string printDiscordantClusters() const;
+
+  //! return the name of the contig
+  std::string getContigName() const { assert(m_align.size()); return m_align[0].align.Name; }
+
+  /*! @function get the maximum mapping quality from all alignments
+   * @return int max mapq
+   */
+  int getMaxMapq() const { 
+    int m = -1;
+    for (auto& i : m_align)
+      if (i.align.MapQuality > m)
+	m = i.align.MapQuality;
+    return m;
+  }
+
+  /*! @function get the minimum mapping quality from all alignments
+   * @return int min mapq
+   */
+  int getMinMapq() const { 
+    int m = 1000;
+    for (auto& i : m_align)
+      if (i.align.MapQuality < m)
+	m = i.align.MapQuality;
+    return m;
+  }
+
+  /*! @function loop through all of the breakpoints and
+   * calculate the split read support for each. Requires 
+   * alignedReads to have been run first (will error if not run).
+   */
+  void splitCoverage();
+  
+  /*! Checks if any of the indel breaks are in a blacklist. If so, mark the
+   * breakpoints of the indels for skipping. That is, hasMinmal() will return false;
+   * @param grv The blaclist regions
+   */
+  void blacklist(GRC& grv);
+
+  /*! @function if this is a Discovar contig, extract
+   * the tumor and normal read support
+   * @param reference to int to fill for normal support
+   * @param reference to int to fill for tumor support
+   * @return boolean reporting if this is a discovar name
+   */
+  bool parseDiscovarName(size_t &tumor, size_t &normal);
+
+  /*! @function dump the contigs to a fasta
+   * @param ostream to write to
+   */
+  void printContigFasta(std::ofstream &os) const;
+
+  /*! @function set the breakpoints on the reference by combining multi-mapped contigs
+   */
+  void setMultiMapBreakPairs();
+
+  /*! @function align reads to contig and modify their tags to show contig mapping
+   * Currently this function will attempt a SmithWaterman alignment for all reads
+   * that don't have an exact mapping to the contig.
+   * @param bav Vector of read smart pointers to align. Modifies their SW tag
+   */
+  void alignReadsToContigs(ReadVec &bav);
+
+  //! return the contig sequence as it came off the assembler
+  std::string getSequence() const { assert(m_seq.length()); return m_seq; }
+
+  //! detemine if the contig contains a subsequence
+  bool hasSubSequence(const std::string& subseq) const { 
+    return (m_seq.find(subseq) != std::string::npos);
+  }
+  
+  //! print this contig
+  friend std::ostream& operator<<(std::ostream &out, const AlignedContig &ac);
+
+  /*! @function query if this contig contains a potential variant (indel or multi-map)
+   * @return true if there is multimapping or an indel
+   */
+  bool hasVariant() const;
+
+  /*! @function retrieves all of the breakpoints by combining indels with global mutli-map break
+   * @return vector of ind
+   */
+  std::vector<BreakPoint> getAllBreakPoints() const;
+  std::vector<const BreakPoint*> getAllBreakPointPointers() const ;
+
+  std::vector<BreakPoint> m_local_breaks; // store all of the multi-map BreakPoints for this contigs 
+
+  BreakPoint m_global_bp;  // store the single spanning BreakPoing for this contig e
+
+  ReadVec m_bamreads; // store smart pointers to all of the reads that align to this contig 
+
+  bool m_skip = false; // flag to specify that we should minimally process and simply dump to contigs_all.sam 
+
+  AlignmentFragmentVector m_align; // store all of the individual alignment fragments 
+
+ private:
+
+  bool m_hasvariant = false; // flag to specify whether this alignment has some potential varaint (eg indel)
+
+  bool m_tried_align_reads = false; // flag to specify whether we tried to align reads.
+
+  std::string m_seq = ""; // sequence of contig as it came off of assembler
+
+  std::vector<DiscordantCluster> m_dc; // collection of all discordant clusters that map to same location as this contig
+
+};
+
+struct PlottedRead {
+
+  int pos;
+  std::string seq;
+  std::string info;
+
+  bool operator<(const PlottedRead& pr) const {
+    return (pos < pr.pos);
+  }
+
+};
+
+typedef std::vector<PlottedRead> PlottedReadVector;
+
+struct PlottedReadLine {
+
+  std::vector<PlottedRead*> read_vec;
+  int available = 0;
+  int contig_len = 0;
+
+  void addRead(PlottedRead *r) {
+    read_vec.push_back(r);
+    available = r->pos + r->seq.length() + 5;
+  }
+
+  bool readFits(PlottedRead &r) {
+    return (r.pos >= available);
+  }
+
+  friend std::ostream& operator<<(std::ostream& out, const PlottedReadLine &r) {
+    int last_loc = 0;
+    for (auto& i : r.read_vec) {
+      assert(i->pos - last_loc >= 0);
+      out << std::string(i->pos - last_loc, ' ') << i->seq;
+      last_loc = i->pos + i->seq.length();
+    }
+    int name_buff = r.contig_len - last_loc;
+    assert(name_buff < 10000);
+    out << std::string(max(name_buff, 5), ' ');
+    for (auto& i : r.read_vec) { // add the data
+      out << i->info << ",";
+    }
+    return out;
+  }
+
+};
+
+typedef std::vector<PlottedReadLine> PlottedReadLineVector;
+
+
+
+#endif
+
+
diff --git a/src/SnowTools/BWAWrapper.h b/src/SnowTools/BWAWrapper.h
new file mode 100644
index 000000000..8514b5cf8
--- /dev/null
+++ b/src/SnowTools/BWAWrapper.h
@@ -0,0 +1,101 @@
+#ifndef SNOWTOOLS_BWAWRAPPER_H__
+#define SNOWTOOLS_BWAWRAPPER_H__
+
+#include <string>
+#include <vector>
+#include <iostream>
+#include <algorithm>
+#include <memory>
+
+#include "SnowTools/BamRead.h"
+
+#define MEM_F_SOFTCLIP  0x200
+
+extern "C" {
+  #include "bwa/bwa.h"
+  #include "bwa/bntseq.h"
+  #include "bwa/kseq.h"
+  #include <stdlib.h>
+  #include "bwa/utils.h"
+  #include "bwa/bwamem.h"
+  //#include "bwa/is.c"
+  int is_bwt(ubyte_t *T, int n);
+
+}
+
+KSEQ_DECLARE(gzFile)
+
+using SnowTools::BamRead;
+
+namespace SnowTools {
+
+  /** Structure to hold unaligned sequence (name and bases)
+   */
+  struct USeq {
+    std::string name;
+    std::string seq;
+  };
+  typedef std::vector<USeq> USeqVector;
+ 
+/** Calls BWA-MEM on sequence queries and returns aligned reads, all in memory 
+ */
+class BWAWrapper {
+
+ public:
+
+  /** Create a new BWAWrapper
+   */
+  BWAWrapper() { 
+    memopt = mem_opt_init();
+    memopt->flag |= MEM_F_SOFTCLIP;
+  }
+
+  ~BWAWrapper() { 
+    
+    if (idx)
+      bwa_idx_destroy(idx);
+    if (memopt)
+      free(memopt);
+  }
+
+  void alignSingleSequence(const std::string& seq);
+
+  /** Construct a new bwa index for this object. 
+   */
+  void constructIndex(const USeqVector& v);
+
+  /** Retrieve a bwa index object from disk
+   */
+  void retrieveIndex(const std::string& file);
+
+  /** Dump the stored index to files 
+   */
+  void writeIndexToFiles(const std::string& index_name);
+
+ private:
+
+  mem_opt_t * memopt;
+
+  bwaidx_t* idx = 0;
+
+  // overwrite the bwa bwt_pac2pwt function
+  bwt_t *__bwt_pac2bwt(const uint8_t *pac, int bwt_seq_lenr);
+
+  // add an anns (chr annotation structure) 
+  bntann1_t* __add_to_anns(const std::string& name, const std::string& seq, bntann1_t * ann, size_t offset);
+
+  // overwrite the bwa-mem add1 function, which takes a sequence and adds to pac
+  uint8_t* __add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_pac, int *m_seqs, int *m_holes, bntamb1_t **q);
+
+  // make the pac structure (2-bit encoded packed sequence)
+  uint8_t* __make_pac(const USeqVector& v, bool for_only, bool write_file);
+
+  void __write_pac_to_file(const std::string& file);
+
+  std::string __print_bns();
+};
+
+}
+
+
+#endif
diff --git a/src/SnowTools/BamRead.h b/src/SnowTools/BamRead.h
new file mode 100644
index 000000000..50cf217d4
--- /dev/null
+++ b/src/SnowTools/BamRead.h
@@ -0,0 +1,142 @@
+#ifndef SNOWTOOLS_READ_H__
+#define SNOWTOOLS_READ_H__
+
+#include <cstdint>
+
+#include "SnowTools/HTSTools.h"
+#include <iostream>
+
+namespace SnowTools {
+
+/** Class to store and interact with an HTSLib bam1_t read.
+ *
+ * HTSLibrary reads are stored in the bam1_t struct. Memory allocation
+ * is taken care of by bam1_t init, and deallocation by destroy_bam1. This
+ * class is a C++ interface that automatically takes care of memory management
+ * for these C allocs/deallocs. The only member of BamRead is a bam1_t object.
+ * Alloc/dealloc is taken care of by the constructor and destructor.
+ */
+class BamRead {
+
+ public:
+  
+  /** Construct an empty BamRead by calling bam_init1() 
+   */
+  BamRead();
+
+  /** Destory a BamRead by calling bam_destroy1(b)
+   */
+  ~BamRead();
+
+  /** BamRead is aligned on reverse strand */
+  inline bool ReverseFlag() const { return (b->core.flag&BAM_FREVERSE) != 0; }
+
+  /** Mate of BamRead is aligned on reverse strand */
+  inline bool MateReverseFlag() const { return (b->core.flag&BAM_FMREVERSE) != 0; }
+  
+  /** Get the alignment position */
+  inline int32_t Position() const { return b->core.pos; }
+  
+  /** Get the alignment position of the mate */
+  inline int32_t MatePosition() const { return b->core.mpos; }
+  
+  /** Get the chromosome ID of the read */
+  inline int32_t ChrID() const { return b->core.tid; }
+  
+  /** Get the chrosome ID of the mate read */
+  inline int32_t MateChrID() const { return b->core.mtid; }
+  
+  /** Check if this read is mapped*/
+  inline bool MappedFlag() const { return !(b->core.flag&BAM_FUNMAP); }
+  
+  /** Check if this reads mate is mapped*/
+  inline bool MateMappedFlag() const { return !(b->core.flag&BAM_FMUNMAP); }
+  
+  /** Get the mapping quality */
+  inline int32_t MapQuality() const { return b->core.qual; }
+  
+  /** Get the number of cigar fields */
+  inline int32_t CigarSize() const { return b->core.n_cigar; }
+  
+  /** Check if this read is first in pair */
+  inline bool FirstFlag() const { return (b->core.flag&BAM_FREAD1); }
+  
+  /** Get the qname of this read as a string */
+  inline std::string Qname() const { return std::string(bam_get_qname(b)); }
+  
+  /** Get the qname of this read as a char array */
+  inline char* QnameChar() const { return bam_get_qname(b); }
+  
+  /** Get the full alignment flag for this read */
+  inline int32_t AlignmentFlag() const { return b->core.flag; }
+  
+  /** Get the insert size for this read */
+  inline int32_t InsertSize() const { return b->core.isize; } 
+  
+  /** Get the number of query bases of this read (aka length) */
+  inline int32_t Length() const { return b->core.l_qseq; }
+  
+  void SetMapQuality(int32_t m) { b->core.qual = m; } 
+
+  /** Set the query name */
+  void SetQname(std::string n);
+
+  /** Set the sequence name */
+  void SetSequence(std::string s);
+
+  friend std::ostream& operator<<(std::ostream& out, const BamRead &r);
+
+  /** Get the sequence of this read as a string */
+  inline std::string Sequence() const { 
+    uint8_t * p = bam_get_seq(b);
+    char c[b->core.l_qseq+1]; // need +1 for \0 at end
+    for (int32_t i = 0; i < b->core.l_qseq; ++i)
+      c[i] = BASES[bam_seqi(p, i)];
+    c[b->core.l_qseq] = '\0'; // null terminate for proper std::string construciton
+    return std::string(c);
+  }
+  
+  /** Get the number of soft clipped bases */
+  inline int32_t NumSoftClip() const {
+      int32_t p = 0;
+      uint32_t* c = bam_get_cigar(b);
+      for (int32_t i = 0; i < b->core.n_cigar; ++i)
+	if (c[i] & BAM_CSOFT_CLIP)
+	  p += bam_cigar_oplen(c[i]);
+      return p;
+    }
+
+  /** Get the number of clipped bases (hard clipped and soft clipped) */
+  inline int32_t NumClip() const {
+    int32_t p = 0;
+    uint32_t* c = bam_get_cigar(b);
+    for (int32_t i = 0; i < b->core.n_cigar; ++i)
+      if ( (c[i] & BAM_CSOFT_CLIP) || (c[i] & BAM_CHARD_CLIP) )
+	p += bam_cigar_oplen(c[i]);
+    return p;
+  }
+  
+  /** Get a string (Z) tag 
+   * @param tag Name of the tag. e.g. "XP"
+   * @return The value stored in the tag. Returns empty string if it does not exist.
+   */
+  inline std::string GetZTag(const std::string& tag) const {
+    uint8_t* p = bam_aux_get(b,tag.c_str());
+    if (!p)
+      return "";
+    return std::string(bam_aux2Z(p));
+    
+  }
+  
+  //std::string toSam(bam_hdr_t* h) const;
+  
+  private:
+
+  //bam_hdr_h *hdr;
+  bam1_t * b;
+  
+
+};
+
+}
+#endif
diff --git a/src/SnowTools/BamStats.h b/src/SnowTools/BamStats.h
new file mode 100644
index 000000000..2b65911d9
--- /dev/null
+++ b/src/SnowTools/BamStats.h
@@ -0,0 +1,85 @@
+#ifndef SNOWTOOLS_BAMSTATS_H__
+#define SNOWTOOLS_BAMSTATS_H__
+
+#include <unordered_map>
+#include <cstdint>
+#include <iostream>
+
+#include "SnowTools/Histogram.h"
+#include "SnowTools/HTSTools.h"
+
+namespace SnowTools{
+
+  /** Store information pertaining to a given read group *
+   *
+   * This class will collect statistics on number of: read, supplementary reads, unmapped reads, qcfail reads, duplicate reads.
+   * It will also create Histogram objects to store counts of: mapq, nm, isize, clip, mean phred score, length
+   */
+class BamReadGroup {
+
+  friend class BamStats;
+  
+ public:
+
+  /** Construct an empty BamReadGroup */
+  BamReadGroup() {}
+
+  /** Construct an empty BamReadGroup for the specified read group
+   * @param name Name of the read group
+   */
+  BamReadGroup(const std::string& name);
+
+  /** Display basic information about this read group
+   */
+  friend std::ostream& operator<<(std::ostream& out, const BamReadGroup& rg);
+
+  /** Add a BamRead to this read group */
+  void addRead(Read &r);
+
+ private:
+
+  size_t reads;
+  size_t supp;
+  size_t unmap;  
+  size_t qcfail;
+  size_t duplicate;
+
+  Histogram mapq;
+  Histogram nm;
+  Histogram isize;
+  Histogram clip;
+  Histogram phred;
+  Histogram len;
+
+  std::string m_name;
+
+};
+
+/** Class to store statistics on a BAM file.
+ *
+ * BamStats currently stores a map of BamReadGroup objects. Bam statistics
+ * are collected then on a read-group basis, but can be output in aggregate. See
+ * BamReadGroup for description of relevant BAM statistics.
+ */
+class BamStats
+{
+
+ public:
+  
+  /** Loop through the BamReadGroup objections and print them */
+  friend std::ostream& operator<<(std::ostream& out, const BamStats& qc);
+
+  /** Add a read by finding which read group it belongs to and calling the 
+   * addRead function for that BamReadGroup.
+   */
+  void addRead(Read &r);
+
+ private:
+  
+  std::unordered_map<std::string, BamReadGroup> m_group_map;
+
+};
+
+}
+
+#endif
diff --git a/src/SnowTools/BamWalker.h b/src/SnowTools/BamWalker.h
new file mode 100644
index 000000000..6b56c4ccb
--- /dev/null
+++ b/src/SnowTools/BamWalker.h
@@ -0,0 +1,259 @@
+#ifndef SNOWTOOLS_BAM_WALKER_H__
+#define SNOWTOOLS_BAM_WALKER_H__
+
+#include <cassert>
+
+#include "SnowTools/MiniRules.h"
+#include "SnowTools/GenomicRegion.h"
+#include "SnowTools/GenomicRegionCollection.h"
+
+#include "SnowTools/HTSTools.h"
+#include "SnowTools/SnowUtils.h"
+
+// Phred score transformations
+inline int char2phred(char b) {
+  uint8_t v = b;
+  assert(v >= 33);
+  return v - 33;
+}
+
+// from samtools
+inline char *samfaipath(const char *fn_ref)
+{
+  char *fn_list = 0;
+  if (fn_ref == 0) return 0;
+  fn_list = (char*)calloc(strlen(fn_ref) + 5, 1);
+  strcat(strcpy(fn_list, fn_ref), ".fai");
+  if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
+    std::cerr << "ERROR: Cannot read the index file for CRAM read/write" << std::endl;
+  }
+  return fn_list;
+}
+
+namespace SnowTools {
+
+/////////////// 
+// Hold read counts
+//////////////
+struct ReadCount {
+
+  int keep = 0;
+  int total = 0;
+  
+  int percent () const {
+    int perc  = SnowTools::percentCalc<int>(keep, total); 
+    return perc;
+  }
+
+  std::string totalString() const {
+    return SnowTools::AddCommas<int>(total);
+  }
+
+  std::string keepString() const {
+    return SnowTools::AddCommas<int>(keep);
+  }
+
+};
+
+/** Walk along a BAM or along BAM regions and stream in/out reads
+ */
+class BamWalker {
+
+ public:
+
+  /** Construct a new BamWalker for streaming data in and streaming
+   * out to a new BAM file.
+   *
+   * This constructor will open the BAM file and read the header. 
+   * It will also open the out BAM file.
+   */
+  //BamWalker(const std::string& in, const std::string& out);
+
+  /** Construct a new BamWalker for reading a BAM
+   */
+  BamWalker(const std::string& in);
+
+  /** Construct an empty BamWalker */
+  BamWalker() {}
+
+  /** Destroy a BamWalker and close all connections to the BAM 
+   * 
+   * Calling the destructor will take care of all of the C-style dealloc
+   * calls required within HTSlib to close a BAM or SAM file. 
+   */
+  ~BamWalker() {
+    __close_read_bam();
+    __close_write_bam();
+  }
+
+  /** Set a part of the BAM to walk.
+   *
+   * This will set the BAM pointer to the given region.
+   * @param gp Location to point the BAM to
+   */
+  void setBamWalkerRegion(const GenomicRegion& gp);
+
+  /** Set up multiple regions. Overwrites current regions. 
+   * 
+   * This will set the BAM pointer to the first element of the
+   * input list.
+   * @param grv Set of location to point BAM to
+   */
+  void setBamWalkerRegions(const GenomicRegionVector& grv);
+
+  /** Create the index file for the output bam in BAI format.
+   *
+   * This will make a call to HTSlib bam_index_build for the output file.
+   */
+  void MakeIndex();
+
+  /** Print a run-time message to stdout.
+   *
+   * Prints a message about all of the reads that have been visited, and informaiton
+   * about the current read
+   */
+  void printRuntimeMessage(const ReadCount &rc_main, const Read &r) const;
+
+  /** Print out some basic info about this walker, 
+   * including Minz0iRules
+   */
+  friend std::ostream& operator<<(std::ostream& out, const BamWalker& b);
+
+  /** Open a BAM file for streaming in
+   */
+  bool OpenReadBam(const std::string& bam);
+
+  /** Open a BAM file for streaming out
+   */
+  bool OpenWriteBam(const std::string& bam);
+
+  /** Pass a MiniRules script to the BAM.
+   * 
+   * This will call the constructor of MiniRulesCollection, and 
+   * parse the provides rules and add it as a rule set to this BamWalker.
+   * @param rules A string of rules, or a file pointing to a rules script
+   */
+  void SetMiniRulesCollection(const std::string& rules);
+  
+  /** Explicitly provide a MiniRulesCollection to this BamWalker
+   */
+  void SetMiniRulesCollection(const MiniRulesCollection& mr) { m_mr = mr; }
+
+  /** Retrieve the next read from the BAM.
+   *
+   * If a MiniRulesCollection is defined for this BAM
+   * will grab the next valid read.
+   * r Read to fill with data
+   * rule bool identifying if this read passed the rules
+   * @return true if the next read is available
+   */
+  bool GetNextRead(Read &r, bool& rule);
+
+  /** Write an alignment to the output BAM file 
+   * @param r The BamRead to save
+   */
+  void WriteAlignment(Read &r);
+
+  /** Return the MiniRulesCollection object used by this BamWalker
+   */
+  const MiniRulesCollection& GetMiniRulesCollection() const { return m_mr; }
+
+  /** Send the counts for passed rules to a file */
+  void MiniRulesToFile(const std::string& file) const { m_mr.countsToFile(file); }
+
+  /** Set the BamWalker to count reads for all rules */
+  void setCountAllRules() { m_mr.m_fall_through = true; }
+
+
+  std::string m_in;
+  std::string m_out;
+  MiniRulesCollection m_mr;
+
+  /** Set a flag to say if we should print reads to stdout
+   */
+  void setStdout() { fop = sam_open("-", "w"); }
+
+  /** Set a flag to say if we should print reads to CRAM format
+   */
+  void setCram(const std::string& out, const std::string& ref) { 
+    m_out = out;
+    fop = sam_open(m_out.c_str(), "wc"); 
+    if (!fop) {
+      std::cerr << "!!!\n!!!\n!!!\nCannot open CRAM file for writing. Will try BAM next. File: " <<  m_out << std::endl;
+      return;
+    }
+
+    // need to open reference for CRAM writing 
+    char* fn_list = samfaipath(ref.c_str());
+    if (fn_list) {
+      if (hts_set_fai_filename(fop, fn_list) != 0) {
+	fprintf(stderr, "Failed to use reference \"%s\".\n", fn_list);
+      }
+    } else {
+      std::cerr << "Failed to get the reference for CRAM compression" << std::endl;
+    }
+    m_print_header = true; 
+  }
+
+  void setPrintHeader() { 
+    m_print_header = true;
+  }
+
+  /** Set the output bam to remove all alignment tags */
+  void setStripAllTags() { m_strip_all_tags = true; }
+
+  /** Set a list of tags to strip */
+  void setStripTags(const std::string& list);
+  
+  /** Set to have verbose actions */
+  void setVerbose() { m_verbose = true; }
+
+  /** Return the MiniRulesCollection as a string */
+  std::string displayMiniRulesCollection() const;
+
+ protected:
+
+  // for stdout mode, print header?
+  bool m_print_header = false;
+
+  // point index to this region of bam
+  bool __set_region(const GenomicRegion& gp);
+
+  struct timespec start;
+
+  //open bam, true if success
+  bool __open_BAM_for_reading();
+
+  // close this bam file and delete all pointers
+  void __close_read_bam();
+  void __close_write_bam();
+
+  // open m_out, true if success
+  bool __open_BAM_for_writing();
+  
+  // define the regions to walk
+  size_t m_region_idx = 0;
+  GenomicRegionVector m_region;
+
+  bool m_strip_all_tags = false;
+
+  // hts
+  BGZF * fp = 0;
+  hts_idx_t * idx = 0; // hts_idx_load(bamfile.c_str(), HTS_FMT_BAI);
+  hts_itr_t * hts_itr = 0; // sam_itr_queryi(idx, 3, 60000, 80000);
+  bam_hdr_t * br = 0;
+
+  htsFile* fop = 0;
+
+  // which tags to strip
+  std::vector<std::string> m_tag_list;
+
+  // 
+  bool m_verbose = false;
+};
+
+
+}
+#endif 
+
+
diff --git a/src/SnowTools/GenomicRegion.h b/src/SnowTools/GenomicRegion.h
new file mode 100644
index 000000000..18465b723
--- /dev/null
+++ b/src/SnowTools/GenomicRegion.h
@@ -0,0 +1,104 @@
+#ifndef SNOWTOOLS_GENOMIC_REGION_H__
+#define SNOWTOOLS_GENOMIC_REGION_H__
+
+#include <vector>
+#include <iostream>
+#include <cstdint>
+#include <utility>
+#include <list>
+
+#include "SnowTools/SnowUtils.h"
+#include "SnowTools/SnowToolsCommon.h"
+#include "SnowTools/HTSTools.h"
+
+/** 
+ */
+namespace SnowTools {
+class GenomicRegion {
+
+  template<typename T> friend class GenomicRegionCollection;
+  
+ public:
+
+  /** Construct an "empty" GenomicRegion at (chr -1), pos 0, width = 1
+   */
+  GenomicRegion() : chr(-1), pos1(0), pos2(0) {};
+  
+  /** Construct a GenomicRegion at a specific start and end location 
+   * @param t_chr Chromosome id  (chr1 = 0, etc)
+   * @param t_pos1 Start position
+   * @param t_pos2 End position
+   * @param strand true for positive, false for negative
+  */
+  GenomicRegion(int32_t t_chr, uint32_t t_pos1, uint32_t t_pos2, bool t_strand = true);
+
+  /** Construct a GenomicRegion from a string
+   */
+  GenomicRegion(std::string t_chr, std::string t_pos1, std::string t_pos2);
+  
+  /** Construct a GenomicRegion from a samtools style region string.
+   *
+   * This calls the samtools-like parser, which accepts in form "chr7:10,000-11,100".
+   * Note that this requires that a pointer to the BAM header be provided as well 
+   * to convert the text representation of the chr to the id number.
+   */
+  GenomicRegion(const std::string& reg, bam_hdr_t* h);
+
+  static int32_t chrToNumber(std::string ref);
+  static std::string chrToString(int32_t ref);
+
+  static uint32_t posToBigPos(int32_t refid, uint32_t pos);
+
+  /** Randomize the position of this GenomicRegion on the genome
+   * 
+   * Creates a GenomicRegion with pos1 = pos2. Simulates a random value
+   * with val <= genome_size_XY and then converts to GenomicRegion
+   */
+  void random();
+
+  /** Check if the GenomicRegion is empty (aka chr -1)
+   */
+  bool isEmpty() const;
+
+  /** Find the distance between two GenomicRegion objects
+   */
+  int distance(const GenomicRegion &gr) const;
+
+  // define how these are to be sorted
+  bool operator < (const GenomicRegion& b) const;
+  bool operator==(const GenomicRegion& b) const;
+  bool operator<=(const GenomicRegion &b) const;
+  
+  // determine if something overlaps with centromere 
+  int centromereOverlap() const;
+
+  // check if there is an overlap
+  int getOverlap(const GenomicRegion gr) const;
+
+  friend std::ostream& operator<<(std::ostream& out, const GenomicRegion& gr);
+  std::string toString() const;
+  void pad(uint32_t pad);
+  
+  int width() const;
+
+  int32_t chr = 0;
+  uint32_t pos1 = 0;
+  uint32_t pos2 = 0;
+  bool strand = true; // true = pos, false = neg
+
+ private:
+
+
+  //char strand = '*';
+  //std::string id;
+  //int mapq = 0;
+  
+};
+
+typedef std::vector<GenomicRegion> GenomicRegionVector;
+typedef GenomicRegion GR;
+
+}
+
+
+#endif
diff --git a/src/SnowTools/GenomicRegionCollection.h b/src/SnowTools/GenomicRegionCollection.h
new file mode 100644
index 000000000..aa4621b81
--- /dev/null
+++ b/src/SnowTools/GenomicRegionCollection.h
@@ -0,0 +1,200 @@
+#ifndef SWAP_GENOMIC_REGION_COLLECTION_H__
+#define SWAP_GENOMIC_REGION_COLLECTION_H__
+
+#include <vector>
+#include <string>
+#include <cstdlib>
+#include <list>
+#include <unordered_map>
+
+#include "SnowTools/IntervalTree.h"
+#include "SnowTools/GenomicRegion.h"
+
+
+/** Class to store vector of intervals on the genome
+ */
+
+namespace SnowTools {
+
+typedef Interval<size_t> GenomicInterval;
+typedef std::unordered_map<int, std::vector<GenomicInterval> > GenomicIntervalMap;
+typedef IntervalTree<size_t> GenomicIntervalTree;
+typedef std::unordered_map<int, GenomicIntervalTree> GenomicIntervalTreeMap;
+typedef std::vector<GenomicInterval> GenomicIntervalVector;
+
+template<typename T=GenomicRegion>
+class GenomicRegionCollection {
+
+ public:
+
+  /** Construct an empty GenomicRegionCollection 
+   */
+  GenomicRegionCollection() {};
+
+  /** Construct from a plain vector of GenomicRegion objects
+   */
+  GenomicRegionCollection(std::vector<T>& vec);
+
+  /** Construct from a single GenomicRegion
+   */
+  GenomicRegionCollection(const T& gr);
+
+  /** Construct a GenomicRegionCollection with overlapping intervals
+   * 
+   * @param width Desired bin width
+   * @param ovlp Amount that the bins should overlap
+   * @param gr GenomicRegion to divide into smaller overlapping bins
+   */
+  GenomicRegionCollection(int width, int ovlp, const T &gr);
+
+  /** Read in a MuTect call-stats file and adds to GenomicRegionCollection object.
+   *
+   * Reads a MuTect call-stats file and imports only
+   * lines with KEEP marked. 
+   * @param file Path to call-stats file
+   * @param pad Amount to pad intervals by
+   */
+  void readMuTect(const std::string &file, int pad = 0);
+
+  /** Read in a BED file and adds to GenomicRegionCollection object
+   * @param file Path to BED file
+   * @param pad Amount to pad intervals by
+   */
+  void readBEDfile(const std::string &file, int pad = 0);
+
+  /** Read in a VCF file and adds to GenomicRegionCollection object
+   * @param file Path to BED file
+   * @param pad Amount to pad intervals by
+   */
+  void readVCFfile(const std::string &file, int pad = 0);
+
+  /** Read in a text file (can be gzipped) and add to GenomicRegionCollection
+   *
+   * This function will automatically detect which file type is being input:
+   * -- ends in .vcf -> readVCFfile
+   * -- ends in .bed -> readBEDfile
+   * -- header contains "MuTect" -> readMuTect
+   * The values are appended to existing vector of GenomicRegion objects
+   * @param file Text file to read and store intervals
+   * @param pad Amount to pad the intervals by (calls GenomicRegion::pad)
+   */
+  void regionFileToGRV(const std::string &file, int pad = 0);
+
+  /** Fill in the GenomicIntervalTreeMap stored in this object. 
+   *
+   * A GenomicIntervalTreeMap is an unordered_map of GenomicIntervalTrees for 
+   * each chromosome. A GenomicIntervalTree is an interval tree on the ranges
+   * defined by the genomic interval, with cargo set at the same GenomicRegion object.
+   */
+  void createTreeMap();
+  
+  /** Send the GenomicRegionCollection to a BED file
+  * @param file Output file
+  */
+  void sendToBED(const std::string file);
+  
+  /** Reduces the GenomicRegion objects to minimal set
+   */
+  void mergeOverlappingIntervals();
+
+  /** Return the number of GenomicRegions stored 
+   */
+  size_t size() const { return m_grv.size(); }
+
+  /** Add a new GenomicRegion to end
+   */
+  void add(const T& g) { m_grv.push_back(g); createTreeMap(); }
+
+  /** Is this object empty?
+   */
+  bool empty() const { return !m_grv.size(); }
+
+  /** Clear out all of the GenomicRegion objects
+   */
+  void clear() { m_grv.clear(); m_tree.clear(); idx = 0; }
+
+  /** Retrieve a GenomicRegion at given index. 
+   * 
+   * Note that this does not move the idx iterator, which is 
+   * used to loop through all the regions. Throws an exception
+   * if the index is out of bounds.
+   * @return GenomicRegion pointed to by index i
+   */
+  const T& at(size_t i) const;
+
+  /** Find overlaps between this vector and input GenomicRegion.
+   *
+   * Requires that the GenomicIntervalTreeMap have been created first
+   * @param gr Region to test
+   * @return Number of overlapping elements in this GenomicRegionCollection
+   */
+ size_t findOverlapping(const T &gr);
+
+ size_t countContained(const T &gr);
+ 
+ GenomicRegionCollection<GenomicRegion> findOverlaps(GenomicRegionCollection<GenomicRegion> &subject, std::vector<size_t>& query_id, std::vector<size_t>& subject_id, bool ignore_strand = false);
+
+ /** Increase the left and right ends of each contained GenomicRegion by 
+  * the pad value.
+  * @param v Amount to pad each end by. Result is increase in width by 2*pad.
+  */
+ void pad(int v);
+
+ /** Set the i'th GenomicRegion */
+ T& operator[](size_t i) { return m_grv[i]; }
+ 
+ /** Retreive the i'th GenomicRegion */
+ const T& operator[](size_t i) const { return m_grv[i]; }
+ 
+  /** Add two GenomicRegionCollection objects together
+   */
+  void concat(const GenomicRegionCollection<T>& g);
+
+  /** Output the GenomicRegionCollection to a BED format
+   *
+   * Currently just outputs chr   pos   pos2 with no header.
+   * @return BED formated string reprsentation 
+   */
+  std::string sendToBED() const;
+
+  /** Fill the next GenomicRegion object. 
+   * @return false if add end of vector
+   */
+  bool getNextGenomicRegion(T& gr);
+
+  /** Rewind the element pointer to the first GenomicRegion
+   */
+  void rewind() { idx = 0; }
+
+  GenomicRegionVector asGenomicRegionVector() const { 
+    GenomicRegionVector gg;
+    for (auto& i : m_grv)
+      gg.push_back(GenomicRegion(i.chr, i.pos1, i.pos2, i.strand));
+    return gg; 
+  } 
+  
+  typename std::vector<T>::iterator begin() { return m_grv.begin(); } 
+
+  typename std::vector<T>::iterator end() { return m_grv.end(); } 
+
+  // always construct this object any time m_grv is modifed
+  GenomicIntervalTreeMap m_tree;
+ 
+ GenomicRegionCollection<GenomicRegion> intersection(GenomicRegionCollection<GenomicRegion>& subject, bool ignore_strand = false);
+
+  std::vector<T> m_grv;
+  
+ private:
+
+  // index for current GenomicRegion
+  size_t idx = 0;
+
+};
+
+typedef GenomicRegionCollection<GenomicRegion> GRC;
+
+}
+
+#include "GenomicRegionCollection.cpp"
+
+#endif
diff --git a/src/SnowTools/HTSTools.h b/src/SnowTools/HTSTools.h
new file mode 100644
index 000000000..4bc299b3f
--- /dev/null
+++ b/src/SnowTools/HTSTools.h
@@ -0,0 +1,85 @@
+#ifndef SNOWTOOLS_HTSTOOLS_H__
+#define SNOWTOOLS_HTSTOOLS_H__
+
+#include <memory>
+#include <vector>
+#include <unordered_map>
+#include <string>
+#include <algorithm>
+
+#include "htslib/hts.h"
+#include "htslib/sam.h"
+#include "htslib/bgzf.h"
+#include "htslib/kstring.h"
+
+
+static const char BASES[16] = {' ', 'A', 'C', ' ',
+                               'G', ' ', ' ', ' ', 
+                               'T', ' ', ' ', ' ', 
+                               ' ', ' ', ' ', 'N'};
+
+struct free_delete {
+  void operator()(void* x) { bam_destroy1((bam1_t*)x); }
+};
+
+typedef std::shared_ptr<bam1_t> Read;
+typedef std::vector<Read> ReadVec;
+
+#define r_is_rev(b) (((b)->core.flag&BAM_FREVERSE) != 0)
+#define r_is_mrev(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
+#define r_pos(b) ((b)->core.pos)
+#define r_mpos(b) ((b)->core.mpos)
+#define r_id(b) ((b)->core.tid)
+#define r_mid(b) ((b)->core.mtid)
+#define r_is_mapped(b) (!((b)->core.flag&BAM_FUNMAP))
+#define r_is_mmapped(b) (!((b)->core.flag&BAM_FMUNMAP))
+#define r_mapq(b) ((b)->core.qual)
+#define r_cig_size(b) ((b)->core.n_cigar)
+#define r_is_first(b) ((b)->core.flag&BAM_FREAD1)
+#define r_qname(b) (std::string(bam_get_qname((b).get())))
+#define r_remove_tag(b, t) do { uint8_t *p  = bam_aux_get((b).get(), t); if (p) bam_aux_del((b).get(), p); } while(0)
+#define r_flag(b) ((b)->core.flag)
+#define r_is_pmapped(b) (!((b)->core.flag&BAM_FUNMAP) && !((b)->core.flag&BAM_FMUNMAP) && ((b)->core.flag&BAM_FPAIRED))
+#define r_length(b) ((b)->core.l_qseq)
+#define r_isize(b) ((b)->core.isize)
+#define r_seq(b, s) do { uint8_t * p = bam_get_seq(b); char c[(b)->core.l_qseq+1]; for (int ww = 0; ww < (b)->core.l_qseq; ww++) { c[ww] = BASES[bam_seqi(p, ww)]; } c[(b)->core.l_qseq] = '\0'; s = std::string(c); } while(0)
+#define r_get_clip(b, p) do { uint32_t* c = bam_get_cigar(b); for (int ww = 0; ww < (b)->core.n_cigar; ww++) if (c[ww] & BAM_CSOFT_CLIP || c[ww] & BAM_CHARD_CLIP) p += bam_cigar_oplen(c[ww]); } while(0)
+
+#define r_get_Z_tag(b, t, v) do { uint8_t* p = bam_aux_get((b).get(), t); if (p) v = std::string(bam_aux2Z(p)); else v = std::string(); } while(0) 
+#define r_get_SR(b, v) do { uint8_t* p = bam_aux_get((b).get(), "SR"); assert(p); v = bam_aux2Z(p); } while(0) 
+#define r_get_int32_tag(b, t, v) do { uint8_t* p = bam_aux_get((b).get(), t); if (p) v = bam_aux2i(p); else v = 0; } while(0) 
+
+#define r_cig_type(b, i) (bam_cigar_opchr(bam_get_cigar((b).get())[i]))
+#define r_cig_len(b, i) (bam_cigar_oplen(bam_get_cigar((b).get())[i]))
+
+#define r_add_Z_tag(b, t, v) bam_aux_append((b).get(), t, 'Z', (v).length()+1, (uint8_t*)(v).c_str())
+#define r_add_int32_tag(b, t, v) bam_aux_append((b).get(), t, 'i', 4, (uint8_t*)&(v))
+#define r_count_nbases(b, n) do { uint8_t* p = bam_get_seq(b); for (int ww = 0; ww < (b)->core.l_qseq; ww++) if (bam_seqi(p,ww) == 15) n++; } while(0)
+#define r_count_sub_nbases(b, n, s, e) do { uint8_t* p = bam_get_seq(b); for (int ww = (s); ww < (e); ww++) if (bam_seqi(p,ww) == 15) n++; } while(0)
+
+#define r_is_rstrand(b) (((b)->core.flag&BAM_FREVERSE) > 0) 
+#define r_is_mrstrand(b) (((b)->core.flag&BAM_FMREVERSE) > 0)
+#define r_is_dup(b) (((b)->core.flag&BAM_FDUP) > 0)
+#define r_is_primary(b) (!((b)->core.flag&BAM_FSECONDARY))
+#define r_is_qc_fail(b) (((b)->core.flag&BAM_FQCFAIL) > 0)
+#define r_strand(b) ((b)->core.flag&BAM_FREVERSE ? '-' : '+')
+#define r_endpos(b) (bam_endpos((b).get()))
+
+#define r_get_trimmed_seq(b, s) do { int32_t ts, tl; r_get_int32_tag(b, "TS", ts);  r_get_int32_tag(b, "TL", tl); r_seq(b, s); if (tl != 0 && tl != r_length(b))  s = s.substr(ts, tl); } while (0) 
+
+  namespace SnowTools {
+
+    int32_t qualityTrimRead(int qualTrim, int32_t &startpoint, Read &r);
+
+    std::vector<std::string> GetStringTag(const Read& a, const std::string tag);
+    
+    void SmartAddTag(Read &a, const std::string tag, const std::string val);
+    
+    std::vector<int> GetIntTag(const Read& a, const std::string tag);
+
+    void rcomplement(std::string& a);
+
+    void removeAllTags(Read& a);
+  }
+
+#endif
diff --git a/src/SnowTools/Histogram.h b/src/SnowTools/Histogram.h
new file mode 100644
index 000000000..f945e6439
--- /dev/null
+++ b/src/SnowTools/Histogram.h
@@ -0,0 +1,168 @@
+#ifndef SNOWTOOLS_HISTOGRAM_H__
+#define SNOWTOOLS_HISTOGRAM_H__
+
+#include <iostream>
+#include <cassert>
+#include <string>
+#include <utility>
+#include <vector>
+#include <fstream>
+
+#include "IntervalTree.h"
+
+class Bin;
+
+typedef Interval<Bin> BinInterval;
+typedef IntervalTree<Bin> BinIntervalTree;
+typedef std::vector<BinInterval> BinIntervalVector;
+
+typedef uint32_t S;
+
+#define INTERCHR 250000000
+
+namespace SnowTools {
+
+  class Matrix;
+  class Histogram;
+
+/** Stores one bin in a Histogram
+ */
+class Bin {
+
+  friend class Histogram;
+
+ public:
+  
+  /** Construct a new object with 0 count and range [0,1]
+   */
+  Bin() : m_count(0) 
+  { 
+    bounds = {0,1}; 
+  }
+    
+    /** Output the bin in format "start range, end range, count"
+     */
+    friend std::ostream& operator<<(std::ostream &out, const Bin &b) {
+      out << b.bounds.first << "," << b.bounds.second << "," << b.m_count;
+      return out;
+    }
+
+    /** Return the number of counts in this histogram bin 
+     */
+    size_t getCount() const { return m_count; }
+    
+    /** Check if a value fits within the range of this bin 
+     * @param dist Distance value to check if its in this range
+     * @return true if the value is within the range
+     */
+
+    /** Check if this bin contains a value
+     * @param elem Value to check if its in this range
+     * @return true if the value is within the range
+     */
+    bool contains(const int32_t& elem) const; 
+
+    /** Define bin comparison operator by location of left bound, then right */
+    bool operator < (const Bin& b) const;
+    
+    /** Decrement the histogram bin by one. 
+     * Note that this is the prefix version only
+     */
+    Bin& operator--();
+
+    /** Increment the histogram bin by one. 
+     * Note that this is the prefix version only
+     */
+    Bin& operator++();
+
+ private:
+    size_t m_count;
+    std::pair<int32_t, int32_t> bounds; //@! was"bin";
+};
+
+/** Class to store histogram of numeric values.
+ *
+ * The bins of the Histogram are not uniformly spaced, and their ranges determined 
+ * by partitioning the spans it tablulates into uniform quantiles when initialized
+ * by Histogram::initialSpans(). As elements are added and removed this initial bin
+ * definition remains constant.
+ */
+class Histogram { 
+
+ private:
+
+  std::vector<int32_t> m_ind;
+
+ public:
+
+  std::vector<Bin> m_bins;
+  /** Construct an empty histogram
+   */
+  Histogram() {}
+
+  /** Construct a new histogram with bins spaced evenly
+   */
+  Histogram(const int32_t& start, const int32_t& end, const uint32_t& width);
+
+  friend std::ostream& operator<<(std::ostream &out, const Histogram &h) {
+    for (auto& i : h.m_bins)
+      out << i << std::endl;
+    return out;
+  }
+
+  /** Return iterator to the fist bin
+   */
+  std::vector<int32_t>::iterator begin() { return m_ind.begin(); }
+  
+  /** Return iterator to the last bin
+   */
+  std::vector<int32_t>::iterator end() { return m_ind.end(); }
+  
+  /** initialize histogram from a vector of spans 
+   */
+  void initialSpans(size_t num_bins, std::vector<S>* pspanv, size_t min_bin_width = 0);
+
+  /** Add an element to the histogram
+   * @param elem Length of event to add
+   */
+  void addElem(const int32_t &elem);
+
+  /** Remove a span from the histogram
+   * @param span Length of event to remove
+   */
+  void removeElem(const int32_t &elem);
+
+  /** Output to CSV file like: bin_start,bin_end,count
+   */
+  void toCSV(std::ofstream &fs);
+
+  /** Return the total number of elements in the Histogram
+   */
+  size_t totalCount() const {
+    size_t tot = 0;
+    for (auto&  i : m_bins)
+      tot += i.getCount();
+    return tot;
+  }
+  
+  /** Get count for a histogram bin
+   * @param i Bin index
+   * @return number of events in histogram bin
+   */
+  int32_t binCount(size_t i) { return m_bins[i].getCount(); }
+
+  /** Get number of bins in histogram
+   * @return Number of bins in histogram
+   */
+  size_t numBins() { return m_bins.size(); }
+
+  /** Find bin corresponding to a span
+   * @param elem Event length
+   * @return Bin containing event length
+   */
+  size_t retrieveBinID(const int32_t& elem) const;
+
+};
+
+}
+#endif
diff --git a/src/SnowTools/IntervalTree.h b/src/SnowTools/IntervalTree.h
new file mode 100644
index 000000000..7b5c27c80
--- /dev/null
+++ b/src/SnowTools/IntervalTree.h
@@ -0,0 +1,214 @@
+#ifndef __INTERVAL_TREE_H
+#define __INTERVAL_TREE_H
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+template <class T, typename K = std::size_t>
+  class Interval {
+  public:
+  K start;
+  K stop;
+  T value;
+  Interval(K s, K e, const T& v)
+  : start(s)
+  , stop(e)
+  , value(v)
+  { }
+};
+
+template <class T, typename K>
+  K intervalStart(const Interval<T,K>& i) {
+  return i.start;
+}
+
+template <class T, typename K>
+  K intervalStop(const Interval<T,K>& i) {
+  return i.stop;
+}
+
+template <class T, typename K>
+  std::ostream& operator<<(std::ostream& out, Interval<T,K>& i) {
+  out << "Interval(" << i.start << ", " << i.stop << "): " << i.value;
+  return out;
+}
+
+template <class T, typename K = std::size_t>
+  class IntervalStartSorter {
+  public:
+  bool operator() (const Interval<T,K>& a, const Interval<T,K>& b) {
+    return a.start < b.start;
+  }
+};
+
+template <class T, typename K = std::size_t>
+  class IntervalTree {
+
+  public:
+  typedef Interval<T,K> interval;
+  typedef std::vector<interval> intervalVector;
+  typedef IntervalTree<T,K> intervalTree;
+    
+  intervalVector intervals;
+  intervalTree* left;
+  intervalTree* right;
+  K center;
+
+  IntervalTree<T,K>(void)
+  : left(NULL)
+  , right(NULL)
+  , center(0)
+  { }
+
+  IntervalTree<T,K>(const intervalTree& other) 
+  : left(NULL)
+  , right(NULL)
+  {
+    center = other.center;
+    intervals = other.intervals;
+    if (other.left) {
+      left = new intervalTree(*other.left);
+    }
+    if (other.right) {
+      right = new intervalTree(*other.right);
+    }
+  }
+
+  IntervalTree<T,K>& operator=(const intervalTree& other) {
+    center = other.center;
+    intervals = other.intervals;
+    if (other.left) {
+      left = new intervalTree(*other.left);
+    } else {
+      if (left) delete left;
+      left = NULL;
+    }
+    if (other.right) {
+      right = new intervalTree(*other.right);
+    } else {
+      if (right) delete right;
+      right = NULL;
+    }
+    return *this;
+  }
+
+  IntervalTree<T,K>(
+		    intervalVector& ivals,
+		    std::size_t depth = 16,
+		    std::size_t minbucket = 64,
+		    K leftextent = 0,
+		    K rightextent = 0,
+		    std::size_t maxbucket = 512
+		    )
+  : left(NULL)
+  , right(NULL)
+  {
+
+   --depth;
+    IntervalStartSorter<T,K> intervalStartSorter;
+    if (depth == 0 || (ivals.size() < minbucket && ivals.size() < maxbucket)) {
+      std::sort(ivals.begin(), ivals.end(), intervalStartSorter);
+      intervals = ivals;
+    } else {
+      if (leftextent == 0 && rightextent == 0) {
+	// sort intervals by start
+	std::sort(ivals.begin(), ivals.end(), intervalStartSorter);
+      }
+
+      K leftp = 0;
+      K rightp = 0;
+      K centerp = 0;
+            
+      if (leftextent || rightextent) {
+	leftp = leftextent;
+	rightp = rightextent;
+      } else {
+	leftp = ivals.front().start;
+	std::vector<K> stops;
+	stops.resize(ivals.size());
+	transform(ivals.begin(), ivals.end(), stops.begin(), intervalStop<T,K>);
+	rightp = *max_element(stops.begin(), stops.end());
+      }
+
+      //centerp = ( leftp + rightp ) / 2;
+      centerp = ivals.at(ivals.size() / 2).start;
+      center = centerp;
+
+      intervalVector lefts;
+      intervalVector rights;
+
+      for (typename intervalVector::iterator i = ivals.begin(); i != ivals.end(); ++i) {
+	interval& interval = *i;
+	if (interval.stop < center) {
+	  lefts.push_back(interval);
+	} else if (interval.start > center) {
+	  rights.push_back(interval);
+	} else {
+	  intervals.push_back(interval);
+	}
+      }
+
+      if (!lefts.empty()) {
+	left = new intervalTree(lefts, depth, minbucket, leftp, centerp);
+      }
+      if (!rights.empty()) {
+	right = new intervalTree(rights, depth, minbucket, centerp, rightp);
+      }
+    }
+  }
+
+  void findOverlapping(K start, K stop, intervalVector& overlapping) const {
+    if (!intervals.empty() && ! (stop < intervals.front().start)) {
+      for (typename intervalVector::const_iterator i = intervals.begin(); i != intervals.end(); ++i) {
+	const interval& interval = *i;
+	if (interval.stop >= start && interval.start <= stop) {
+	  overlapping.push_back(interval);
+	}
+      }
+    }
+
+    if (left && start <= center) {
+      left->findOverlapping(start, stop, overlapping);
+    }
+
+    if (right && stop >= center) {
+      right->findOverlapping(start, stop, overlapping);
+    }
+
+  }
+
+  void findContained(K start, K stop, intervalVector& contained) const {
+    if (!intervals.empty() && ! (stop < intervals.front().start)) {
+      for (typename intervalVector::const_iterator i = intervals.begin(); i != intervals.end(); ++i) {
+	const interval& interval = *i;
+	if (interval.start >= start && interval.stop <= stop) {
+	  contained.push_back(interval);
+	}
+      }
+    }
+
+    if (left && start <= center) {
+      left->findContained(start, stop, contained);
+    }
+
+    if (right && stop >= center) {
+      right->findContained(start, stop, contained);
+    }
+
+  }
+
+  ~IntervalTree(void) {
+    // traverse the left and right
+    // delete them all the way down
+    if (left) {
+      delete left;
+    }
+    if (right) {
+      delete right;
+    }
+  }
+
+};
+
+#endif
diff --git a/src/SnowTools/MiniRules.h b/src/SnowTools/MiniRules.h
new file mode 100644
index 000000000..35b11ad27
--- /dev/null
+++ b/src/SnowTools/MiniRules.h
@@ -0,0 +1,464 @@
+#ifndef MINI_RULES_H
+#define MINI_RULES_H
+
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <regex>
+
+#include "SnowTools/GenomicRegion.h"
+#include "SnowTools/GenomicRegionCollection.h"
+#include "SnowTools/SnowUtils.h"
+#include "SnowTools/HTSTools.h"
+
+#define HAVE_AHOCORASICK_AHOCORASICK_H 1
+#ifdef HAVE_AHOCORASICK_AHOCORASICK_H
+
+#include "ahocorasick/ahocorasick.h"
+#include <memory>
+
+// custom deleter for aho-corasick
+struct atm_free_delete {
+  void operator()(void* x) { free((AC_AUTOMATA_t*)x); }
+};
+typedef std::unique_ptr<AC_AUTOMATA_t> atm_ptr;
+#endif
+
+namespace SnowTools {
+
+/** Stores a rule for a single alignment flag.
+ *
+ * Rules for alignment flags can be one of three states:
+ * - NA - All flag values are valid
+ * - Off - Flag is valid if OFF
+ * - On - Flag is valid if ON
+ */
+class Flag {
+  
+ public:
+
+  /** Construct a new Flag with NA rule 
+   */
+  Flag() : on(false), off(false), na(true) {}
+    
+    /** Set the flag to NA
+     */
+  void setNA() { on = false; off = false; na = true; } 
+  
+  /** Set the flag to ON
+   */
+  void setOn() { on = true; off = false; na = false; } 
+
+  /** Set the flag to OFF
+   */
+  void setOff() { on = false; off = true; na = false; } 
+
+  /** Query if the flag is NA
+   */
+  bool isNA()  const { return na; } 
+
+  /** Query if the flag is ON
+   */
+  bool isOn()  const { return on; } 
+
+  /** Query if the flag is OFF
+   */
+  bool isOff() const { return off; } 
+
+  /** Set the values of this flag from an input line.
+   * 
+   * This takes as input a rule string (e.g. !hardclip)
+   * and parses it to set the rule. The type of rule it 
+   * will look for is specified by the regex (e.g. !?hardclip)
+   * @return Returns true if the regex was successfully parsed
+   */
+  bool parseRuleLine(std::string &val, std::regex &reg);
+
+ private: 
+
+  bool on;
+  bool off; 
+  bool na;
+
+};
+
+/** Hold a range of valid numeric values (e.g. isize). 
+ *
+ * Can optionally invert the range to make rule the complement of the range
+ * (eg isize NOT in [300,600]
+ */
+struct Range {
+
+  /** Construct a Range from 0 to 0, non-inverted
+   */
+  Range() : min(0), max(0), inverted(false), pattern("") {}
+
+  /** Construct a Range from mn to mx, inclusive
+   */
+  Range(int mn, int mx, int in, std::string p) : min(mn), max(mx), inverted(in), pattern(p) {}
+
+  int min;
+  int max;
+  bool inverted;
+  std::string pattern;
+  bool every = true;
+  bool none = false;
+  
+  bool isValid(int val) {
+    if (every)
+      return true;
+    if (none)
+      return true;
+    if (!inverted)
+      return (val >= min && val <= max);
+    else
+      return (val < min || val > max);
+  }
+
+  void parseRuleLine(std::string line);
+
+  friend std::ostream& operator<<(std::ostream &out, const Range &r);
+
+  // set that this ranges accepts everything
+  void setEvery() {
+    every = true;
+    none = false;
+  }
+
+  // set that this range accepts nothing
+    void setNone() {
+    every = false;
+    none = true;
+   }
+  
+  // return if this range accepts all values
+  bool isEvery() const { return every; }
+  
+  // return if this range accepts no values
+  bool isNone() const { return none; }
+
+
+};
+
+// a container to hold boolean rules based mostly on alignment flag
+struct FlagRule {
+  
+  FlagRule() {
+    dup  = Flag();
+    supp       = Flag();
+    qcfail     = Flag();
+    hardclip   = Flag();
+    fwd_strand = Flag();
+    rev_strand = Flag();
+    mate_fwd_strand = Flag();
+    mate_rev_strand = Flag();
+    mapped          = Flag();
+    mate_mapped     = Flag();
+    ff = Flag();
+    fr = Flag();
+    rf = Flag();
+    rr = Flag();
+    ic = Flag();
+    paired = Flag();
+  }
+  
+
+  /**
+   * if inv is true, then if flag rule is ON and read is ON, return FALSE
+   */ 
+  /*bool inline flagCheck(Flag &f, bam1_t *b, int bamflag, bool inv) {
+    
+    if (!f.isNA()) {
+      bool val = (b->core.flag & bamflag);
+      if ( (f.isOff() && val) || (f.isOn() && !val))
+	return inv ? false : true;
+    }
+    return true; 
+    }*/
+
+  Flag dup, supp, qcfail, hardclip, fwd_strand, rev_strand,
+    mate_fwd_strand, mate_rev_strand, mapped, mate_mapped, ff, fr, rf, rr, ic, paired;
+
+  bool na = true;
+  void parseRuleLine(std::string line);
+  
+  // ask whether a read passes the rule
+  bool isValid(Read &r);
+
+  friend std::ostream& operator<<(std::ostream &out, const FlagRule &fr);
+
+  // set every flag to NA (most permissive)
+  void setEvery() {
+    dup.setOn();
+    supp.setOn();
+    qcfail.setOn();
+    hardclip.setOn();
+    fwd_strand.setOn();
+    rev_strand.setOn();
+    mate_fwd_strand.setOn();
+    mate_rev_strand.setOn();
+    mapped.setOn();
+    mate_mapped.setOn();
+    ff.setOn();
+    fr.setOn();
+    rf.setOn();
+    rr.setOn();
+    ic.setOn();
+    paired.setOn();
+    na = true;
+  }
+
+  // set every flag to OFF everythign off)
+  void setNone() {
+    dup.setOff();
+    supp.setOff();
+    qcfail.setOff();
+    hardclip.setOff();
+    fwd_strand.setOff();
+    rev_strand.setOff();
+    mate_fwd_strand.setOff();
+    mate_rev_strand.setOff();
+    mapped.setOff();
+    mate_mapped.setOff();
+    ff.setOff();
+    fr.setOff();
+    rf.setOff();
+    rr.setOff();
+    ic.setOff();
+    paired.setOff();
+  }
+
+
+  // ask if every flag is set to NA (most permissive)
+  bool isEvery() const { return na; }
+
+};
+
+//
+class AbstractRule {
+
+ public:
+
+  AbstractRule() {}
+  ~AbstractRule() {}
+
+  std::string name = "";
+  Range isize = {-1, -1, true, "isize"}; // include all
+  Range mapq =  {-1, -1, true, "mapq"}; 
+  Range len =   {-1, -1, true, "length"};
+  Range clip =  {-1, -1, true, "clip"};
+  Range phred = {-1, -1, true, "phred"};
+  Range nm = {-1, -1, true, "nm"};
+  Range nbases = {-1,-1,true, "nbases"};
+  Range ins = {-1,-1,true, "ins"};
+  Range del = {-1,-1,true, "del"};
+  Range xp = {-1,-1,true, "xp"};
+  std::unordered_map<std::string,bool> orientation;
+
+  std::string atm_file = "";
+  bool atm_inv = false;
+  size_t atm_count = 0;
+
+  std::string id;
+
+  // how many reads pass this rule?
+  size_t m_count = 0;
+
+#ifdef HAVE_AHOCORASICK_AHOCORASICK_H
+  //atm_ptr atm;
+  AC_AUTOMATA_t * atm = 0;
+#endif
+
+  uint32_t subsam_seed = 999;
+  double subsam_frac = 1;
+
+  bool none = false;
+  // set to true if you want a read to belong to the region if its mate does
+  //bool mate = false; 
+
+  FlagRule fr;
+
+  bool isValid(Read &r);
+
+  void parseRuleLine(std::string line);
+
+  void parseSubLine(std::string line);
+
+#ifdef HAVE_AHOCORASICK_AHOCORASICK_H
+  bool ahomatch(Read &r);
+
+  bool ahomatch(const char * seq, unsigned len);
+#endif
+
+  void parseSeqLine(std::string line);
+
+  friend std::ostream& operator<<(std::ostream &out, const AbstractRule &fr);
+
+  void setEvery() {
+    isize.setEvery();
+    mapq.setEvery();
+    len.setEvery();
+    clip.setEvery();
+    phred.setEvery();
+    nm.setEvery();
+    nbases.setEvery();
+    fr.setEvery();
+    ins.setEvery();
+    del.setEvery();
+    xp.setEvery();
+    atm_file = "";
+    subsam_frac = 1;
+  }
+  
+  void setNone() { 
+    isize.setNone();
+    mapq.setNone();
+    len.setNone();
+    clip.setNone();
+    phred.setNone();
+    nm.setNone();
+    nbases.setNone();
+    fr.setNone();
+    del.setNone();
+    xp.setNone();
+    ins.setNone();
+    none = true;
+  }
+
+  // return if this rule accepts all reads
+  bool isEvery() const {
+    return ins.isEvery() && del.isEvery() && isize.isEvery() && mapq.isEvery() && len.isEvery() && clip.isEvery() && phred.isEvery() && nm.isEvery() && nbases.isEvery() && fr.isEvery() && (atm_file.length() == 0) && (subsam_frac >= 1) && xp.isEvery();
+  }
+
+  // return if this rule accepts no reads
+  bool isNone() const {
+    return none;
+    //return isize.isNone() && mapq.isNone() && len.isNone() && clip.isNone() && phred.isNone() && nm.isNone() && fr.isNone();
+  }
+
+
+};
+
+class MiniRulesCollection;
+
+/** Define a set of rules for creating a variant bam. The syntax is:
+   all@!isize:[0,800],mapq:[0,60]
+   region@REGION_FILE
+   rule1@isize:[0,800],mapq:[0,60]
+   rule2@!isize[0,800]:mapq[0,60],:ardclip:supplementary:duplicate:qcfail
+   rule3@
+   
+   A file of NA indicates that the rule should be applied genome-wide.
+   The ordering of the lines sets the hierarchical rule. For instance, a rule on line 2 will be applied 
+   before a rule on line 3 for all regions that are the union of regions in level 3 and below.
+   
+   e.g. Level 3 region file has region chr1   100   1000
+        Level 2 region file has region chr1   150   1200
+	The union of these will produce a new region chr1   100   1200, with level 2
+*/
+class MiniRules {
+  
+  friend class MiniRulesCollection;
+
+  public:
+  MiniRules() {}
+  ~MiniRules() {}
+
+  std::string id;
+    
+  bool isValid(Read &r);
+   
+  void setRegionFromFile(const std::string& file);
+
+  bool isReadOverlappingRegion(Read &r);
+
+  friend std::ostream& operator<<(std::ostream& out, const MiniRules &mr);
+ 
+  size_t size() const {
+    return m_abstract_rules.size();
+  }
+
+  void parseDiscordantShortcut(const std::string& line, const AbstractRule& ar);
+
+  bool m_whole_genome = false;
+
+  int m_width = 0;  
+
+  std::string m_region_file;
+  //private:
+
+  GRC m_grv;
+
+  int m_level = -1;
+
+  int pad = 0; // how much should we pad the region?
+
+  std::vector<AbstractRule> m_abstract_rules;
+
+  // rule applies to mate too
+  bool m_applies_to_mate = false;
+
+  // pointer to its containing MiniRulesCollection
+  MiniRulesCollection * mrc; 
+
+  // how many reads pass this MiniRule
+  size_t m_count = 0;
+
+};
+
+// a hierarchy of mini rules to operate on
+class MiniRulesCollection {
+
+ public: 
+  
+  /** Construct an empty MiniRulesCollection 
+   * that will pass all reads
+   */
+  MiniRulesCollection() {}
+
+  MiniRulesCollection(std::string file, bam_hdr_t *b);
+
+  bool isValid(Read &r);
+  
+  friend std::ostream& operator<<(std::ostream& out, const MiniRulesCollection &mr);
+  
+  void sendToBed(std::string file);
+
+  // check if we should do the whole genome
+  bool hasWholeGenome() const {
+    for (auto it : m_regions)
+      if (it.m_whole_genome)
+	return true;
+    return false;
+  }
+
+  size_t m_count = 0; // passed
+  size_t m_count_seen = 0; // tested
+
+  GRC getAllRegions() const;
+
+  size_t size() const { return m_regions.size(); } 
+
+  size_t numRules() const {
+    size_t num = 0;
+    for (auto& it : m_regions)
+      num += it.size();
+    return num;
+  }
+
+  std::vector<MiniRules> m_regions;
+
+  bam_hdr_t * h;// in case we need to convert from text chr to id chr
+
+  void countsToFile(const std::string& file) const;
+
+  // should we keep checking rules, even it passed? (useful for counting)
+  bool m_fall_through = false;
+
+ private:  
+  
+};
+
+}
+
+#endif
diff --git a/src/SnowTools/Read.h b/src/SnowTools/Read.h
new file mode 100644
index 000000000..915a0b3ca
--- /dev/null
+++ b/src/SnowTools/Read.h
@@ -0,0 +1,14 @@
+#ifndef SNOWTOOLS_READ_H__
+#define SNOWTOOLS_READ_H__
+
+#include "SnowTools/HTSTools.h"
+
+class BamRead {
+
+
+ private:
+  
+  std::shared_ptr<bam1_t> b;
+  
+
+};
diff --git a/src/SnowTools/SnowTools.h b/src/SnowTools/SnowTools.h
new file mode 100644
index 000000000..e7aed03a1
--- /dev/null
+++ b/src/SnowTools/SnowTools.h
@@ -0,0 +1,5 @@
+#ifndef SNOWTOOLS_SNOWTOOLS_H__
+#define SNOWTOOLS_SNOWTOOLS_H__
+
+
+#endif
diff --git a/src/SnowTools/SnowToolsCommon.h b/src/SnowTools/SnowToolsCommon.h
new file mode 100644
index 000000000..fddaac17d
--- /dev/null
+++ b/src/SnowTools/SnowToolsCommon.h
@@ -0,0 +1,106 @@
+#ifndef SNOWTOOLS_COMMON_H__
+#define SNOWTOOLS_COMMON_H__
+
+/*! \mainpage Snowtools 1.0
+ *
+ * \section intro_sec Introduction
+ *
+ * SnowTools is a C++ package for querying BAM and SAM files, performing 
+ * BWA-MEM operations in memory, and performing advanced filtering of 
+ * reads using a hierarchy of rules. At its core, SnowTools is a C++ interface
+ * to the C htslib project, which provides the code for core BAM/SAM operations. 
+ * (https://github.com/samtools/htslib)
+ *
+ * SnowTools is built to be extendable. See (...) for examples of how to take advantage of C++
+ * class extensions to build off of the SnowTools base functionality. 
+ *
+ * SnowTools is available as a stand-alone executable, which wrap a number of tools. 
+ * However, certain functionality provided in an optimal form by SamTools or 
+ * BamTools are intentially left out of SnowTools. In cases where improvements or alternative
+ * approaches are useful, the functionally is partially duplicated (e.g. count).
+ * 
+ * As noted above, there are many overlaps between this project at the BamTools project from Derek Barnett 
+ * (https://github.com/pezmaster31/bamtools). As such, SnowTools is provided as a 
+ * a supplemental library that may be more suited to your individual needs that BamTools, or vice-versa. To
+ * aid developers in deciding which package is right for them, I have put together a small list of 
+ * similarities and differences between these two packages
+ *
+ * 
+ * \subsection similaries SnowTools/BamTools similarity
+ * - Provide read/write access to BAM files
+ * - Classes containing individual reads and ways to interact with them (e.g. edit tags)
+ * - Available as an API and a command-line versoin
+ * \subsection differenes SnowTools/BamTools differences
+ * - Sort/index functionality is independently implemented in BamTools. In SnowTools, the Samtools 
+ * sort and index functions are called directly.
+ * - BamTools stores quality scores and sequences as strings. In SnowTools, the HTSlib native bam1_t format
+ * is used instead. This format has a lower memory footprint by using only 4 bits per base, rather than 8. 
+ * Conversion to C++ style std::string is provided as a function and can be done on the fly.
+ * - BamTools provides the BamMultiReader class for reading multiple BAM files at once, while 
+ * SnowTools does not currently support this functionality.
+ * - BamTools provides the following CLI tools that are not supported in SnowTools
+ *    - convert - Convert between BAM and other formats
+ *    - header - Print the BAM header
+ *    - index - Index a BAM file
+ *    - merge - Merge muliple BAM files
+ *    - random - Select random alignments from BAM file
+ *    - resolve - Resolve paired-end reads IsProperPair flag
+ *    - revert - Remove duplicate marks and restore original base qualities
+ *    - sort - Sorts a BAM file
+ *    - split - Split a BAM file on user-specified property
+ * \section install_sec Installation
+ * \code
+ * git clone https://github.com/broadinstitute/isva.git
+ * cd isva/SnowTools/src
+ * ./configure
+ * make
+ * \endcode
+ * 
+ * I have successfully compiled with GCC-4.9.
+ *
+ * \section support Support
+ * This code is being developed and maintained by Jeremiah Wala (jwala@broadinstitute.org)
+ *
+ * \section attribution Attributions
+ * - Jeremiah Wala - Harvard MD-PhD candidate, Bioinformatics and Integrative Genomics
+ * - Steve Schumacher - Research Scientist, Dana Farber Cancer Institute
+ * - Cheng-Zhong Zhang - Research Scientist, Broad Institute
+ * - Marcin Imielinski - Asst Prof, Cornell University
+ */
+
+#include <string>
+#include <vector>
+#include <cstdint>
+
+namespace SnowTools {
+  
+  static const std::vector<std::string> CHR_NAME {"1", "2", "3", "4", "5", "6", "7", "8", "9",
+      "10", "11", "12", "13", "14", "15", "16", "17", 
+      "18", "19", "20", "21", "22", "X", "Y", "M"};
+  static const std::vector<std::string> CHR_NAME_NUM {"1", "2", "3", "4", "5", "6", "7", "8", "9",
+      "10", "11", "12", "13", "14", "15", "16", "17", 
+      "18", "19", "20", "21", "22", "23", "24"};
+  
+  static const int CHR_LEN [25] = {249250621, 243199373, 198022430, 191154276, //1-4
+				   180915260, 171115067, //5-6
+				   159138663, 146364022, 141213431, 135534747, 135006516, 133851895, //7-12
+				   115169878, 107349540, 102531392, 90354753,  81195210,  78077248, //13-18
+				   59128983,  63025520,  48129895,  51304566,  155270560, 59373566, //19-24
+				   16571}; //25
+  
+  static const uint32_t CHR_CLEN [25] = {0, 249250621, 492449994,  690472424, 881626700, 1062541960, 1233657027,
+					 1392795690,1539159712,1680373143,1815907890,1950914406,2084766301,
+					 2199936179, 2307285719, 2409817111, 2500171864, 2581367074, 2659444322,                                                                                                                   
+					 2718573305, 2781598825, 2829728720, 2881033286, 3036303846, 3095677412};
+  
+  static const uint32_t genome_size_XY = 3095677411;
+  
+  static std::string const REFHG19 = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
+  
+  static const int NONCENT_CHR [44] = {1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,
+				       11,11,12,12,13,14,15,16,16,17,17,18,18,19,19,20,20,21,21,22,23,23,24,24};
+  
+  
+}
+
+#endif
diff --git a/src/SnowTools/SnowUtils.h b/src/SnowTools/SnowUtils.h
new file mode 100644
index 000000000..833ed4975
--- /dev/null
+++ b/src/SnowTools/SnowUtils.h
@@ -0,0 +1,136 @@
+#ifndef SNOWUTILS_H
+#define SNOWUTILS_H
+
+#include <string>
+#include <time.h>
+#include <ctime>
+#include <vector>
+#include <unistd.h>
+#include <sstream>
+#include <cmath>
+#include <algorithm>
+
+#include "SnowTools/gzstream.h"
+#include "SnowTools/GenomicRegion.h"
+
+namespace SnowTools {
+
+  /** Check if a file is readable and exists
+   * @param name Name of a file to test
+   * @return File is readable and exists
+   */
+  inline bool read_access_test (const std::string& name) {
+    return (access (name.c_str(), R_OK) == 0); 
+  }
+
+  /** Format a number to include commas
+   * @param data Number to format
+   * @param Formatted number
+   */
+  template <typename T> 
+    std::string AddCommas(T data) { 
+    std::stringstream ss; 
+    ss << data; 
+    std::string s = ss.str();
+    if (s.length() > 3)
+      for (int i = s.length()-3; i > 0; i -= 3)
+	s.insert(i,",");
+    return s;
+  }
+  
+  inline std::string displayRuntime(const timespec start) {
+    
+#ifndef __APPLE__
+    struct timespec finish;
+    clock_gettime(CLOCK_MONOTONIC, &finish);
+    double elapsed = (finish.tv_sec - start.tv_sec);
+    int t = clock()/CLOCKS_PER_SEC;
+    int min = (int)std::floor(elapsed / 60.0);
+    int sec = (int)(elapsed-min*60);
+    char buffer[100];
+    sprintf (buffer, "CPU: %4dm%02ds Wall: %4dm%02ds", 
+	     (int)floor( ((double)t) /60.0), t % 60, min, sec);
+    buffer[99] = '\0';
+    return std::string(buffer);
+#else
+    return "--- time not configured for apple\n";
+    std::cerr << start << std::endl;
+#endif
+  }
+
+  /** Deprecated
+   */
+  inline void rcomplement(std::string &a) {
+    
+    std::reverse(&a[0], &a[a.size()]);
+    std::string::iterator it = a.begin();
+    for (; it != a.end(); it++)
+      if (*it == 'A')
+	*it = 'T';
+      else if (*it == 'T')
+	*it = 'A';
+      else if (*it == 'C')
+	*it = 'G';
+      else
+	*it = 'C';
+  }
+  
+  // calculate the percentage
+  template <typename T> inline int percentCalc(T numer, T denom) {
+    if (denom <= 0)
+      return 0;
+    int perc  = static_cast<int>(floor((float)numer / (float)denom * 100.0));
+    return perc;
+  }
+
+  // remove the last character from a string
+  inline std::string cutLastChar(std::string in) {
+    if (in.length() == 0)
+      return in;
+    else 
+      return in.substr(0, in.length() - 1);
+  }
+  
+  // remove substrings from a string
+ inline std::string scrubString(std::string toscrub, std::string toremove) {
+   std::string::size_type i = toscrub.find(toremove);
+   while (i != std::string::npos) {
+     toscrub.erase(i, toremove.length());
+     i = toscrub.find(toremove);
+   }
+   return toscrub;
+ }
+
+ void genRandomVals(uint32_t &i1, uint32_t &i2, const uint32_t &max);
+
+ void genRandomValue(uint32_t &i, const uint32_t &max);
+
+ /*! @function Loops through a text file to count the number of lines.
+  * @param file The file to count
+  * @param exclude String which, if present in line, causes line to not be counted.
+  * @param include String which must be present in the line to be counted.
+  * @return Number of valid lines in file
+  */
+ inline size_t countLines(const std::string &file, const std::string &exclude = "", const std::string &include = "") {
+   
+   //open the file
+   igzstream inFile(file.c_str());
+   if (!inFile) 
+     return 0;
+   
+   // loop through the file
+   size_t count = 0;
+   std::string dum;
+   while (std::getline(inFile, dum)) {
+     if (!include.length() || (dum.find(include) != std::string::npos)) // file must have include
+       if (!exclude.length() || (dum.find(exclude) == std::string::npos)) // and not have exclude
+	 count++;
+     
+   }
+   
+   return count;
+ } 
+
+}
+
+#endif
diff --git a/src/SnowTools/gChain.h b/src/SnowTools/gChain.h
new file mode 100644
index 000000000..fb6576e41
--- /dev/null
+++ b/src/SnowTools/gChain.h
@@ -0,0 +1,32 @@
+#ifndef SNOWTOOLS_GCHAIN_H__
+#define SNOWTOOLS_GCHAIN_H__
+
+#include "SnowTools/GenomicRegionCollection.h"
+
+namespace SnowTools {
+
+template <typename T>
+class gChain {
+
+ public:
+
+  gChain() {}
+
+  gChain(const GenomicRegionCollection<T>& x, const GenomicRegionCollection<T>& y);
+
+  GenomicRegionCollection<T> lift(GenomicRegionCollection<T>& g);
+
+  bool checkValidity() const;
+
+ private:
+
+  GenomicRegionCollection<T>m_galx;
+  GenomicRegionCollection<T> m_galy;
+
+  std::vector<int32_t> m_pad_left;
+  std::vector<int32_t> m_pad_right;
+
+};
+
+}
+#endif
diff --git a/src/SnowTools/gzstream.h b/src/SnowTools/gzstream.h
new file mode 100644
index 000000000..3c74fe054
--- /dev/null
+++ b/src/SnowTools/gzstream.h
@@ -0,0 +1,128 @@
+// ============================================================================
+// gzstream, C++ iostream classes wrapping the zlib compression library.
+// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+//
+// See src/COPYING for the full terms of the license.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+// ============================================================================
+//
+// File          : gzstream.h
+// Revision      : $Revision: 1.5 $
+// Revision_date : $Date: 2002/04/26 23:30:15 $
+// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+// Modified      : Jared Simpson 2010/04/16. Converted license notification to
+//                 GPLv3 to be compatible with the rest of the code base as 
+//                 allowed by LGPLv2.1. No other changes were made.
+// 
+// 
+// Standard streambuf implementation following Nicolai Josuttis, "The 
+// Standard C++ Library".
+// ============================================================================
+
+#ifndef GZSTREAM_H
+#define GZSTREAM_H 1
+
+// standard C++ with new header file names and std:: namespace
+#include <iostream>
+#include <fstream>
+#include <zlib.h>
+
+#ifdef GZSTREAM_NAMESPACE
+namespace GZSTREAM_NAMESPACE {
+#endif
+
+// ----------------------------------------------------------------------------
+// Internal classes to implement gzstream. See below for user classes.
+// ----------------------------------------------------------------------------
+
+class gzstreambuf : public std::streambuf {
+private:
+    static const int bufferSize = 47+256;    // size of data buff
+    // totals 512 bytes under g++ for igzstream at the end.
+
+    gzFile           file;               // file handle for compressed file
+    char             buffer[bufferSize]; // data buffer
+    char             opened;             // open/close state of stream
+    int              mode;               // I/O mode
+
+    int flush_buffer();
+public:
+    gzstreambuf() : opened(0) {
+        setp( buffer, buffer + (bufferSize-1));
+        setg( buffer + 4,     // beginning of putback area
+              buffer + 4,     // read position
+              buffer + 4);    // end position      
+        // ASSERT: both input & output capabilities will not be used together
+    }
+    int is_open() { return opened; }
+    gzstreambuf* open( const char* name, int open_mode);
+    gzstreambuf* close();
+    ~gzstreambuf() { close(); }
+    
+    virtual int     overflow( int c = EOF);
+    virtual int     underflow();
+    virtual int     sync();
+};
+
+class gzstreambase : virtual public std::ios {
+protected:
+    gzstreambuf buf;
+public:
+    gzstreambase() { init(&buf); }
+    gzstreambase( const char* name, int open_mode);
+    ~gzstreambase();
+    void open( const char* name, int open_mode);
+    void close();
+    gzstreambuf* rdbuf() { return &buf; }
+};
+
+// ----------------------------------------------------------------------------
+// User classes. Use igzstream and ogzstream analogously to ifstream and
+// ofstream respectively. They read and write files based on the gz* 
+// function interface of the zlib. Files are compatible with gzip compression.
+// ----------------------------------------------------------------------------
+
+class igzstream : public gzstreambase, public std::istream {
+public:
+    igzstream() : std::istream( &buf) {} 
+    igzstream( const char* name, int open_mode = std::ios::in)
+        : gzstreambase( name, open_mode), std::istream( &buf) {}  
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    void open( const char* name, int open_mode = std::ios::in) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+class ogzstream : public gzstreambase, public std::ostream {
+public:
+    ogzstream() : std::ostream( &buf) {}
+    ogzstream( const char* name, int mode = std::ios::out)
+        : gzstreambase( name, mode), std::ostream( &buf) {}  
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    void open( const char* name, int open_mode = std::ios::out) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+#ifdef GZSTREAM_NAMESPACE
+} // namespace GZSTREAM_NAMESPACE
+#endif
+
+#endif // GZSTREAM_H
+// ============================================================================
+// EOF //
+