Skip to content

Commit

Permalink
better error handling and conditions for failing
Browse files Browse the repository at this point in the history
  • Loading branch information
hiraksarkar committed May 15, 2020
1 parent 54fea1f commit 591c915
Show file tree
Hide file tree
Showing 8 changed files with 426 additions and 423 deletions.
11 changes: 6 additions & 5 deletions include/BFHClass.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
class BFHClass{
public:
BFHClass(){}
BFHClass(std::shared_ptr<spdlog::logger>& consoleLogIn){
consoleLog = consoleLogIn ;
}
BFHClass(std::shared_ptr<spdlog::logger>& consoleLogIn){
consoleLog = consoleLogIn ;
}

void loadBFH(
std::string& bfhFile,
Expand All @@ -39,7 +39,8 @@ class BFHClass{
std::map<std::string, uint32_t>& cellWhiteListMap,
bool generateNoiseProfile,
std::unordered_map<std::string, uint32_t>& cellNoisyMap,
std::string& outDir
std::string& outDir,
bool dump = false
) ;

void loadProbability(std::string& file, Reference& refInfo, bool geneLevel) ;
Expand All @@ -50,7 +51,7 @@ class BFHClass{
}


std::shared_ptr<spdlog::logger> consoleLog ;
std::shared_ptr<spdlog::logger> consoleLog ;
std::string bfhFile ;
std::vector<double> countProbability ;
std::unordered_map<uint32_t, std::unordered_map<uint32_t, uint32_t>> geneCountHistogram ; // Gene id -> (EqClass_Length -> Number)
Expand Down
10 changes: 8 additions & 2 deletions include/GFAReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,22 @@ class GFAReader{
public:

GFAReader(
std::string gfaFileIn
std::string gfaFileIn,
std::shared_ptr<spdlog::logger>& consoleLogIn
){
gfaFileName_ = gfaFileIn ;
consoleLog = consoleLogIn ;
}

void parseFile(
Reference& refInfo
) ;

void readUnitigs() ;
std::vector<std::pair<size_t, bool>> explode(
const std::string str,
const char& ch
);

void updateEqClass(
std::string& transcriptName,
Expand Down Expand Up @@ -55,7 +61,7 @@ class GFAReader{
std::unique_ptr<std::ifstream> file ;
std::unordered_map<size_t, std::string> unitigMap ;


std::shared_ptr<spdlog::logger> consoleLog;

};

Expand Down
8 changes: 4 additions & 4 deletions include/MatrixParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,17 +160,17 @@ public :
std::vector<std::vector<T>> data ; // Matrix containing the Cell x Transcriptome Matrix
std::vector<std::vector<T>> geneCounts ; // Matrix containing the Cell x Gene Matrix
std::vector<std::vector<int>> trueGeneCounts ; // True Matrix containing the Cell x Gene Matrix
std::vector<std::string> cellNames ; // Vector of Cell Names


// cell specific
std::vector<std::string> cellNames ; // Vector of Cell Names
std::map<std::string, uint32_t> cellNamesMap ; // Cell Name -> Cell Id
std::map<std::string, uint32_t> cellNamesDupCount ; // Cell Name -> dedup count

std::map<std::string, uint32_t> cellWhiteListMap ;
std::unordered_map<std::string, uint32_t> cellNoisyMap ;
std::unordered_map<std::string, uint32_t> cellDoubletMap ;

std::unordered_map<uint32_t, uint32_t> cell2ClusterMap ;
std::unordered_map<uint32_t, uint32_t> cell2ClusterMap ;

std::shared_ptr<spdlog::logger> consoleLog ; // Logger for outputting errors

std::map<uint32_t, uint32_t> alevin2refMap ; // Map Col of Input Matrix -> Gene ID from t2g tsv
Expand Down
54 changes: 29 additions & 25 deletions src/BFHClass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,22 @@ void BFHClass::loadBFH(
std::map<std::string, uint32_t>& cellWhiteListMap,
bool generateNoiseProfile,
std::unordered_map<std::string, uint32_t>& cellNoisyMap,
std::string& outDir
std::string& outDir,
bool dump
){



if(! util::fs::FileExists(bfhFile.c_str())){
std::cerr << bfhFile << " does not exists \n" ;
consoleLog->error("{} does not exists", bfhFile) ;
std::exit(1) ;
}

bool createClusterLevelHist{false} ;
if(cellClusterFile != ""){
std::cout << "[DEBUG] cell Clust file " << cellClusterFile << "\n" ;

consoleLog->info("Feeding cell cluster file {}", cellClusterFile) ;
if(! util::fs::FileExists(cellClusterFile.c_str())){
std::cerr << cellClusterFile << " is not empty and does not exist \n" ;
consoleLog->error("Cell cluster file {} does not exist", cellClusterFile) ;
std::exit(1) ;
}
createClusterLevelHist = true ;
Expand All @@ -72,16 +72,15 @@ void BFHClass::loadBFH(
auto cell_id = it->second ;
cell2ClusterMap[cell_id] = cluster_id ;
}else{
std::cerr << "Avoiding this cluster\n" ;
consoleLog->error("Avoiding {} cluster", cluster_id) ;
}
}
std::cerr << "[DEBUG] read cluster file with size " << cell2ClusterMap.size() << "\n" ;
consoleLog->info("read cluster file with size {}",cell2ClusterMap.size());
}

std::cerr<< "[DEBUG] Reading BFH file ........ \n" ;

std::ifstream dataStream(bfhFile.c_str()) ;
std::string line ;
consoleLog->info("Reading BFH file {}",bfhFile);
std::ifstream dataStream(bfhFile.c_str()) ;
std::string line ;

std::getline(dataStream, line) ;
uint32_t numTranscripts = std::stoul(line) ;
Expand All @@ -90,9 +89,9 @@ void BFHClass::loadBFH(
std::getline(dataStream, line) ;
uint32_t numEqClasses = std::stoul(line) ;

std::cerr << "[DEBUG] numTranscripts: " << numTranscripts << "\n" ;
std::cerr << "[DEBUG] numCells: " << numCells << "\n" ;
std::cerr << "[DEBUG] numEqClasses: " << numEqClasses << "\n" ;
consoleLog->info("numTranscripts: {}", numTranscripts) ;
consoleLog->info("numCells: {}", numCells) ;
consoleLog->info("numEqClasses: {}", numEqClasses) ;

std::vector<std::string> trNames(numTranscripts) ;
std::vector<std::string> CBNames(numCells) ;
Expand All @@ -101,14 +100,14 @@ void BFHClass::loadBFH(
trNames[i] = line ;
}

std::cerr << "[DEBUG] Transcripts read \n" ;
consoleLog->info("Transcripts read ") ;
for(size_t i = 0; i < numCells; ++i){
std::getline(dataStream, line) ;
CBNames[i] = line ;
}


std::cerr << "[DEBUG] Cell names read \n" ;
consoleLog->info("Cell names read ") ;

// read equivalence classes now
uint32_t tot_reads{0} ;
Expand All @@ -129,7 +128,10 @@ void BFHClass::loadBFH(
auto gid = transcript2geneMap[tid] ;
geneIds.insert(gid) ;
}else{
std::cerr << "transcript is in the list but no corresponding gene found \n" ;
consoleLog->error("transcript is in the list but no corresponding gene found "
"this signifies that the BFH and the annotation does not belong "
"to the same annotation (e.g. gencode version) or same organism"
) ;
std::exit(2) ;
}
}
Expand Down Expand Up @@ -195,29 +197,31 @@ void BFHClass::loadBFH(
}


std::cerr << "[DEBUG] countHistogram.size() " << countHistogram.size() << "\n" ;
consoleLog->info("countHistogram.size(): {}", countHistogram.size());



auto x = std::max_element( countHistogram.begin(), countHistogram.end(),
[](const std::pair<uint32_t, uint32_t>& p1, const std::pair<uint32_t, uint32_t>& p2) {
return p1.first < p2.first; });

std::cerr << "[DEBUG] x->first, x->second tot_reads "
<< x->first << "\t" << x->second
<< "\t" << tot_reads << "\n" ;
consoleLog->info("Histogram statistics "
"max value: {}, max freq: {}, total reads {}",
x->first, x->second, tot_reads
);

consoleLog->info("Converting histogram to a probablity vector");
countProbability.resize(x->first + 1, 0.0) ;
for(auto it: countHistogram){
if(it.first >= countProbability.size()){
std::cerr << "[DEBUG] out of memory " << it.first << "\t" << countProbability.size() << "\n" ;
consoleLog->error("[DEBUG] out of memory {} >= {}",it.first,countProbability.size()) ;
}

countProbability[it.first] = static_cast<double>(it.second)/static_cast<double>(tot_reads) ;
}



if(dump)
{
std::string geneCountHistogramFile = outDir + "/geneLevelProb.txt" ;
std::cerr << "DEBUG: " << geneCountHistogramFile << "\n" ;
Expand All @@ -237,7 +241,7 @@ void BFHClass::loadBFH(

}


if(dump)
{
std::string countProbabilityFile = outDir + "/countProb.txt" ;
std::ofstream probStream(countProbabilityFile.c_str()) ;
Expand All @@ -248,7 +252,7 @@ void BFHClass::loadBFH(
}
}

std::cerr << "[DEBUG] Exiting after reading BFH \n" ;
consoleLog->info("Exiting after reading BFH ") ;

}

Expand Down
Loading

0 comments on commit 591c915

Please sign in to comment.