Skip to content

Commit

Permalink
connected components
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasrausch committed Oct 25, 2023
1 parent fcca61b commit cd0a480
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 11 deletions.
6 changes: 2 additions & 4 deletions src/convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,14 +498,12 @@ namespace lorax
std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] Load pan-genome graph" << std::endl;
Graph g;
parseGfa(c, g, true);

// Parse alignments
std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] Parse alignments" << std::endl;
std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] Convert alignments" << std::endl;
std::vector<AlignRecord> aln;
parseGaf(c, g, aln);

// Convert to BAM
std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] Parse reads" << std::endl;
//if (!plotGraphAlignments(c, g, aln)) return -1;
if (c.hasFastq) {
if (!convertToBamViaFASTQ(c, g, aln)) {
Expand Down
6 changes: 4 additions & 2 deletions src/gaf.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,11 @@ namespace lorax
bool parseAR = true;
while (parseAR) {
AlignRecord ar;
if (parseAlignRecord(instream, g, ar)) aln.push_back(ar);
if (parseAlignRecord(instream, g, ar)) {
aln.push_back(ar);
//std::cerr << ar.seed << ',' << ar.qlen << ',' << ar.qstart << ',' << ar.qend << ',' << ar.strand << ',' << ar.plen << ',' << ar.pstart << ',' << ar.pend << ',' << ar.matches << ',' << ar.alignlen << ',' << ar.mapq << std::endl;
}
else parseAR = false;
//std::cerr << ar.seed << ',' << ar.qlen << ',' << ar.qstart << ',' << ar.qend << ',' << ar.strand << ',' << ar.plen << ',' << ar.pstart << ',' << ar.pend << ',' << ar.matches << ',' << ar.alignlen << ',' << ar.mapq << std::endl;
}

// Close file
Expand Down
64 changes: 63 additions & 1 deletion src/gfa.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ namespace lorax
uint32_t tid;
uint32_t pos;
uint32_t len;
uint32_t comp;

Segment(uint32_t const t, uint32_t const p, uint32_t const l) : tid(t), pos(p), len(l) {}
Segment(uint32_t const t, uint32_t const p, uint32_t const l) : tid(t), pos(p), len(l), comp(0) {}
};

struct Link {
Expand Down Expand Up @@ -70,10 +71,71 @@ namespace lorax
std::vector<std::string> chrnames;
std::vector<uint32_t> ranks;
TSegmentIdMap smap;
uint32_t numComp; // Component 0 are the singletons or unlabelled graph (no components)

Graph() : numComp(1) {}

bool empty() const { return segments.empty(); }
};


inline void
connectedComponents(Graph& g) {
std::vector<uint32_t> comp(g.segments.size(), 0);
uint32_t numComp = 0;
for(uint32_t i = 0; i < g.links.size(); ++i) {
if (!comp[g.links[i].from]) {
if (!comp[g.links[i].to]) {
// Both vertices have no component
++numComp;
comp[g.links[i].from] = numComp;
comp[g.links[i].to] = numComp;
} else comp[g.links[i].from] = comp[g.links[i].to];
} else {
if (!comp[g.links[i].to]) comp[g.links[i].to] = comp[g.links[i].from];
else {
// Both vertices have a component, merge
if (comp[g.links[i].to] != comp[g.links[i].from]) {
uint32_t compIndex = comp[g.links[i].to];
uint32_t otherIndex = comp[g.links[i].from];
if (otherIndex < compIndex) {
compIndex = comp[g.links[i].from];
otherIndex = comp[g.links[i].to];
}
// Re-label other index
for(uint32_t k = 0; k < comp.size(); ++k) {
if (otherIndex == comp[k]) comp[k] = compIndex;
}
}
}
}
}
++numComp;
// Compute comp size
std::vector<uint32_t> countPerComp(numComp, 0);
for(uint32_t k = 0; k < comp.size(); ++k) ++countPerComp[comp[k]];
// Relabel components, level 0 are the singleton nodes (potentially none)
numComp = 0;
countPerComp[0] = numComp;
for(uint32_t i = 1; i < countPerComp.size(); ++i) {
if (countPerComp[i]) countPerComp[i] = ++numComp;
}
++numComp;

// Assign components
for(uint32_t k = 0; k < comp.size(); ++k) g.segments[k].comp = countPerComp[comp[k]];
g.numComp = numComp;

// Debug
//std::vector<std::string> idSegment(g.smap.size());
//for(typename Graph::TSegmentIdMap::const_iterator it = g.smap.begin(); it != g.smap.end(); ++it) idSegment[it->second] = it->first;
//std::vector<uint32_t> nc(g.numComp, 0);
//for(uint32_t k = 0; k < g.segments.size(); ++k) {
//if (g.segments[k].comp == 0) std::cerr << "Singleton: " << idSegment[k] << std::endl;
//++nc[g.segments[k].comp];
//}
//for(uint32_t i = 0; i < g.numComp; ++i) std::cerr << i << ',' << nc[i] << std::endl;
}

template<typename TConfig>
inline bool
Expand Down
8 changes: 4 additions & 4 deletions src/lorax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ inline void
displayUsage() {
std::cout << "Usage: lorax <command> <arguments>" << std::endl;
std::cout << std::endl;
std::cout << "Linear reference alignments - Commands:" << std::endl;
std::cout << "Linear reference alignments:" << std::endl;
std::cout << std::endl;
std::cout << " tithreads templated insertion threads" << std::endl;
std::cout << " telomere telomere fusion identification" << std::endl;
Expand All @@ -38,12 +38,12 @@ displayUsage() {
std::cout << " pct percent identity" << std::endl;
std::cout << " extract extract matches and fasta for selected reads" << std::endl;
std::cout << std::endl;
std::cout << "Pan-genome alignments - Commands:" << std::endl;
std::cout << "Pan-genome graphs:" << std::endl;
std::cout << std::endl;
std::cout << " geno edge-genotyping using pan-genome alignments" << std::endl;
std::cout << " ncov node coverage" << std::endl;
std::cout << " convert convert pan-genome graph alignment to BAM" << std::endl;
std::cout << " gfa2dot convert pan-genome graph to dot (graphviz) format" << std::endl;
std::cout << " ncov node coverage" << std::endl;
std::cout << " geno edge-genotyping using pan-genome alignments" << std::endl;
std::cout << std::endl;
std::cout << std::endl;
}
Expand Down

0 comments on commit cd0a480

Please sign in to comment.