forked from ekg/mutatrix
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Repeats.cpp
69 lines (61 loc) · 1.85 KB
/
Repeats.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#include "Repeats.h"
map<string, int> repeatCounts(long int position, const string& sequence, int maxsize) {
map<string, int> counts;
for (int i = 1; i <= maxsize; ++i) {
// subseq here i bases
string seq = sequence.substr(position, i);
// go left.
int j = position - i;
int leftsteps = 0;
while (j >= 0 && seq == sequence.substr(j, i)) {
j -= i;
++leftsteps;
}
// go right.
j = position;
int rightsteps = 0;
while (j + i <= sequence.size() && seq == sequence.substr(j, i)) {
j += i;
++rightsteps;
}
// if we went left and right a non-zero number of times,
if (leftsteps + rightsteps > 1) {
counts[seq] = leftsteps + rightsteps;
}
}
// filter out redundant repeat information
if (counts.size() > 1) {
map<string, int> filteredcounts;
map<string, int>::iterator c = counts.begin();
string prev = c->first;
filteredcounts[prev] = c->second; // shortest sequence
++c;
for (; c != counts.end(); ++c) {
int i = 0;
string seq = c->first;
while (i + prev.length() <= seq.length() && seq.substr(i, prev.length()) == prev) {
i += prev.length();
}
if (i < seq.length()) {
filteredcounts[seq] = c->second;
prev = seq;
}
}
return filteredcounts;
} else {
return counts;
}
}
bool isRepeatUnit(const string& seq, const string& unit) {
if (seq.size() % unit.size() != 0) {
return false;
} else {
int maxrepeats = seq.size() / unit.size();
for (int i = 0; i < maxrepeats; ++i) {
if (seq.substr(i * unit.size(), unit.size()) != unit) {
return false;
}
}
return true;
}
}