-
Notifications
You must be signed in to change notification settings - Fork 1
/
EndIsoform.cpp
128 lines (93 loc) · 3.15 KB
/
EndIsoform.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <iostream>
#include <string>
#include <sstream>
#include <fstream>
#include <map>
#include <set>
#include <vector>
#include <cstdlib>
#include <algorithm>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
using namespace std;
using namespace boost;
struct ENDS {
string strand;
int CAGE_pos;
int PAS_pos;
};
bool SimilarEnds (ENDS a, ENDS b) {
if(a.strand == b.strand && abs(a.CAGE_pos - b.CAGE_pos) < 30 && abs(a.PAS_pos - b.PAS_pos) < 30) return true;
else return false;
}
vector<ENDS> ReduceEndsVectorII (vector<ENDS> &vec) {
vector<ENDS> output_vec;
output_vec.push_back(vec[0]);
for(int i = 1; i < vec.size(); ++i){
bool SIMILAR = false;
for(int j = 0; j < output_vec.size(); ++j){
SIMILAR = SimilarEnds(vec[i], output_vec[j]);
if(SIMILAR) break;
}
if(!SIMILAR) output_vec.push_back(vec[i]);
}
return output_vec;
}
vector<ENDS> DealWithMinusOne (vector<ENDS> &vec) {
vector<ENDS> output_vec;
for(int i = 0; i < vec.size(); ++i){
if(vec[i].CAGE_pos == -1){
bool compatible = false;
for(int j = 0; j < vec.size(); ++j){
if((vec[j].strand == vec[i].strand) && (vec[j].CAGE_pos != -1) && (abs(vec[j].PAS_pos - vec[i].PAS_pos) < 30)){
compatible = true;
break;
}
}
if(!compatible) output_vec.push_back(vec[i]);
}
else output_vec.push_back(vec[i]);
}
return output_vec;
}
int main (int argc, char **argv) {
cerr << "EndsIsoform <isoform_CAGE_PAS_ends.txt>" << endl;
ifstream endsinf(argv[1]);
map<string,vector<ENDS> > isoform_ends_map;
set<string> undetermined_set;
map<string,vector<ENDS> >::iterator it_map;
set<string>::iterator it_set;
while(endsinf){
string strInput;
getline(endsinf, strInput);
if(strInput.length() > 0){
vector<string> vec;
split(vec, strInput, is_any_of("\t"));
if(vec[2] != "*"){
ENDS ends;
ends.strand = vec[1];
ends.CAGE_pos = lexical_cast<int>(vec[3]);
ends.PAS_pos = lexical_cast<int>(vec[5]);
isoform_ends_map[vec[0]].push_back(ends);
}
else{
ENDS ends;
ends.strand = vec[1];
ends.CAGE_pos = -1;
ends.PAS_pos = lexical_cast<int>(vec[5]);
isoform_ends_map[vec[0]].push_back(ends);
}
}
}
for(it_map = isoform_ends_map.begin(); it_map != isoform_ends_map.end(); ++it_map){
vector<ENDS> reduced_vec = ReduceEndsVectorII(it_map->second);
vector<ENDS> final_vec = DealWithMinusOne(reduced_vec);
for(int i = 0; i < final_vec.size(); ++i){
if(final_vec.size() == 0) cout << "Alert" << endl;
cout << (it_map->first) << '\t' << final_vec[i].strand << '\t' << final_vec[i].CAGE_pos << '\t' << final_vec[i].PAS_pos << endl;
}
}
endsinf.close();
return 0;
}