-
Notifications
You must be signed in to change notification settings - Fork 0
/
sdfcut.cc
119 lines (106 loc) · 2.96 KB
/
sdfcut.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
// Progress-reporting interval: main() prints one '.' to stderr whenever the
// number of molecules already written to the current output file is a
// multiple of this value.
static const unsigned int PROGRESS_GRANULARITY = 50U;
// Returns the larger of the two arguments (y when both are equal).
inline long max(long x, long y) {
  if (x > y) {
    return x;
  }
  return y;
}
// Prints the one-line command synopsis to stderr.
void usage() {
  static const char* const synopsis = "Usage: sdfcut file.sdf NUM_MOLS_PER_FILE";
  std::cerr << synopsis;
  std::cerr << std::endl;
}
/**
 * Opens the next numbered output file on `ofs`.
 *
 * The file name is `prefix`, a dot, and a sequence number that starts at 0
 * and advances by one on every call, whether or not the open succeeds.
 *
 * @param ofs     stream on which the file is opened in output mode
 * @param prefix  leading part of the generated file name
 * @return the generated file name, or an empty string when the stream is not
 *         in a good state after the open attempt. On success the name
 *         followed by " >> " is also printed to stderr.
 */
std::string next_output_file(std::fstream& ofs, const char* prefix)
{
  static unsigned int sequence = 0;

  // Build "<prefix>.<sequence>" and advance the sequence for the next call.
  std::ostringstream name_builder;
  name_builder << prefix << "." << sequence;
  ++sequence;
  const std::string path = name_builder.str();

  ofs.open(path.c_str(), std::fstream::out);
  if (ofs.good()) {
    std::cerr << path << " >> ";
    return path;
  }
  return std::string();
}
/**
 * Entry point: splits an SD file into pieces that each hold at most
 * NUM_MOLS_PER_FILE molecules.
 *
 * argv[1] is the input path, or "-" to read standard input (output files are
 * then named after "sdfcut-out.sdf"). argv[2] is the per-file molecule limit
 * and must be a positive decimal integer. Output files are created through
 * next_output_file() with the input path as prefix.
 *
 * A molecule ends at a line whose first '$' is followed by exactly "$$$"
 * (an optional trailing CR is tolerated), or at end of input.
 *
 * @return 0 on success; 1 on wrong usage, an invalid limit, a file that
 *         cannot be opened or written, or input that stopped before EOF.
 */
int main(int argc, char* argv[])
{
  if (argc != 3) {
    usage();
    return 1;
  }

  // parse and prepare input file
  std::ifstream file_in;
  const bool use_stdin = std::strcmp(argv[1], "-") == 0;
  const char* ifp = argv[1];
  std::istream& ifs = use_stdin ? std::cin : file_in;
  if (! use_stdin) {
    file_in.open(ifp, std::ios::in);
    if (! ifs.good()) {
      std::cerr << "Fail to open input file: " << ifp << std::endl;
      return 1;
    }
  } else {
    std::cerr << "Read from stdin..." << std::endl;
    ifp = "sdfcut-out.sdf";
  }

  // parse per-file limit; reject empty input, trailing garbage ("10abc"),
  // zero and negative values
  char* parse_end = NULL;
  const long per_file = std::strtol(argv[2], &parse_end, 10);
  if (parse_end == argv[2] || *parse_end != '\0' || per_file <= 0) {
    std::cerr << "Number of mols for per must be a positive integer."
              << std::endl
              << "You specified '" << argv[2] << "'" << std::endl;
    return 1;
  }

  // output
  std::string ofp;
  std::fstream ofs;
  std::string buffer, buf;
  long counter = 0;  // molecules written to the current output file
  long c = 0;        // molecules written in total

  while (ifs.good()) {
    // read input up to (and excluding) the next dollar sign
    buffer.clear();
    buf.clear();
    std::getline(ifs, buffer, '$');

    // The stream stays good only if the delimiter itself was consumed;
    // hitting EOF sets eofbit and a read error sets badbit.
    const bool saw_dollar = ifs.good();
    if (! saw_dollar) {
      if (! ifs.eof())
        break;  /* read error, reported after the loop */
      if (buffer.find_first_not_of(" \t\r\n") == std::string::npos)
        break;  /* nothing but whitespace left: not a molecule */
    }

    // Open the output lazily, only once there is something to write. This
    // avoids an empty trailing file, and testing is_open() (rather than the
    // molecule counter) avoids reopening a stream that is already open while
    // the first molecule of a file is still being copied.
    if (! ofs.is_open()) {
      ofp = next_output_file(ofs, ifp);
      if (ofp.empty()) {
        std::cerr << "Fail to open output file. Exit now. " << std::endl;
        return 1;
      }
    }

    // write what we've read
    ofs << buffer;
    if (saw_dollar) {
      std::getline(ifs, buf);  /* rest of the line, or up to EOF */
      // Put back the '$' that getline discarded, even when the rest of the
      // line is empty, and the LF that was consumed (or is missing at EOF).
      ofs << "$" << buf << "\n";
    } else if (buffer[buffer.size() - 1] != '\n') {
      ofs << "\n";  /* input did not end with an LF: append one */
    }
    if (! ofs.good()) {
      std::cerr << std::endl
                << "Fail to write output file: " << ofp << std::endl;
      return 1;
    }

    // did we just finish one mol?
    const bool terminator = saw_dollar && (buf == "$$$" || buf == "$$$\r");
    if (terminator || ifs.eof()) {
      if (counter % PROGRESS_GRANULARITY == 0)
        std::cerr << ".";
      ++ counter;
      ++ c;
      // did we reach the limit of the number of per-file molecules?
      if (counter == per_file) {
        std::cerr << std::endl;
        counter = 0;
        ofs.close();
      }
    }
  }

  if (ofs.is_open())
    ofs.close();

  // done
  std::cerr << std::endl << "Finsh processing " << c << " molecules"
            << std::endl;

  // check why we exit
  if (! ifs.eof()) {
    std::cerr << "We did not end with a clean state, possibly due to IO error"
              << std::endl;
    return 1;
  }
  return 0;
}