-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.cpp
229 lines (181 loc) · 6.65 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#include <chrono>
#include <ctime>
#include <filesystem>
#include <iostream>
#include <tuple>
#include <vector>
#include "opencv2/opencv.hpp"
#include "src/filter.hpp"
#include "src/conv2d_layer.hpp"
#include "src/thread_pool.hpp"
using namespace std;
using namespace cv;
namespace fs = filesystem;
// Kernel window edge length and filter bias; main() reads both from stdin.
int w_size, bias;
// w_size x w_size kernel coefficients; allocated and filled from stdin in main().
double** kernel;
// Filters built by push_filter(); handed to every conv2d call in this file.
vector<filter*> filters;
// const int thread_count = 1;
// Number of workers given to the pool. main() uses a separate pool-free path
// when this is 1.
const int thread_count = thread::hardware_concurrency();
ThreadPool::ThreadPool pool(thread_count); // create the global thread pool
void push_filter(int idx) {
// make edge detector for color idx
double ***ed = get_tensor(w_size, w_size, 3);
for (int i = 0; i < w_size; ++i) {
for (int j = 0; j < w_size; ++j) {
ed[i][j][idx] = kernel[i][j];
}
}
filter *f = new filter(ed, w_size, 3, bias);
f->normalize();
filters.push_back(f);
}
// Applies the global `filters` to `image` and writes the result to `outPath`.
// When the output is larger than 512x512 it is split into tiles that are
// convolved as separate jobs on the global thread pool and stitched together.
//
// image:   source image; it is zero-padded by 1 pixel on every side first.
// outPath: destination file path handed to imwrite().
//
// NOTE(review): main() runs this function as a pool job, and the tiled path
// blocks on futures produced by that same pool. If every worker is blocked
// here, no tile job can start — confirm the pool size makes this safe.
void applyFilterAndSaveWithMultiThread(Mat image, string outPath){
    const int sep_row = 512, sep_col = 512;   // max output rows/cols per tile
    const int padding = 1;
    int stride = 1;                           // the tiling below assumes stride == 1

    // Add a constant (zero) border around the image.
    Mat pad_image;
    copyMakeBorder(image, pad_image, padding, padding, padding, padding, BORDER_CONSTANT, Scalar(0));

    int out_height = (pad_image.rows - w_size) / stride + 1;
    int out_width = (pad_image.cols - w_size) / stride + 1;

    // Small outputs are convolved directly on this thread.
    if (out_height <= sep_row && out_width <= sep_col){
        conv_layer clayer(pad_image.rows, pad_image.cols, image.channels(), w_size, stride, filters.size());
        Mat output = get<2>(clayer.conv2d(pad_image, filters));
        imwrite(outPath, output);
        return;
    }

    // Larger outputs: walk the OUTPUT grid in sep_row x sep_col steps. Each tile
    // takes the input window that produces those output pixels, i.e. w_size - 1
    // extra rows/cols. Iterating over output extents (not padded-input extents)
    // guarantees every tile holds at least one full kernel window, so no
    // zero- or negative-sized trailing tile is ever scheduled.
    vector<future<tuple<int, int, Mat>>> futures;
    for (int i = 0; i < out_height; i += sep_row){
        for (int j = 0; j < out_width; j += sep_col){
            int start_row = i, start_col = j;
            Mat split_img = pad_image(Range(i, min(pad_image.rows, i + sep_row + w_size - 1)),
                                      Range(j, min(pad_image.cols, j + sep_col + w_size - 1)));
            conv_layer clayer(split_img.rows, split_img.cols, image.channels(), w_size, stride, filters.size());
            futures.emplace_back(pool.enqueueJob(conv_layer::conv2d, clayer, split_img, filters, start_row, start_col));
        }
    }

    // Stitch the filtered tiles back together at their output offsets.
    Mat output(Size(out_width, out_height), image.type(), Scalar::all(0));
    for (auto& pending : futures){
        auto [start_row, start_col, tile_out] = pending.get();
        tile_out.copyTo(output(Rect(start_col, start_row, tile_out.cols, tile_out.rows)));
    }
    imwrite(outPath, output);
}
// Applies the global `filters` to `image` on the calling thread and returns
// the filtered image. No thread pool is involved.
//
// image: source image; it is zero-padded by 1 pixel on every side first.
// Returns the third element of the tuple produced by conv_layer::conv2d.
Mat applyFilterWithSingleThread(Mat image){
    const int padding = 1;
    int stride = 1;

    // Add a constant (zero) border around the image.
    Mat pad_image;
    copyMakeBorder(image, pad_image, padding, padding, padding, padding, BORDER_CONSTANT, Scalar(0));

    conv_layer clayer(pad_image.rows, pad_image.cols, image.channels(), w_size, stride, filters.size());
    return get<2>(clayer.conv2d(pad_image, filters));
}
// Wraps a byte count so that streaming it prints a short human-readable size:
// the value is divided by 1024 until it drops below 1024, rounded up to one
// decimal place, and followed by a unit letter (B, K, M, G, T, P, E). Every
// unit above plain bytes gets a trailing "B" (e.g. "1.5KB").
struct HumanReadable {
    std::uintmax_t size{};

    friend std::ostream& operator<<(std::ostream& os, HumanReadable hr) {
        static const char kUnitLetters[] = "BKMGTPE";

        int unit = 0;
        double scaled = hr.size;
        while (scaled >= 1024.) {
            scaled /= 1024.;
            ++unit;
        }

        // Round up to one decimal place.
        const double rounded = std::ceil(scaled * 10.) / 10.;
        os << rounded << kUnitLetters[unit];
        if (unit != 0) {
            os << "B";
        }
        return os;
    }
};
// Entry point.
// Usage: <program> <image_folder> <output_folder>
// Reads the filter from stdin as "<window_size> <bias>" followed by
// window_size * window_size kernel coefficients, applies it to every readable
// image file in <image_folder>, and writes each result under the same file
// name into <output_folder>. Prints the image count, total input size and
// elapsed wall-clock time.
int main(int argc, char *argv[]){
    // Validate command-line arguments and prepare the output folder.
    if (argc < 3){
        cout << "[USAGE] " << argv[0] << " <image_folder> <output_folder>\n";
        pool.end();
        return 0;
    }
    fs::path imageFolder(argv[1]);
    fs::path outputFolder(argv[2]);
    if (!fs::exists(imageFolder)){
        cout << argv[1] << " is Not Found" << endl;
        pool.end();
        return 0;
    }
    if (!fs::is_directory(imageFolder)){
        cout << argv[1] << " is not Directory" << endl;
        pool.end();
        return 0;
    }
    if (!fs::exists(outputFolder)){
        fs::create_directories(outputFolder);
    }

    // Read the filter definition. A failed read or a non-positive window size
    // would otherwise flow straight into the allocations below.
    if (!(cin >> w_size >> bias) || w_size <= 0){
        cout << "invalid filter header: expected <window_size> <bias> with window_size > 0" << endl;
        pool.end();
        return 1;
    }
    kernel = new double*[w_size];
    for (int i = 0; i < w_size; i++) {
        kernel[i] = new double[w_size];
        for (int j = 0; j < w_size; j++) {
            cin >> kernel[i][j];
        }
    }
    if (!cin){
        cout << "invalid filter: expected " << w_size << " x " << w_size << " kernel coefficients" << endl;
        pool.end();
        return 1;
    }

    push_filter(0); // B
    push_filter(1); // G
    push_filter(2); // R

    cout << "thread count : " << thread_count << "\n\n";

    // With a single worker the tiled path for large images does not work (it
    // waits on jobs queued to the same pool), so everything runs on this thread.
    // hardware_concurrency() may also report 0, which gets the same path.
    const bool single_threaded = thread_count <= 1;

    // Wall-clock timer: clock() reports CPU time, which adds up across worker
    // threads and therefore does not reflect elapsed time.
    const auto started = std::chrono::steady_clock::now();
    int count = 0;
    uintmax_t fileSize = 0;

    for (const fs::directory_entry& entry : fs::directory_iterator(imageFolder)){
        // Sub-directories and other non-regular entries have no usable file size.
        if (!entry.is_regular_file()){
            continue;
        }
        // imread returns an empty Mat for files it cannot decode.
        Mat image = imread(entry.path().u8string());
        if (image.empty()){
            cout << entry.path() << " is not a readable image, skipped\n";
            continue;
        }
        fileSize += entry.file_size();
        count++;

        if (single_threaded){
            string outPath = (outputFolder / entry.path().filename()).u8string();
            Mat output = applyFilterWithSingleThread(image);
            imwrite(outPath, output);
        }
        else{
            // Hand the image to a worker; the job filters and saves it.
            pool.enqueueJob(applyFilterAndSaveWithMultiThread, image, (outputFolder / entry.path().filename()).u8string());
        }
    }

    // Shut the pool down before stopping the timer. The original timed this
    // call as the filtering phase, so it presumably waits for queued jobs.
    pool.end();
    const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - started;

    cout << "image counts : " << count << "\n";
    cout << "total file size : " << HumanReadable{fileSize} << "\n\n";
    cout << "total time : " << elapsed.count() << " sec\n";
    return 0;
}