//
// Some code is borrowed from https://github.com/freshtechyy/ONNX-Runtime-GPU-image-classifciation-example, which at the time of writing is licensed under the MIT license.
// The rest was written by me: https://github.com/SiBensberg
//
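//
// Overview: ObjectDetector wraps an ONNX Runtime session around an exported
// detection model (input "image_arrays:0", output "detections:0"). The
// constructor loads the model and reads the input/output metadata; inference()
// preprocesses a BGR(A) frame, runs the session, and returns boxes scaled back
// to the source image resolution.
//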
#include "object_detector.h"
// Box colors (BGR order, as cv::Scalar expects) for class IDs 0, 1, 2, 3 respectively:
const cv::Scalar BLUE = {180, 128, 0};
const cv::Scalar YELLOW = {77, 220, 255};
const cv::Scalar ORANGE = {0, 110, 250};
const cv::Scalar BIGORANGE = {60, 30, 190};
const std::vector<cv::Scalar> COLORS = {BLUE, YELLOW, ORANGE, BIGORANGE};
ObjectDetector::ObjectDetector(const std::string &modelPath) {
std::cout << "Initiating ObjectDetector: " << std::endl;
// Create Environment:
std::string instance_Name = "Object Detector";
mEnv = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, instance_Name.c_str());
// print available providers
std::cout << " - Available providers: ";
for (const std::string &prov: Ort::GetAvailableProviders()) {
std::cout << " " << prov << " ";
}
std::cout << std::endl;
OrtCUDAProviderOptions cuda_opts; // todo: what happens without CUDA? See the fallback sketch below.
cuda_opts.device_id = 0;
// Ort Session
Ort::SessionOptions sessionOptions;
// Enable Cuda
sessionOptions.AppendExecutionProvider_CUDA(cuda_opts);
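// Hedged sketch for the todo above: on builds without the CUDA execution
// provider, AppendExecutionProvider_CUDA throws an Ort::Exception, so one
// option is to catch it and fall back to the default CPU provider:
//   try {
//     sessionOptions.AppendExecutionProvider_CUDA(cuda_opts);
//   } catch (const Ort::Exception &e) {
//     std::cerr << "CUDA EP unavailable, using CPU: " << e.what() << std::endl;
//   }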
sessionOptions.SetGraphOptimizationLevel(
GraphOptimizationLevel::ORT_ENABLE_EXTENDED); // other optimization levels are available
// Load model
// Move assignment releases any previously held session, so no explicit OrtRelease is needed:
mSession = Ort::Session(mEnv, modelPath.c_str(), sessionOptions); // load the model
// Extract input info:
auto numInputNodes = mSession.GetInputCount();
//mInputName = mSession->GetInputName(0, allocator);
mInputName = "image_arrays:0"; //todo: initialize this more general or with constructor parameter
// Input type:
auto inputTypeInfo = mSession.GetInputTypeInfo(0);
auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
auto inputType = inputTensorInfo.GetElementType();
mInputDims = inputTensorInfo.GetShape();
// Check if any input dim is variable (-1):
bool variable = false;
for (const auto &i: mInputDims) {
variable |= i <= 0;
}
if (variable) {
std::cout
<< "Input size of the exported ONNX model is variable, so it has to be fixed beforehand.\nSetting it to 512x512."
<< std::endl;
mInputDims = mDefaultInputDims;
}
auto numOutputNodes = mSession.GetOutputCount();
//mOutputName = mSession -> GetOutputNameAllocated(0, allocator);
mOutputName = "detections:0";
auto outputTypeInfo = mSession.GetOutputTypeInfo(0);
auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
auto outputType = outputTensorInfo.GetElementType();
mOutputDims = outputTensorInfo.GetShape();
std::cout << "Input Type: " << inputType << std::endl;
std::cout << "Input Nodes: " << numInputNodes << std::endl;
std::cout << "Input Dimension: ";
for (const auto &i: mInputDims) {
std::cout << i << ' ';
}
std::cout << std::endl;
std::cout << "Output Type: " << outputType << std::endl;
std::cout << "Output Nodes: " << numOutputNodes << std::endl;
std::cout << "Output Dimension: ";
for (const auto &i: mOutputDims) {
std::cout << i << ' ';
}
std::cout << std::endl;
}
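// Example usage (sketch; the model path and image file are placeholders):
//   ObjectDetector detector("model.onnx");
//   cv::Mat frame = cv::imread("frame.png", cv::IMREAD_UNCHANGED);
//   std::vector<std::vector<float>> boxes = detector.inference(frame);
//   // each entry: {class_id, confidence, ymin, xmin, ymax, xmax} in source-image pixels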
std::vector<std::vector<float>> ObjectDetector::inference(const cv::Mat &imageBGR) const {
// for time measuring
const auto start = clock_time::now();
// Calculate flat tensor input size:
long inputTensorSize = 1;
for (const auto &e: mInputDims) {
inputTensorSize *= e;
}
// inputTensorValues is the flattened input buffer; createTensorFromImage
// fills it in the layout the model expects (HWC when `hwc` is set, CHW otherwise).
std::vector<uint8_t> inputTensorValues(inputTensorSize);
createTensorFromImage(imageBGR, inputTensorValues);
std::vector<Ort::Value> inputTensors;
// Create the input tensor; it wraps (does not copy) the preprocessed buffer:
inputTensors.push_back(Ort::Value::CreateTensor(
memoryInfo,
inputTensorValues.data(),
inputTensorSize,
mInputDims.data(),
mInputDims.size(),
ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8
));
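// Note: this untyped CreateTensor overload takes the buffer size in bytes; for
// a uint8 tensor the byte count equals the element count, so inputTensorSize
// can be passed directly (the float32 output below needs a sizeof(float) factor).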
// Create the output tensor over a preallocated buffer:
size_t outputTensorSize = 1;
for (const auto &e: mOutputDims) {
outputTensorSize *= e;
}
std::vector<float> outputTensorValues(outputTensorSize);
std::vector<Ort::Value> outputTensors;
outputTensors.push_back(
Ort::Value::CreateTensor(
memoryInfo,
outputTensorValues.data(),
sizeof(float) * outputTensorSize, // byte count: 4 bytes per float32 element (https://github.com/triton-inference-server/server/issues/4478)
mOutputDims.data(),
mOutputDims.size(),
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT
));
const sec preprocessing_time = clock_time::now() - start;
// inference:
const auto before_inference = clock_time::now();
std::vector<const char *> inputNames{mInputName};
std::vector<const char *> outputNames{mOutputName};
// std::cout<< "\nStarting Inferencing:" << std::endl;
mSession.Run(Ort::RunOptions{nullptr},
inputNames.data(),
inputTensors.data(),
1,
outputNames.data(),
outputTensors.data(),
1);
const sec inference_time = clock_time::now() - before_inference;
// std::cout<< "The inference takes " << inference_time.count() << "s" << std::endl;
// Post-process the raw detections into boxes scaled to the source image:
auto outputBoxes = this->calculateBoxes(outputTensors.back());
const sec after = clock_time::now() - start;
// std::cout << "Image Precessing and inference taking a overall: " << after.count() << "s" << std::endl;
return outputBoxes;
}
// Create a tensor from the input image
void ObjectDetector::createTensorFromImage(
const cv::Mat &img, std::vector<uint8_t> &inputTensorValues) const {
auto input_height = mInputDims.at(1); // NHWC layout: {N, H, W, C}
auto input_width = mInputDims.at(2);
int nativeRows = img.rows;
int nativeCols = img.cols;
this->cameraInputDims = {nativeRows, nativeCols};
// Init new Images todo: can probably be simplified and made more memory efficient
// also todo: shift to gpu memory maybe helpful.
cv::Mat scaledImage(nativeRows, nativeCols, CV_8UC3);
cv::Mat preprocessedImage(input_height, input_width, CV_8UC3);
// Drop the alpha channel if the frame is BGRA; merging the remaining three gives BGR:
std::vector<cv::Mat> rgbchannel;
cv::split(img, rgbchannel);
if (rgbchannel.size() == 4) {
rgbchannel.erase(rgbchannel.begin() + 3); // erasing begin()+3 on a 3-channel image would be UB
}
cv::merge(rgbchannel, scaledImage);
/******* Preprocessing *******/
// The exported model takes raw uint8 pixels, so the image stays in [0, 255] and
// no scaling to [-1, 1] is applied. (The float variant from the borrowed
// classification example would be:
// img.convertTo(scaledImage, CV_32F, 2.0f / 255.0f, -1.0f);
// but scaling into CV_8U would saturate nearly every pixel to 0 or 1.)
scaledImage.convertTo(scaledImage, CV_8U, 1.0f, 0.0f);
// Layout handling: TensorFlow models mostly expect NHWC input, while PyTorch
// models usually expect NCHW; `hwc` selects which layout is produced.
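// Memory layout: HWC interleaves channels per pixel (B,G,R,B,G,R,...), while
// CHW stores one contiguous plane per channel; cv::dnn::blobFromImage below
// produces the latter as an NCHW blob.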
if (!this->hwc) {
// CHW path: blobFromImage resizes and emits an NCHW blob; ddepth CV_8U keeps
// the data uint8 (the default ddepth would produce a CV_32F blob).
cv::dnn::blobFromImage(scaledImage, preprocessedImage, 1.0,
cv::Size((int) input_width, (int) input_height),
cv::Scalar(), false, false, CV_8U);
} else {
// HWC path: resize to the model input size, then swap BGR (OpenCV's order) to
// RGB for the model; COLOR_RGB2BGR and COLOR_BGR2RGB perform the same swap.
// Note: interpolation is the sixth resize argument (after fx and fy).
cv::resize(scaledImage,
preprocessedImage,
cv::Size((int) input_width, (int) input_height),
0, 0, cv::INTER_LINEAR);
cv::cvtColor(preprocessedImage, preprocessedImage, cv::COLOR_RGB2BGR);
}
// Copy the Mat's data into the flat vector (approach from https://stackoverflow.com/a/26685567):
inputTensorValues.assign(preprocessedImage.data,
preprocessedImage.data + (preprocessedImage.total() * preprocessedImage.channels()));
}
std::vector<std::vector<float>> ObjectDetector::calculateBoxes(const Ort::Value &outputTensor) const {
// Scale factors (explicit float casts) to map boxes from the model input size back to the source image:
auto width_factor = (float) cameraInputDims[1] / (float) mInputDims.at(2);
auto height_factor = (float) cameraInputDims[0] / (float) mInputDims.at(1);
auto shape = outputTensor.GetTensorTypeAndShapeInfo().GetShape();
// Get data from tensor:
const auto data = outputTensor.GetTensorData<float>();
std::vector<std::vector<float>> outputBoxes;
// Each output row holds 7 values; indices 1-4 are the box corners
// (ymin, xmin, ymax, xmax), index 5 the score, and index 6 the class ID.
// Iterate over every detection row (shape[1] of them, typically 100):
for (int row = 0; row < shape[1]; ++row) {
const auto confidence = *(data + (row * 7 + 5)); // score sits at index 5 of the row
const auto class_id = *(data + (row * 7 + 6)); // class ID at index 6
if (confidence >= 0.09) { // keep only detections above the confidence threshold
std::vector<float> box_data{class_id, confidence,
*(data + (row * 7 + 1)) * height_factor,
*(data + (row * 7 + 2)) * width_factor,
*(data + (row * 7 + 3)) * height_factor,
*(data + (row * 7 + 4)) * width_factor};
outputBoxes.push_back(box_data);
}
}
return outputBoxes;
}
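// Sketch for a hypothetical consumer drawing the returned boxes with the
// COLORS palette defined at the top of this file (frame/boxes are placeholders):
//   for (const auto &b : boxes) {
//     cv::rectangle(frame,
//                   cv::Point((int) b[3], (int) b[2]), // (xmin, ymin)
//                   cv::Point((int) b[5], (int) b[4]), // (xmax, ymax)
//                   COLORS[(int) b[0]], 2); // color by class ID
//   }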