Skip to content

Commit

Permalink
Support new reid model
Browse files Browse the repository at this point in the history
  • Loading branch information
Nuzhny007 committed Oct 6, 2023
1 parent 2930733 commit 7dea766
Show file tree
Hide file tree
Showing 8 changed files with 225 additions and 41 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@

# Last changes

* Re-identification model osnet_x0_25_msmt17 from [mikel-brostrom/yolo_tracking](https://github.com/mikel-brostrom/yolo_tracking)

* YOLOv8 detector worked with TensorRT! Export pretrained Pytorch models [here (ultralytics/ultralytics)](https://github.com/ultralytics/ultralytics) to onnx format and run Multitarget-tracker with -e=6 example

* Some experiments with YOLOv7_mask and results with rotated rectangles: the detector works, the tracker is in progress

* YOLOv7 worked with TensorRT! Export pretrained Pytorch models [here (WongKinYiu/yolov7)](https://github.com/WongKinYiu/yolov7) to onnx format and run Multitarget-tracker with -e=6 example

* YOLOv6 worked with TensorRT! Download pretrained onnx models [here (meituan/YOLOv6)](https://github.com/meituan/YOLOv6/releases/tag/0.1.0) and run Multitarget-tracker with -e=6 example

# New videos!

* YOLOv7 instance segmentation
Expand Down
Binary file added data/reid/osnet_x0_25_msmt17.onnx
Binary file not shown.
56 changes: 39 additions & 17 deletions example/examples.h
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ class YoloDarknetExample final : public VideoExample
{
if (!m_trackerSettingsLoaded)
{
bool useDeepSORT = false;
bool useDeepSORT = true;
if (useDeepSORT)
{
#ifdef _WIN32
Expand All @@ -681,26 +681,18 @@ class YoloDarknetExample final : public VideoExample
std::string pathToModel = "../data/";
#endif

#if 1
m_trackerSettings.m_embeddings.emplace_back(pathToModel + "open_model_zoo/person-reidentification-retail-0286/FP16-INT8/person-reidentification-retail-0286.xml",
pathToModel + "open_model_zoo/person-reidentification-retail-0286/FP16-INT8/person-reidentification-retail-0286.bin",
cv::Size(128, 256),
std::vector<objtype_t>{ TypeConverter::Str2Type("person") });
#endif

#if 0
m_trackerSettings.m_embeddings.emplace_back(pathToModel + "open_model_zoo/vehicle-reid-0001/osnet_ain_x1_0_vehicle_reid.xml",
pathToModel + "open_model_zoo/vehicle-reid-0001/osnet_ain_x1_0_vehicle_reid.bin",
cv::Size(208, 208),
std::vector<objtype_t>{ TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), TypeConverter::Str2Type("truck"), TypeConverter::Str2Type("vehicle") });
#endif
m_trackerSettings.m_embeddings.emplace_back(pathToModel + "reid/osnet_x0_25_msmt17.onnx",
pathToModel + "reid/osnet_x0_25_msmt17.onnx",
cv::Size(128, 256),
std::vector<objtype_t>{ TypeConverter::Str2Type("person"), TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), TypeConverter::Str2Type("truck"), TypeConverter::Str2Type("vehicle") });

std::array<track_t, tracking::DistsCount> distType{
0.f, // DistCenters
0.f, // DistRects
0.5f, // DistJaccard
0.f, // DistHist
0.5f // DistFeatureCos
0.5f, // DistFeatureCos
0.f // DistMahalanobis
};
if (!m_trackerSettings.SetDistances(distType))
std::cerr << "SetDistances failed! Absolutly summ must be equal 1" << std::endl;
Expand Down Expand Up @@ -843,7 +835,7 @@ class YoloTensorRTExample final : public VideoExample
YOLOv7Mask,
YOLOv8
};
YOLOModels usedModel = YOLOModels::YOLOv5;
YOLOModels usedModel = YOLOModels::YOLOv4;
switch (usedModel)
{
case YOLOModels::TinyYOLOv3:
Expand Down Expand Up @@ -980,7 +972,37 @@ class YoloTensorRTExample final : public VideoExample
{
if (!m_trackerSettingsLoaded)
{
m_trackerSettings.SetDistance(tracking::DistCenters);
bool useDeepSORT = true;
if (useDeepSORT)
{
#ifdef _WIN32
std::string pathToModel = "../../data/";
#else
std::string pathToModel = "../data/";
#endif

m_trackerSettings.m_embeddings.emplace_back(pathToModel + "reid/osnet_x0_25_msmt17.onnx",
pathToModel + "reid/osnet_x0_25_msmt17.onnx",
cv::Size(128, 256),
std::vector<objtype_t>{ TypeConverter::Str2Type("person"), TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), TypeConverter::Str2Type("truck"), TypeConverter::Str2Type("vehicle") });

std::array<track_t, tracking::DistsCount> distType{
0.f, // DistCenters
0.f, // DistRects
0.5f, // DistJaccard
0.f, // DistHist
0.5f, // DistFeatureCos
0.f // DistMahalanobis
};
if (!m_trackerSettings.SetDistances(distType))
std::cerr << "SetDistances failed! Absolutly summ must be equal 1" << std::endl;
}
else
{
m_trackerSettings.SetDistance(tracking::DistCenters);
}

//m_trackerSettings.SetDistance(tracking::DistCenters);
m_trackerSettings.m_kalmanType = tracking::KalmanLinear;
m_trackerSettings.m_filterGoal = tracking::FilterCenter;
m_trackerSettings.m_lostTrackType = tracking::TrackKCF; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect
Expand Down
127 changes: 127 additions & 0 deletions example/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,100 @@ const char* keys =

// ----------------------------------------------------------------------

#pragma once

///
/// \brief The EmbeddingsCalculatorSimple class
/// Minimal stand-alone re-identification embeddings calculator on top of cv::dnn:
/// loads a network from a single model file, crops an object from a frame,
/// runs the network on it and returns the normalized network output.
///
class EmbeddingsCalculatorSimple
{
public:
    EmbeddingsCalculatorSimple() = default;
    virtual ~EmbeddingsCalculatorSimple() = default;

    ///
    /// \brief Initialize
    /// Loads the network and prints its output/internal layer shapes for diagnostics.
    /// \param cfgName Not used: the model is read from weightsName only. Kept for interface compatibility.
    /// \param weightsName Path to the model file passed to cv::dnn::readNet
    /// \param inputLayer Size the object crop is resized to before inference
    /// \return true if the network was loaded and is not empty
    ///
    bool Initialize(const std::string& cfgName, const std::string& weightsName, const cv::Size& inputLayer)
    {
        (void)cfgName; // intentionally unused, see the parameter description

        m_inputLayer = inputLayer;

        // Report a load failure through the return value instead of leaking the exception
        try
        {
#if 1
            m_net = cv::dnn::readNet(weightsName);
#else
            m_net = cv::dnn::readNetFromTorch(weightsName);
#endif
        }
        catch (const cv::Exception& e)
        {
            std::cerr << "REID: can not load model " << weightsName << ": " << e.what() << std::endl;
            m_net = cv::dnn::Net();
            return false;
        }

        if (!m_net.empty())
        {
            //m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
            //m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

            std::vector<cv::dnn::MatShape> outputs;
            std::vector<cv::dnn::MatShape> internals;
            m_net.getLayerShapes(cv::dnn::MatShape(), 0, outputs, internals);
            std::cout << "REID: getLayerShapes: outputs (" << outputs.size() << ") = " << (outputs.size() > 0 ? outputs[0].size() : 0) << ", internals (" << internals.size() << ") = " << (internals.size() > 0 ? internals[0].size() : 0) << std::endl;

            // Each shape is indexed only after its own size was checked:
            // internals may be empty or shorter even when outputs has 4 dimensions
            if (!outputs.empty() && outputs[0].size() > 3)
            {
                std::cout << "outputs = [" << outputs[0][0] << ", " << outputs[0][1] << ", " << outputs[0][2] << ", " << outputs[0][3] << "]";
                if (!internals.empty() && internals[0].size() > 3)
                    std::cout << ", internals = [" << internals[0][0] << ", " << internals[0][1] << ", " << internals[0][2] << ", " << internals[0][3] << "]";
                std::cout << std::endl;
            }
        }
        return !m_net.empty();
    }

    ///
    /// \brief IsInitialized
    /// \return true if a network is loaded
    ///
    bool IsInitialized() const
    {
        return !m_net.empty();
    }

    ///
    /// \brief Calc
    /// Crops rect from img (after fitting it into the image), resizes the crop to the
    /// network input size, runs the network and normalizes the result with cv::normalize.
    /// \param img Source frame
    /// \param rect Object rectangle, may partially lie outside of the frame
    /// \return Normalized embedding or an empty cv::Mat if the frame is empty,
    ///         the network is not loaded or the fitted rectangle is degenerate
    ///
    cv::Mat Calc(const cv::Mat& img, cv::Rect rect)
    {
        if (img.empty() || m_net.empty())
            return cv::Mat();

        // Fits the interval [v, v + size) into [0, hi - 1]: first shifts it, then shrinks it
        // if it is still too long. The far edge is checked also after a negative origin was
        // moved to 0, otherwise an oversized rectangle would make img(rect) throw.
        auto Clamp = [](int& v, int& size, int hi)
        {
            if (v < 0)
                v = 0;
            if (v + size > hi - 1)
            {
                v = hi - 1 - size;
                if (v < 0)
                {
                    size += v;
                    v = 0;
                }
            }
        };
        Clamp(rect.x, rect.width, img.cols);
        Clamp(rect.y, rect.height, img.rows);

        // Nothing to crop: zero/negative size on input or an image that is too small
        if (rect.width <= 0 || rect.height <= 0)
            return cv::Mat();

        cv::Mat obj;
        cv::resize(img(rect), obj, m_inputLayer, 0., 0., cv::INTER_CUBIC);
        // Pixel values are scaled by 1 / 255, no mean subtraction, no channels swap, no crop, float blob
        cv::Mat blob = cv::dnn::blobFromImage(obj, 1.0 / 255.0, cv::Size(), cv::Scalar(), false, false, CV_32F);

        m_net.setInput(blob);
        cv::Mat embedding;
        //std::cout << "orig: " << embedding << std::endl;
        cv::normalize(m_net.forward(), embedding);
        // Logged after the forward pass, so the real embedding size is printed
        std::cout << "embedding: " << embedding.size() << ", chans = " << embedding.channels() << std::endl;
        //std::cout << "normalized: " << embedding << std::endl;
        return embedding;
    }

private:
    cv::dnn::Net m_net;                 ///< Loaded re-identification network
    cv::Size m_inputLayer{ 128, 256 };  ///< Size of the network input (width, height)
};


int main(int argc, char** argv)
{
cv::CommandLineParser parser(argc, argv, keys);
Expand All @@ -54,6 +148,39 @@ int main(int argc, char** argv)
cv::ocl::setUseOpenCL(useOCL);
std::cout << (cv::ocl::useOpenCL() ? "OpenCL is enabled" : "OpenCL not used") << std::endl;

#if 0
EmbeddingsCalculatorSimple ec;
ec.Initialize("C:/work/home/mtracker/tmp/reid/models/osnet_x0_25_msmt17.onnx",
"C:/work/home/mtracker/tmp/reid/models/osnet_x0_25_msmt17.onnx",
cv::Size(128, 256));
std::cout << "ec.IsInitialized(): " << ec.IsInitialized() << std::endl;

cv::Mat img = cv::imread("C:/work/home/mtracker/Multitarget-tracker/build/Release/vlcsnap-2023-10-06-17h31m54s413.png");
cv::Rect r1(564, 526, 124, 260);
//cv::Rect r2(860, 180, 48, 160);
cv::Rect r2(560, 522, 132, 264);

cv::Mat e1 = ec.Calc(img, r1);
cv::Mat e2 = ec.Calc(img, r2);

//cv::Mat mul = e1 * e2.t();
std::cout << "e1: " << e1 << std::endl;
std::cout << "e2: " << e2 << std::endl;
cv::Mat diff;
cv::absdiff(e1, e2, diff);
cv::Scalar ss = cv::sum(diff);
cv::Mat mul = e1 * e2.t();
float res = static_cast<float>(1.f - mul.at<float>(0, 0));
std::cout << "mul = " << mul << ", sum = " << ss << ", res = " << res << std::endl;

cv::rectangle(img, r1, cv::Scalar(255, 0, 255));
cv::rectangle(img, r2, cv::Scalar(255, 0, 0));
cv::imshow("img", img);
cv::waitKey(0);

return 0;
#endif

int exampleNum = parser.get<int>("example");
int asyncPipeline = parser.get<int>("async");

Expand Down
7 changes: 3 additions & 4 deletions src/Tracker/Ctracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ void CTracker::CreateDistaceMatrix(const regions_t& regions,
if (resCos.second)
{
dist += m_settings.m_distType[ind] * resCos.first;
//std::cout << "CalcCosine: " << TypeConverter::Type2Str(track->LastRegion().m_type) << ", reg = " << reg.m_brect << ", track = " << track->LastRegion().m_brect << ": res = " << resCos.value() << ", dist = " << dist << std::endl;
//std::cout << "CalcCosine: " << TypeConverter::Type2Str(track->LastRegion().m_type) << ", reg = " << reg.m_brect << ", track = " << track->LastRegion().m_brect << ": res = " << resCos.first << ", dist = " << dist << std::endl;
}
else
{
Expand Down Expand Up @@ -628,13 +628,12 @@ void CTracker::CalcEmbeddins(std::vector<RegionEmbedding>& regionEmbeddings, con
if (embCalc != std::end(m_embCalculators))
{
embCalc->second->Calc(currFrame, regions[j].m_brect, regionEmbeddings[j].m_embedding);
regionEmbeddings[j].m_embDot = regionEmbeddings[j].m_embedding.dot(regionEmbeddings[j].m_embedding);

// std::cout << "Founded! m_embedding = " << regionEmbeddings[j].m_embedding.size() << ", m_embDot = " << regionEmbeddings[j].m_embDot << std::endl;
//std::cout << "Founded! m_embedding = " << regionEmbeddings[j].m_embedding.size() << std::endl;
}
else
{
// std::cout << "Not found" << std::endl;
//std::cout << "Not found" << std::endl;
}
}
}
Expand Down
58 changes: 50 additions & 8 deletions src/Tracker/EmbeddingsCalculator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,56 @@ class EmbeddingsCalculator
m_inputLayer = inputLayer;

#if 1
m_net = cv::dnn::readNet(weightsName, cfgName);
m_net = cv::dnn::readNet(weightsName);
#else
m_net = cv::dnn::readNetFromTensorflow(weightsName, cfgName);
m_net = cv::dnn::readNetFromTorch(weightsName);
#endif
if (!m_net.empty())
{
m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4))
std::map<cv::dnn::Target, std::string> dictTargets;
dictTargets[cv::dnn::DNN_TARGET_CPU] = "DNN_TARGET_CPU";
dictTargets[cv::dnn::DNN_TARGET_OPENCL] = "DNN_TARGET_OPENCL";
dictTargets[cv::dnn::DNN_TARGET_OPENCL_FP16] = "DNN_TARGET_OPENCL_FP16";
dictTargets[cv::dnn::DNN_TARGET_MYRIAD] = "DNN_TARGET_MYRIAD";
dictTargets[cv::dnn::DNN_TARGET_CUDA] = "DNN_TARGET_CUDA";
dictTargets[cv::dnn::DNN_TARGET_CUDA_FP16] = "DNN_TARGET_CUDA_FP16";

std::map<int, std::string> dictBackends;
dictBackends[cv::dnn::DNN_BACKEND_DEFAULT] = "DNN_BACKEND_DEFAULT";
dictBackends[cv::dnn::DNN_BACKEND_HALIDE] = "DNN_BACKEND_HALIDE";
dictBackends[cv::dnn::DNN_BACKEND_INFERENCE_ENGINE] = "DNN_BACKEND_INFERENCE_ENGINE";
dictBackends[cv::dnn::DNN_BACKEND_OPENCV] = "DNN_BACKEND_OPENCV";
dictBackends[cv::dnn::DNN_BACKEND_VKCOM] = "DNN_BACKEND_VKCOM";
dictBackends[cv::dnn::DNN_BACKEND_CUDA] = "DNN_BACKEND_CUDA";
dictBackends[1000000] = "DNN_BACKEND_INFERENCE_ENGINE_NGRAPH";
dictBackends[1000000 + 1] = "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019";

std::cout << "Avaible pairs for Target - backend:" << std::endl;
std::vector<std::pair<cv::dnn::Backend, cv::dnn::Target>> pairs = cv::dnn::getAvailableBackends();
for (auto p : pairs)
{
std::cout << dictBackends[p.first] << " (" << p.first << ") - " << dictTargets[p.second] << " (" << p.second << ")" << std::endl;

if (p.first == cv::dnn::DNN_BACKEND_CUDA)
{
//m_net.setPreferableTarget(p.second);
//m_net.setPreferableBackend(p.first);
//std::cout << "Set!" << std::endl;
}
}
#endif

auto outNames = m_net.getUnconnectedOutLayersNames();
auto outLayers = m_net.getUnconnectedOutLayers();
auto outLayerType = m_net.getLayer(outLayers[0])->type;

std::vector<cv::dnn::MatShape> outputs;
std::vector<cv::dnn::MatShape> internals;
m_net.getLayerShapes(cv::dnn::MatShape(), 0, outputs, internals);
std::cout << "REID: getLayerShapes: outputs (" << outputs.size() << ") = " << (outputs.size() > 0 ? outputs[0].size() : 0) << ", internals (" << internals.size() << ") = " << (internals.size() > 0 ? internals[0].size() : 0) << std::endl;
if (outputs.size() && outputs[0].size() > 3)
std::cout << "outputs = [" << outputs[0][0] << ", " << outputs[0][1] << ", " << outputs[0][2] << ", " << outputs[0][3] << "], internals = [" << internals[0][0] << ", " << internals[0][1] << ", " << internals[0][2] << ", " << internals[0][3] << "]" << std::endl;
}
return !m_net.empty();
#else
Expand Down Expand Up @@ -72,13 +114,13 @@ class EmbeddingsCalculator
Clamp(rect.x, rect.width, img.cols);
Clamp(rect.y, rect.height, img.rows);

cv::UMat obj;
cv::resize(img(rect), obj, m_inputLayer, 0., 0., cv::INTER_LANCZOS4);
cv::Mat blob = cv::dnn::blobFromImage(obj, 1.0, cv::Size(), cv::Scalar(), false, false);
cv::Mat obj;
cv::resize(img(rect), obj, m_inputLayer, 0., 0., cv::INTER_CUBIC);
cv::Mat blob = cv::dnn::blobFromImage(obj, 1.0 / 255.0, cv::Size(), cv::Scalar(), false, false, CV_32F);

m_net.setInput(blob);
embedding = m_net.forward();
//std::cout << "embedding: " << embedding.size() << ", chans = " << embedding.channels() << std::endl;
cv::normalize(m_net.forward(), embedding);
#else
std::cerr << "EmbeddingsCalculator was disabled in CMAKE! Check SetDistances params." << std::endl;
#endif
Expand Down
11 changes: 4 additions & 7 deletions src/Tracker/track.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,10 @@ std::pair<track_t, bool> CTrack::CalcCosine(const RegionEmbedding& embedding) co
track_t res = 1;
if (!embedding.m_embedding.empty() && !m_regionEmbedding.m_embedding.empty())
{
double xy = embedding.m_embedding.dot(m_regionEmbedding.m_embedding);
double norm = sqrt(embedding.m_embDot * m_regionEmbedding.m_embDot) + 1e-6;
#if 0
res = 1.f - 0.5f * fabs(static_cast<float>(xy / norm));
#else
res = 0.5f * static_cast<float>(1.0 - xy / norm);
#endif
cv::Mat mul = embedding.m_embedding * m_regionEmbedding.m_embedding.t();
res = static_cast<track_t>(1.f - mul.at<float>(0, 0));
if (res < 0)
res = 0;
//std::cout << "CTrack::CalcCosine: " << embedding.m_embedding.size() << " - " << m_regionEmbedding.m_embedding.size() << " = " << res << std::endl;
return { res, true };
}
Expand Down
1 change: 0 additions & 1 deletion src/Tracker/track.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ struct RegionEmbedding
{
cv::Mat m_hist;
cv::Mat m_embedding;
double m_embDot = 0.;
};

///
Expand Down

0 comments on commit 7dea766

Please sign in to comment.