Skip to content

Commit 6fe9ba9

Browse files
committed
fix blobFromImage
1 parent a6345ba commit 6fe9ba9

1 file changed

Lines changed: 52 additions & 187 deletions

File tree

  • yolox_ros_cpp/yolox_cpp/include/yolox_cpp

yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp

Lines changed: 52 additions & 187 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,6 @@
22
#define _YOLOX_CPP_CORE_HPP
33

44
#include <opencv2/core/types.hpp>
5-
#include <opencv2/core/simd_intrinsics.hpp>
6-
#include <algorithm>
75

86
namespace yolox_cpp
97
{
@@ -34,9 +32,9 @@ namespace yolox_cpp
3432
{
3533
public:
3634
AbcYoloX() {}
37-
AbcYoloX(const float nms_th = 0.45, const float conf_th = 0.3,
35+
AbcYoloX(float nms_th = 0.45, float conf_th = 0.3,
3836
const std::string &model_version = "0.1.1rc0",
39-
const int num_classes = 80, const bool p6 = false)
37+
int num_classes = 80, bool p6 = false)
4038
: nms_thresh_(nms_th), bbox_conf_thresh_(conf_th),
4139
num_classes_(num_classes), p6_(p6), model_version_(model_version)
4240
{
@@ -56,17 +54,16 @@ namespace yolox_cpp
5654
const std::vector<float> std255_inv_ = {
5755
1.0 / (255.0 * 0.229), 1.0 / (255.0 * 0.224), 1.0 / (255.0 * 0.225)};
5856
const std::vector<float> mean_std_ = {
59-
-0.485 / 0.229 , -0.456 / 0.224, -0.406 / 0.225};
57+
-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225};
6058
const std::vector<int> strides_ = {8, 16, 32};
6159
const std::vector<int> strides_p6_ = {8, 16, 32, 64};
6260
std::vector<GridAndStride> grid_strides_;
6361

6462
cv::Mat static_resize(const cv::Mat &img)
6563
{
6664
const float r = std::min(
67-
static_cast<float>(input_w_) / (static_cast<float>(img.cols) * 1.0f),
68-
static_cast<float>(input_h_) / (static_cast<float>(img.rows) * 1.0f));
69-
// r = std::min(r, 1.0f);
65+
static_cast<float>(input_w_) / static_cast<float>(img.cols),
66+
static_cast<float>(input_h_) / static_cast<float>(img.rows));
7067
const int unpad_w = r * img.cols;
7168
const int unpad_h = r * img.rows;
7269
cv::Mat re(unpad_h, unpad_w, CV_8UC3);
@@ -79,23 +76,55 @@ namespace yolox_cpp
7976
// for NCHW
8077
/**
 * @brief Convert an interleaved 3-channel 8-bit image (HWC) into a planar float blob (CHW).
 *
 * When model_version_ is "0.1.0" each value is normalized as
 * (pixel / 255 - mean) / std, using the precomputed std255_inv_ and
 * mean_std_ tables. For every other version the pixel value is stored
 * unchanged as a float.
 *
 * @param img        Source image, read as rows x cols pixels of 3 unsigned bytes each.
 * @param blob_data  Caller-owned output buffer; rows * cols * 3 floats are written,
 *                   one full plane per channel.
 */
void blobFromImage(const cv::Mat &img, float *blob_data)
{
    constexpr size_t channels = 3;
    const size_t img_h = static_cast<size_t>(img.rows);
    const size_t img_w = static_cast<size_t>(img.cols);
    const size_t img_hw = img_h * img_w;

    float *blob_data_ch0 = blob_data;
    float *blob_data_ch1 = blob_data + img_hw;
    float *blob_data_ch2 = blob_data + img_hw * 2;

    // Decide once instead of comparing strings per pixel.
    const bool normalize = (this->model_version_ == "0.1.0");

    // HWC -> CHW
    for (size_t y = 0; y < img_h; ++y)
    {
        // Row pointers honor the matrix row stride, so ROI views and padded
        // rows are read correctly (plain img.data indexing assumes the
        // pixel data is one continuous run).
        const unsigned char *src_row = img.ptr<unsigned char>(static_cast<int>(y));
        const size_t dst_base = y * img_w;

        if (normalize)
        {
            for (size_t x = 0; x < img_w; ++x)
            {
                // blob = (img / 255.0 - mean) / std
                const size_t src_idx = x * channels;
                const size_t dst_idx = dst_base + x;
                blob_data_ch0[dst_idx] = static_cast<float>(src_row[src_idx + 0]) * this->std255_inv_[0] + this->mean_std_[0];
                blob_data_ch1[dst_idx] = static_cast<float>(src_row[src_idx + 1]) * this->std255_inv_[1] + this->mean_std_[1];
                blob_data_ch2[dst_idx] = static_cast<float>(src_row[src_idx + 2]) * this->std255_inv_[2] + this->mean_std_[2];
            }
        }
        else
        {
            for (size_t x = 0; x < img_w; ++x)
            {
                const size_t src_idx = x * channels;
                const size_t dst_idx = dst_base + x;
                blob_data_ch0[dst_idx] = static_cast<float>(src_row[src_idx + 0]);
                blob_data_ch1[dst_idx] = static_cast<float>(src_row[src_idx + 1]);
                blob_data_ch2[dst_idx] = static_cast<float>(src_row[src_idx + 2]);
            }
        }
    }
}
89109

90110
// for NHWC
91111
/**
 * @brief Convert an interleaved 3-channel 8-bit image into an interleaved float blob (NHWC).
 *
 * The channel order and pixel order are kept; only the element type changes.
 * When model_version_ is "0.1.0" each value is additionally normalized as
 * (pixel / 255 - mean) / std via std255_inv_ and mean_std_.
 *
 * The result is written directly into the caller's buffer. Assigning a
 * temporary matrix's data pointer to the blob_data parameter would only change
 * the local copy of the pointer and leave the caller's buffer untouched.
 *
 * @param img        Source image, read as rows x cols pixels of 3 unsigned bytes each.
 * @param blob_data  Caller-owned output buffer; rows * cols * 3 floats are written.
 */
void blobFromImage_nhwc(const cv::Mat &img, float *blob_data)
{
    constexpr size_t channels = 3;
    const size_t img_h = static_cast<size_t>(img.rows);
    const size_t img_w = static_cast<size_t>(img.cols);
    const size_t row_elems = img_w * channels;

    const bool normalize = (this->model_version_ == "0.1.0");

    for (size_t y = 0; y < img_h; ++y)
    {
        // Row pointer honors the matrix row stride (safe for non-continuous images).
        const unsigned char *src_row = img.ptr<unsigned char>(static_cast<int>(y));
        float *dst_row = blob_data + y * row_elems;

        if (normalize)
        {
            for (size_t x = 0; x < img_w; ++x)
            {
                for (size_t c = 0; c < channels; ++c)
                {
                    // blob = (img / 255.0 - mean) / std
                    const size_t idx = x * channels + c;
                    dst_row[idx] = static_cast<float>(src_row[idx]) * this->std255_inv_[c] + this->mean_std_[c];
                }
            }
        }
        else
        {
            for (size_t idx = 0; idx < row_elems; ++idx)
            {
                dst_row[idx] = static_cast<float>(src_row[idx]);
            }
        }
    }
}
100129

101130
void generate_grids_and_stride(const int target_w, const int target_h, const std::vector<int> &strides, std::vector<GridAndStride> &grid_strides)
@@ -216,10 +245,10 @@ namespace yolox_cpp
216245

217246
std::sort(
218247
proposals.begin(), proposals.end(),
219-
[](const Object& a, const Object& b) {
248+
[](const Object &a, const Object &b)
249+
{
220250
return a.prob > b.prob; // descent
221-
}
222-
);
251+
});
223252

224253
std::vector<int> picked;
225254
nms_sorted_bboxes(proposals, picked, nms_thresh_);
@@ -251,170 +280,6 @@ namespace yolox_cpp
251280
objects[i].rect.height = y1 - y0;
252281
}
253282
}
254-
255-
private:
256-
#if defined(CV_SIMD128) && CV_SIMD128 == 1
/**
 * @brief SIMD variant of the HWC -> CHW conversion (4 pixels per iteration).
 *
 * The image is widened to 32-bit float once, then de-interleaved four pixels
 * at a time into the three output planes. When model_version_ is "0.1.0" the
 * values are normalized as (pixel / 255 - mean) / std; otherwise they are
 * stored unchanged. Pixels left over after the last full group of four are
 * handled by a scalar tail loop.
 *
 * @param img        Source image, read as rows x cols pixels of 3 channels.
 * @param blob_data  Caller-owned output buffer; rows * cols * 3 floats are written.
 */
void blobFromImage_simd(const cv::Mat &img, float *blob_data)
{
    const size_t channels = 3;
    const size_t img_h = img.rows;
    const size_t img_w = img.cols;
    const size_t img_hw = img_h * img_w;

    const size_t step = 4; // load 4 pixel
    const size_t N = img_hw / step;
    const size_t remain = img_hw % step;

    float *blob_data_ch0 = blob_data;
    float *blob_data_ch1 = blob_data + img_hw;
    float *blob_data_ch2 = blob_data + img_hw * 2;

    const bool normalize = (this->model_version_ == "0.1.0");

    // Widen once for both branches (the conversion does not depend on the version).
    cv::Mat img_f32;
    img.convertTo(img_f32, CV_32FC3);
    const float *src_f32 = reinterpret_cast<const float *>(img_f32.data);

    if (normalize)
    {
        // mean_std_ already stores -mean / std, so it is added as-is; negating
        // it here would flip the sign relative to the scalar tail below.
        const cv::v_float32x4 mean_std0 = cv::v_setall_f32(this->mean_std_[0]);
        const cv::v_float32x4 mean_std1 = cv::v_setall_f32(this->mean_std_[1]);
        const cv::v_float32x4 mean_std2 = cv::v_setall_f32(this->mean_std_[2]);
        const cv::v_float32x4 std255_inv_0 = cv::v_setall_f32(this->std255_inv_[0]);
        const cv::v_float32x4 std255_inv_1 = cv::v_setall_f32(this->std255_inv_[1]);
        const cv::v_float32x4 std255_inv_2 = cv::v_setall_f32(this->std255_inv_[2]);

        for (size_t i = 0; i < N; ++i)
        {
            cv::v_float32x4 ch0_f;
            cv::v_float32x4 ch1_f;
            cv::v_float32x4 ch2_f;
            // load 4 pixel x 3ch
            cv::v_load_deinterleave(src_f32 + i * (step * channels), ch0_f, ch1_f, ch2_f);

            // blob = (img / 255.0 - mean) / std
            ch0_f = ch0_f * std255_inv_0 + mean_std0;
            ch1_f = ch1_f * std255_inv_1 + mean_std1;
            ch2_f = ch2_f * std255_inv_2 + mean_std2;

            cv::v_store(blob_data_ch0 + i * step, ch0_f);
            cv::v_store(blob_data_ch1 + i * step, ch1_f);
            cv::v_store(blob_data_ch2 + i * step, ch2_f);
        }
    }
    else
    {
        for (size_t i = 0; i < N; ++i)
        {
            cv::v_float32x4 ch0_f;
            cv::v_float32x4 ch1_f;
            cv::v_float32x4 ch2_f;
            // load 4 pixel x 3ch
            cv::v_load_deinterleave(src_f32 + i * (step * channels), ch0_f, ch1_f, ch2_f);

            cv::v_store(blob_data_ch0 + i * step, ch0_f);
            cv::v_store(blob_data_ch1 + i * step, ch1_f);
            cv::v_store(blob_data_ch2 + i * step, ch2_f);
        }
    }

    // Scalar tail for the pixels that did not fill a whole group of four.
    const size_t simd_done_num = N * step;
    for (size_t i = 0; i < remain; ++i)
    {
        // HWC -> CHW
        const size_t out_idx = simd_done_num + i;
        const size_t src_idx = out_idx * channels;
        if (normalize)
        {
            blob_data_ch0[out_idx] = src_f32[src_idx + 0] * this->std255_inv_[0] + this->mean_std_[0];
            blob_data_ch1[out_idx] = src_f32[src_idx + 1] * this->std255_inv_[1] + this->mean_std_[1];
            blob_data_ch2[out_idx] = src_f32[src_idx + 2] * this->std255_inv_[2] + this->mean_std_[2];
        }
        else
        {
            blob_data_ch0[out_idx] = src_f32[src_idx + 0];
            blob_data_ch1[out_idx] = src_f32[src_idx + 1];
            blob_data_ch2[out_idx] = src_f32[src_idx + 2];
        }
    }
}
#endif
356-
/**
 * @brief Scalar HWC -> CHW conversion of a 3-channel 8-bit image into a float blob.
 *
 * Pixel bytes are read linearly from img.data. For model version "0.1.0"
 * each value becomes pixel * std255_inv_[c] + mean_std_[c], i.e.
 * (pixel / 255 - mean) / std; for any other version the byte is stored
 * unchanged as a float.
 *
 * @param img        Source image, read as rows * cols pixels of 3 bytes each.
 * @param blob_data  Output buffer receiving three consecutive planes of rows * cols floats.
 */
void blobFromImage_cpu(const cv::Mat &img, float *blob_data)
{
    constexpr size_t kChannels = 3;
    const size_t rows = img.rows;
    const size_t cols = img.cols;
    const size_t pixel_count = rows * cols;

    // One destination plane per channel, laid out back to back.
    float *const planes[kChannels] = {
        blob_data,
        blob_data + pixel_count,
        blob_data + pixel_count * 2};

    const unsigned char *px = img.data;

    if (this->model_version_ != "0.1.0")
    {
        // Newer models take raw pixel values.
        for (size_t p = 0; p < pixel_count; ++p, px += kChannels)
        {
            for (size_t c = 0; c < kChannels; ++c)
            {
                planes[c][p] = static_cast<float>(px[c]);
            }
        }
        return;
    }

    for (size_t p = 0; p < pixel_count; ++p, px += kChannels)
    {
        for (size_t c = 0; c < kChannels; ++c)
        {
            // blob = (img / 255.0 - mean) / std
            planes[c][p] = static_cast<float>(px[c]) * this->std255_inv_[c] + this->mean_std_[c];
        }
    }
}
390-
/**
 * @brief Scalar conversion of a 3-channel 8-bit image into an interleaved float blob (NHWC).
 *
 * The element order is unchanged; each byte read linearly from img.data is
 * written to the same index of blob_data. For model version "0.1.0" the value
 * is normalized as pixel * std255_inv_[c] + mean_std_[c], i.e.
 * (pixel / 255 - mean) / std; otherwise it is stored unchanged as a float.
 *
 * @param img        Source image, read as rows * cols pixels of 3 bytes each.
 * @param blob_data  Output buffer receiving rows * cols * 3 floats.
 */
void blobFromImage_nhwc_cpu(const cv::Mat &img, float *blob_data)
{
    constexpr size_t kChannels = 3;
    const size_t rows = img.rows;
    const size_t cols = img.cols;
    const size_t value_count = rows * cols * kChannels;
    const unsigned char *src = img.data;

    if (this->model_version_ != "0.1.0")
    {
        // 0.1.1rc0 or later
        for (size_t k = 0; k < value_count; ++k)
        {
            blob_data[k] = static_cast<float>(src[k]);
        }
        return;
    }

    for (size_t k = 0; k < value_count; ++k)
    {
        // The channel of a flat interleaved index is its remainder modulo the channel count.
        const size_t c = k % kChannels;
        // blob = (img / 255.0 - mean) / std
        blob_data[k] = static_cast<float>(src[k]) * this->std255_inv_[c] + this->mean_std_[c];
    }
}
418283
};
419284
}
420285
#endif

0 commit comments

Comments
 (0)