22#define _YOLOX_CPP_CORE_HPP
33
44#include < opencv2/core/types.hpp>
5- #include < opencv2/core/simd_intrinsics.hpp>
6- #include < algorithm>
75
86namespace yolox_cpp
97{
@@ -34,9 +32,9 @@ namespace yolox_cpp
3432 {
3533 public:
3634 AbcYoloX () {}
37- AbcYoloX (const float nms_th = 0.45 , const float conf_th = 0.3 ,
35+ AbcYoloX (float nms_th = 0.45 , float conf_th = 0.3 ,
3836 const std::string &model_version = " 0.1.1rc0" ,
39- const int num_classes = 80 , const bool p6 = false )
37+ int num_classes = 80 , bool p6 = false )
4038 : nms_thresh_(nms_th), bbox_conf_thresh_(conf_th),
4139 num_classes_ (num_classes), p6_(p6), model_version_(model_version)
4240 {
@@ -56,17 +54,16 @@ namespace yolox_cpp
5654 const std::vector<float > std255_inv_ = {
5755 1.0 / (255.0 * 0.229 ), 1.0 / (255.0 * 0.224 ), 1.0 / (255.0 * 0.225 )};
5856 const std::vector<float > mean_std_ = {
59- -0.485 / 0.229 , -0.456 / 0.224 , -0.406 / 0.225 };
57+ -0.485 / 0.229 , -0.456 / 0.224 , -0.406 / 0.225 };
6058 const std::vector<int > strides_ = {8 , 16 , 32 };
6159 const std::vector<int > strides_p6_ = {8 , 16 , 32 , 64 };
6260 std::vector<GridAndStride> grid_strides_;
6361
6462 cv::Mat static_resize (const cv::Mat &img)
6563 {
6664 const float r = std::min (
67- static_cast <float >(input_w_) / (static_cast <float >(img.cols ) * 1 .0f ),
68- static_cast <float >(input_h_) / (static_cast <float >(img.rows ) * 1 .0f ));
69- // r = std::min(r, 1.0f);
65+ static_cast <float >(input_w_) / static_cast <float >(img.cols ),
66+ static_cast <float >(input_h_) / static_cast <float >(img.rows ));
7067 const int unpad_w = r * img.cols ;
7168 const int unpad_h = r * img.rows ;
7269 cv::Mat re (unpad_h, unpad_w, CV_8UC3);
@@ -79,23 +76,55 @@ namespace yolox_cpp
7976 // for NCHW
8077 void blobFromImage (const cv::Mat &img, float *blob_data)
8178 {
82- blobFromImage_cpu (img, blob_data);
83- // #if defined(CV_SIMD128) && CV_SIMD128 == 1
84- // blobFromImage_simd(img, blob_data);
85- // #else
86- // blobFromImage_cpu(img, blob_data);
87- // #endif
79+ const size_t channels = 3 ;
80+ const size_t img_h = img.rows ;
81+ const size_t img_w = img.cols ;
82+ const size_t img_hw = img_h * img_w;
83+ float *blob_data_ch0 = blob_data;
84+ float *blob_data_ch1 = blob_data + img_hw;
85+ float *blob_data_ch2 = blob_data + img_hw * 2 ;
86+ // HWC -> CHW
87+ if (this ->model_version_ == " 0.1.0" )
88+ {
89+ for (size_t i = 0 ; i < img_hw; ++i)
90+ {
91+ // blob = (img / 255.0 - mean) / std
92+ const size_t src_idx = i * channels;
93+ blob_data_ch0[i] = static_cast <float >(img.data [src_idx + 0 ]) * this ->std255_inv_ [0 ] + this ->mean_std_ [0 ];
94+ blob_data_ch1[i] = static_cast <float >(img.data [src_idx + 1 ]) * this ->std255_inv_ [1 ] + this ->mean_std_ [1 ];
95+ blob_data_ch2[i] = static_cast <float >(img.data [src_idx + 2 ]) * this ->std255_inv_ [2 ] + this ->mean_std_ [2 ];
96+ }
97+ }
98+ else
99+ {
100+ for (size_t i = 0 ; i < img_hw; ++i)
101+ {
102+ const size_t src_idx = i * channels;
103+ blob_data_ch0[i] = static_cast <float >(img.data [src_idx + 0 ]);
104+ blob_data_ch1[i] = static_cast <float >(img.data [src_idx + 1 ]);
105+ blob_data_ch2[i] = static_cast <float >(img.data [src_idx + 2 ]);
106+ }
107+ }
88108 }
89109
90110 // for NHWC
91111 void blobFromImage_nhwc (const cv::Mat &img, float *blob_data)
92112 {
93- blobFromImage_nhwc_cpu (img, blob_data);
94- // #if defined(CV_SIMD128) && CV_SIMD128 == 1
95- // blobFromImage_nhwc_simd(img, blob_data);
96- // #else
97- // blobFromImage_nhwc_cpu(img, blob_data);
98- // #endif
113+ const size_t channels = 3 ;
114+ cv::Mat img_f32;
115+ img.convertTo (img_f32, CV_32FC3);
116+ if (this ->model_version_ == " 0.1.0" )
117+ {
118+ std::vector<cv::Mat> img_f32_split (3 );
119+ cv::split (img_f32, img_f32_split);
120+ for (size_t i = 0 ; i < channels; ++i)
121+ {
122+ img_f32_split[i] *= this ->std255_inv_ [i];
123+ img_f32_split[i] += this ->mean_std_ [i];
124+ }
125+ cv::merge (img_f32_split, img_f32);
126+ }
127+ blob_data = reinterpret_cast <float *>(img_f32.data );
99128 }
100129
101130 void generate_grids_and_stride (const int target_w, const int target_h, const std::vector<int > &strides, std::vector<GridAndStride> &grid_strides)
@@ -216,10 +245,10 @@ namespace yolox_cpp
216245
217246 std::sort (
218247 proposals.begin (), proposals.end (),
219- [](const Object& a, const Object& b) {
248+ [](const Object &a, const Object &b)
249+ {
220250 return a.prob > b.prob ; // descent
221- }
222- );
251+ });
223252
224253 std::vector<int > picked;
225254 nms_sorted_bboxes (proposals, picked, nms_thresh_);
@@ -251,170 +280,6 @@ namespace yolox_cpp
251280 objects[i].rect .height = y1 - y0;
252281 }
253282 }
254-
255- private:
#if defined(CV_SIMD128) && CV_SIMD128 == 1
/**
 * SIMD variant of the HWC -> CHW float conversion, processing 4 pixels per
 * iteration with OpenCV universal intrinsics; leftover pixels are handled
 * one at a time.
 *
 * When model_version_ equals "0.1.0" each sample is normalised as
 * pixel * std255_inv_[c] + mean_std_[c]; otherwise the pixel value is stored
 * unchanged as float.
 *
 * @param img        source image, converted internally to CV_32FC3
 * @param blob_data  destination buffer; must hold at least
 *                   3 * img.rows * img.cols floats
 */
void blobFromImage_simd(const cv::Mat &img, float *blob_data)
{
    constexpr size_t channels = 3;
    constexpr size_t step = 4; // pixels handled per vector iteration
    const size_t img_hw = static_cast<size_t>(img.rows) * static_cast<size_t>(img.cols);
    const size_t simd_done_num = (img_hw / step) * step;
    const bool normalize = (this->model_version_ == "0.1.0");

    float *blob_data_ch0 = blob_data;
    float *blob_data_ch1 = blob_data + img_hw;
    float *blob_data_ch2 = blob_data + img_hw * 2;

    // convertTo allocates a fresh float image, so both the vector loop and the
    // scalar tail can index it linearly.
    cv::Mat img_f32;
    img.convertTo(img_f32, CV_32FC3);
    const float *src = reinterpret_cast<const float *>(img_f32.data);

    if (normalize)
    {
        // mean_std_ is initialised with -mean / std, so it is added as-is.
        // Negating it here would make the vector lanes disagree with the
        // scalar tail below, which adds mean_std_ unmodified.
        const cv::v_float32x4 offset0 = cv::v_setall_f32(this->mean_std_[0]);
        const cv::v_float32x4 offset1 = cv::v_setall_f32(this->mean_std_[1]);
        const cv::v_float32x4 offset2 = cv::v_setall_f32(this->mean_std_[2]);
        const cv::v_float32x4 scale0 = cv::v_setall_f32(this->std255_inv_[0]);
        const cv::v_float32x4 scale1 = cv::v_setall_f32(this->std255_inv_[1]);
        const cv::v_float32x4 scale2 = cv::v_setall_f32(this->std255_inv_[2]);

        for (size_t i = 0; i < simd_done_num; i += step)
        {
            cv::v_float32x4 ch0_f;
            cv::v_float32x4 ch1_f;
            cv::v_float32x4 ch2_f;
            // load 4 pixels x 3 channels and split them per channel
            cv::v_load_deinterleave(src + i * channels, ch0_f, ch1_f, ch2_f);

            cv::v_store(blob_data_ch0 + i, ch0_f * scale0 + offset0);
            cv::v_store(blob_data_ch1 + i, ch1_f * scale1 + offset1);
            cv::v_store(blob_data_ch2 + i, ch2_f * scale2 + offset2);
        }

        // scalar tail: fewer than `step` pixels remain
        for (size_t i = simd_done_num; i < img_hw; ++i)
        {
            const float *px = src + i * channels;
            blob_data_ch0[i] = px[0] * this->std255_inv_[0] + this->mean_std_[0];
            blob_data_ch1[i] = px[1] * this->std255_inv_[1] + this->mean_std_[1];
            blob_data_ch2[i] = px[2] * this->std255_inv_[2] + this->mean_std_[2];
        }
    }
    else
    {
        for (size_t i = 0; i < simd_done_num; i += step)
        {
            cv::v_float32x4 ch0_f;
            cv::v_float32x4 ch1_f;
            cv::v_float32x4 ch2_f;
            // load 4 pixels x 3 channels and split them per channel
            cv::v_load_deinterleave(src + i * channels, ch0_f, ch1_f, ch2_f);

            cv::v_store(blob_data_ch0 + i, ch0_f);
            cv::v_store(blob_data_ch1 + i, ch1_f);
            cv::v_store(blob_data_ch2 + i, ch2_f);
        }

        // scalar tail: fewer than `step` pixels remain
        for (size_t i = simd_done_num; i < img_hw; ++i)
        {
            const float *px = src + i * channels;
            blob_data_ch0[i] = px[0];
            blob_data_ch1[i] = px[1];
            blob_data_ch2[i] = px[2];
        }
    }
}
#endif
356- void blobFromImage_cpu (const cv::Mat &img, float *blob_data)
357- {
358- const size_t channels = 3 ;
359- const size_t img_h = img.rows ;
360- const size_t img_w = img.cols ;
361- const size_t img_hw = img_h * img_w;
362- float *blob_data_ch0 = blob_data;
363- float *blob_data_ch1 = blob_data + img_hw;
364- float *blob_data_ch2 = blob_data + img_hw * 2 ;
365- // HWC -> CHW
366- if (this ->model_version_ == " 0.1.0" )
367- {
368- for (size_t i = 0 ; i < img_hw; ++i)
369- {
370- // blob = (img / 255.0 - mean) / std
371- const size_t src_idx = i * channels;
372- blob_data_ch0[i] = static_cast <float >(img.data [src_idx + 0 ]) * this ->std255_inv_ [0 ] + this ->mean_std_ [0 ];
373- blob_data_ch1[i] = static_cast <float >(img.data [src_idx + 1 ]) * this ->std255_inv_ [1 ] + this ->mean_std_ [1 ];
374- blob_data_ch2[i] = static_cast <float >(img.data [src_idx + 2 ]) * this ->std255_inv_ [2 ] + this ->mean_std_ [2 ];
375- }
376- }
377- else
378- {
379- for (size_t i = 0 ; i < img_hw; ++i)
380- {
381- // HWC -> CHW
382- const size_t src_idx = i * channels;
383- blob_data_ch0[i] = static_cast <float >(img.data [src_idx + 0 ]);
384- blob_data_ch1[i] = static_cast <float >(img.data [src_idx + 1 ]);
385- blob_data_ch2[i] = static_cast <float >(img.data [src_idx + 2 ]);
386- }
387- }
388-
389- }
390- void blobFromImage_nhwc_cpu (const cv::Mat &img, float *blob_data)
391- {
392- const size_t channels = 3 ;
393- const size_t img_h = img.rows ;
394- const size_t img_w = img.cols ;
395- if (this ->model_version_ == " 0.1.0" )
396- {
397- for (size_t i = 0 ; i < img_h * img_w; ++i)
398- {
399- for (size_t c = 0 ; c < channels; ++c)
400- {
401- // blob = (img / 255.0 - mean) / std
402- blob_data[i * channels + c] =
403- static_cast <float >(img.data [i * channels + c]) * this ->std255_inv_ [c] + this ->mean_std_ [c];
404- }
405- }
406- }
407- else
408- {
409- for (size_t i = 0 ; i < img_h * img_w; ++i)
410- {
411- for (size_t c = 0 ; c < channels; ++c)
412- {
413- blob_data[i * channels + c] = static_cast <float >(img.data [i * channels + c]); // 0.1.1rc0 or later
414- }
415- }
416- }
417- }
418283 };
419284}
420285#endif
0 commit comments