Skip to content

Commit 16a618d

Browse files
committed
replace tensorrt deprecated function
1 parent 4605de8 commit 16a618d

2 files changed

Lines changed: 55 additions & 36 deletions

File tree

yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ namespace yolox_cpp{
3636
YoloXTensorRT(file_name_t path_to_engine, int device=0,
3737
float nms_th=0.45, float conf_th=0.3, std::string model_version="0.1.1rc0",
3838
int num_classes=80, bool p6=false);
39+
~YoloXTensorRT();
3940
std::vector<Object> inference(const cv::Mat& frame) override;
4041

4142
private:
@@ -49,6 +50,7 @@ namespace yolox_cpp{
4950
int output_size_;
5051
const int inputIndex_ = 0;
5152
const int outputIndex_ = 1;
53+
void *inference_buffers_[2];
5254

5355
};
5456
} // namespace yolox_cpp

yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,32 @@
11
#include "yolox_cpp/yolox_tensorrt.hpp"
22

3-
namespace yolox_cpp{
3+
namespace yolox_cpp
4+
{
45

56
YoloXTensorRT::YoloXTensorRT(file_name_t path_to_engine, int device,
67
float nms_th, float conf_th, std::string model_version,
78
int num_classes, bool p6)
8-
:AbcYoloX(nms_th, conf_th, model_version, num_classes, p6),
9-
DEVICE_(device)
9+
: AbcYoloX(nms_th, conf_th, model_version, num_classes, p6),
10+
DEVICE_(device)
1011
{
1112
cudaSetDevice(this->DEVICE_);
1213
// create a model using the API directly and serialize it to a stream
1314
char *trtModelStream{nullptr};
1415
size_t size{0};
1516

1617
std::ifstream file(path_to_engine, std::ios::binary);
17-
if (file.good()) {
18+
if (file.good())
19+
{
1820
file.seekg(0, file.end);
1921
size = file.tellg();
2022
file.seekg(0, file.beg);
2123
trtModelStream = new char[size];
2224
assert(trtModelStream);
2325
file.read(trtModelStream, size);
2426
file.close();
25-
}else{
27+
}
28+
else
29+
{
2630
std::cerr << "invalid arguments path_to_engine: " << path_to_engine << std::endl;
2731
return;
2832
}
@@ -35,28 +39,41 @@ namespace yolox_cpp{
3539
assert(this->context_ != nullptr);
3640
delete[] trtModelStream;
3741

38-
auto input_dims = this->engine_->getBindingDimensions(0);
42+
const auto input_name = this->engine_->getIOTensorName(this->inputIndex_);
43+
const auto input_dims = this->engine_->getTensorShape(input_name);
3944
this->input_h_ = input_dims.d[2];
4045
this->input_w_ = input_dims.d[3];
4146
std::cout << "INPUT_HEIGHT: " << this->input_h_ << std::endl;
4247
std::cout << "INPUT_WIDTH: " << this->input_w_ << std::endl;
4348

44-
auto out_dims = this->engine_->getBindingDimensions(1);
49+
const auto output_name = this->engine_->getIOTensorName(this->outputIndex_);
50+
auto output_dims = this->engine_->getTensorShape(output_name);
4551
this->output_size_ = 1;
46-
for(int j=0; j<out_dims.nbDims; ++j) {
47-
this->output_size_ *= out_dims.d[j];
52+
for (int j = 0; j < output_dims.nbDims; ++j)
53+
{
54+
this->output_size_ *= output_dims.d[j];
4855
}
4956

5057
// Pointers to input and output device buffers to pass to engine.
5158
// Engine requires exactly IEngine::getNbBindings() number of buffers.
52-
assert(this->engine_->getNbBindings() == 2);
59+
assert(this->engine_->getNbIOTensors() == 2);
5360
// In order to bind the buffers, we need to know the names of the input and output tensors.
5461
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
55-
assert(this->engine_->getBindingDataType(this->inputIndex_) == nvinfer1::DataType::kFLOAT);
56-
assert(this->engine_->getBindingDataType(this->outputIndex_) == nvinfer1::DataType::kFLOAT);
62+
assert(this->engine_->getTensorDataType(input_name) == nvinfer1::DataType::kFLOAT);
63+
assert(this->engine_->getTensorDataType(output_name) == nvinfer1::DataType::kFLOAT);
64+
65+
// Create GPU buffers on device
66+
CHECK(cudaMalloc(&this->inference_buffers_[this->inputIndex_], 3 * this->input_h_ * this->input_w_ * sizeof(float)));
67+
CHECK(cudaMalloc(&this->inference_buffers_[this->outputIndex_], this->output_size_ * sizeof(float)));
68+
69+
assert(this->context_->setInputShape(input_name, input_dims));
70+
assert(this->context_->allInputDimensionsSpecified());
71+
72+
assert(this->context_->setTensorAddress(input_name, this->inference_buffers_[this->inputIndex_]));
73+
assert(this->context_->setTensorAddress(output_name, this->inference_buffers_[this->outputIndex_]));
5774

5875
// Prepare GridAndStrides
59-
if(this->p6_)
76+
if (this->p6_)
6077
{
6178
generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_p6_, this->grid_strides_);
6279
}
@@ -66,18 +83,24 @@ namespace yolox_cpp{
6683
}
6784
}
6885

69-
std::vector<Object> YoloXTensorRT::inference(const cv::Mat& frame)
86+
YoloXTensorRT::~YoloXTensorRT()
87+
{
88+
CHECK(cudaFree(inference_buffers_[this->inputIndex_]));
89+
CHECK(cudaFree(inference_buffers_[this->outputIndex_]));
90+
}
91+
92+
std::vector<Object> YoloXTensorRT::inference(const cv::Mat &frame)
7093
{
7194
// preprocess
7295
auto pr_img = static_resize(frame);
73-
float* input_blob = new float[pr_img.total()*3];
96+
float *input_blob = new float[pr_img.total() * 3];
7497
blobFromImage(pr_img, input_blob);
7598

7699
// inference
77-
float* output_blob = new float[this->output_size_];
100+
float *output_blob = new float[this->output_size_];
78101
this->doInference(input_blob, output_blob);
79102

80-
float scale = std::min(this->input_w_ / (frame.cols*1.0), this->input_h_ / (frame.rows*1.0));
103+
float scale = std::min(this->input_w_ / (frame.cols * 1.0), this->input_h_ / (frame.rows * 1.0));
81104

82105
std::vector<Object> objects;
83106
decode_outputs(output_blob, this->grid_strides_, objects, this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
@@ -87,31 +110,25 @@ namespace yolox_cpp{
87110
return objects;
88111
}
89112

90-
void YoloXTensorRT::doInference(float* input, float* output)
113+
void YoloXTensorRT::doInference(float *input, float *output)
91114
{
92-
// Pointers to input and output device buffers to pass to engine.
93-
// Engine requires exactly IEngine::getNbBindings() number of buffers.
94-
void* buffers[2];
95-
96-
// Create GPU buffers on device
97-
CHECK(cudaMalloc(&buffers[this->inputIndex_], 3 * this->input_h_ * this->input_w_ * sizeof(float)));
98-
CHECK(cudaMalloc(&buffers[this->outputIndex_], this->output_size_ * sizeof(float)));
99-
100115
// Create stream
101116
cudaStream_t stream;
102117
CHECK(cudaStreamCreate(&stream));
103118

104119
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
105-
CHECK(cudaMemcpyAsync(buffers[this->inputIndex_], input, 3 * this->input_h_ * this->input_w_ * sizeof(float), cudaMemcpyHostToDevice, stream));
106-
context_->enqueueV2(buffers, stream, nullptr);
107-
CHECK(cudaMemcpyAsync(output, buffers[this->outputIndex_], this->output_size_ * sizeof(float), cudaMemcpyDeviceToHost, stream));
108-
cudaStreamSynchronize(stream);
109-
110-
// Release stream and buffers
111-
cudaStreamDestroy(stream);
112-
CHECK(cudaFree(buffers[0]));
113-
CHECK(cudaFree(buffers[1]));
120+
CHECK(cudaMemcpyAsync(this->inference_buffers_[this->inputIndex_], input, 3 * this->input_h_ * this->input_w_ * sizeof(float), cudaMemcpyHostToDevice, stream));
121+
122+
bool success = context_->enqueueV3(stream);
123+
if (!success)
124+
throw std::runtime_error("failed inference");
125+
126+
CHECK(cudaMemcpyAsync(output, this->inference_buffers_[this->outputIndex_], this->output_size_ * sizeof(float), cudaMemcpyDeviceToHost, stream));
127+
128+
CHECK(cudaStreamSynchronize(stream));
129+
130+
// Release stream
131+
CHECK(cudaStreamDestroy(stream));
114132
}
115133

116134
} // namespace yolox_cpp
117-

0 commit comments

Comments
 (0)