@[使用onnxruntime c++ API实现yolov5m视频检测]
本文演示了yolov5m从模型导出到onnxruntime推理的过程
一.创建容器
docker run --shm-size=32g -ti --privileged --net=host \
--rm \
-v $PWD:/home -w /home ghcr.io/intel/llvm/ubuntu2204_base /bin/bash
二.安装依赖
apt install libopencv-dev -y
wget https://github.com/microsoft/onnxruntime/releases/download/v1.19.2/onnxruntime-linux-x64-1.19.2.tgz
tar -xf onnxruntime-linux-x64-1.19.2.tgz
三.生成onnx模型
rm yolov5 -rf
git clone https://github.com/ultralytics/yolov5.git
cd yolov5
pip install -r requirements.txt
wget https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m.pt
python export.py --weights yolov5m.pt --include onnx --img 640
mv yolov5m.onnx ../
cd ..
四.生成类别名
tee gen_names.py<<-'EOF'
import yaml
data=yaml.safe_load(open("yolov5/data/coco.yaml","r"))
with open("coco.names","w") as f:
for name in list(data['names'].values()):
f.write(f"{name}\n")
EOF
python gen_names.py
五.运行测试程序
tee yolov5_onnxruntime.cpp<<-'EOF'
#include <iostream>
#include <fstream>
#include <vector>
#include <algorithm>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
using namespace std;
using namespace cv;
// NMS参数
float confThreshold = 0.25; // 置信度阈值
float nmsThreshold = 0.45; // NMS 阈值
int inpWidth = 640; // 网络输入宽度
int inpHeight = 640; // 网络输入高度
// COCO 数据集类别名称
vector<string> classes;
// 加载类别名称
void loadClasses(const string& classesFile) {
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line)) {
classes.push_back(line);
}
}
// 后处理,解析模型输出并进行NMS
void postprocess(const Mat& frame, const vector<vector<Mat>>& outputs);
// 自定义 NMSBoxes 函数
void NMSBoxesCustom(const vector<Rect>& boxes, const vector<float>& scores, float scoreThreshold, float nmsThreshold, vector<int>& indices);
int main(int argc, char** argv) {
// 检查参数
if (argc != 4) {
cout << "用法: ./yolov5_onnxruntime <yolov5.onnx> <classes.names> <input.mp4>" << endl;
return -1;
}
string model_path = argv[1];
string classesFile = argv[2];
string video_path = argv[3];
// 加载类别名称
loadClasses(classesFile);
// 初始化 ONNX Runtime 环境
Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "YoloV5");
Ort::SessionOptions session_options;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
// 如果需要使用GPU,需要启用CUDA
// OrtCUDAProviderOptions cuda_options;
// session_options.AppendExecutionProvider_CUDA(cuda_options);
// 创建会话
printf("%s\n",model_path.c_str());
Ort::Session session(env, model_path.c_str(), session_options);
// 获取输入输出节点信息
Ort::AllocatorWithDefaultOptions allocator;
// 输入节点
size_t num_input_nodes = session.GetInputCount();
printf("num_input_nodes:%d\n",num_input_nodes);
vector<const char*> input_node_names(num_input_nodes);
vector<int64_t> input_node_dims;
for (int i = 0; i < num_input_nodes; i++) {
// 获取输入节点名
Ort::AllocatedStringPtr input_name = session.GetInputNameAllocated(i, allocator);
printf("input_name:%s\n",input_name.get());
input_node_names[i] = strdup(input_name.get());
// 获取输入节点维度
Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
input_node_dims = tensor_info.GetShape();
}
// 输出节点
size_t num_output_nodes = session.GetOutputCount();
printf("num_output_nodes:%d\n",num_output_nodes);
vector<const char*> output_node_names(num_output_nodes);
for (int i = 0; i < num_output_nodes; i++) {
Ort::AllocatedStringPtr output_name = session.GetOutputNameAllocated(i, allocator);
printf("output_name:%s\n",output_name.get());
output_node_names[i] = strdup(output_name.get());
}
// 打开视频文件
VideoCapture cap(video_path);
if (!cap.isOpened()) {
cerr << "无法打开视频文件!" << endl;
return -1;
}
Mat frame;
while (cap.read(frame)) {
// 图像预处理
Mat img;
resize(frame, img, Size(inpWidth, inpHeight));
cvtColor(img, img, COLOR_BGR2RGB);
img.convertTo(img, CV_32F, 1.0 / 255.0);
// 转换为CHW格式
vector<float> img_data;
int channels = img.channels();
int img_h = img.rows;
int img_w = img.cols;
img_data.resize(channels * img_h * img_w);
vector<Mat> chw;
for (int i = 0; i < channels; ++i) {
Mat channel(img.rows, img.cols, CV_32FC1, img_data.data() + i * img_h * img_w);
chw.push_back(channel);
}
split(img, chw);
// 创建输入张量
array<int64_t, 4> input_shape{1, channels, img_h, img_w};
size_t input_tensor_size = img_data.size();
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(allocator.GetInfo(), img_data.data(), input_tensor_size, input_shape.data(), input_shape.size());
// 进行推理
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), num_output_nodes);
// 解析输出
vector<vector<Mat>> outputs;
for (auto& tensor : output_tensors) {
float* output_data = tensor.GetTensorMutableData<float>();
auto type_info = tensor.GetTensorTypeAndShapeInfo();
vector<int64_t> output_shape = type_info.GetShape();
// 将输出数据转换为 Mat
int rows = output_shape[1];
int dimensions = output_shape[2];
Mat output = Mat(rows, dimensions, CV_32F, output_data);
// 将输出添加到列表
outputs.push_back({output});
}
// 后处理
postprocess(frame, outputs);
// 显示结果
imwrite("Detection.jpg", frame);
break;
}
cap.release();
destroyAllWindows();
return 0;
}
void postprocess(const Mat& frame, const vector<vector<Mat>>& outputs) {
// 存储检测结果
vector<int> classIds;
vector<float> confidences;
vector<Rect> boxes;
int img_w = frame.cols;
int img_h = frame.rows;
float x_factor = img_w / (float)inpWidth;
float y_factor = img_h / (float)inpHeight;
// 遍历检测结果
for (size_t i = 0; i < outputs.size(); ++i) {
Mat detections = outputs[i][0];
int rows = detections.rows;
for (int r = 0; r < rows; ++r) {
float confidence = detections.at<float>(r, 4);
if (confidence >= confThreshold) {
Mat scores = detections.row(r).colRange(5, detections.cols);
Point classIdPoint;
double maxClassScore;
minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
if (maxClassScore >= confThreshold) {
// 解析坐标
float cx = detections.at<float>(r, 0);
float cy = detections.at<float>(r, 1);
float w = detections.at<float>(r, 2);
float h = detections.at<float>(r, 3);
int left = int((cx - 0.5 * w) * x_factor);
int top = int((cy - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
classIds.push_back(classIdPoint.x);
confidences.push_back(confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
}
// 执行自定义非极大值抑制
vector<int> indices;
NMSBoxesCustom(boxes, confidences, confThreshold, nmsThreshold, indices);
// 绘制检测框
for (size_t i = 0; i < indices.size(); ++i) {
int idx = indices[i];
Rect box = boxes[idx];
// 绘制边界框
rectangle(frame, box, Scalar(0, 255, 0), 2);
// 显示类别名称和置信度
string label = format("%.2f", confidences[idx]);
if (!classes.empty()) {
CV_Assert(classIds[idx] < (int)classes.size());
label = classes[classIds[idx]] + ":" + label;
}
printf("%02d %04d,%04d,%04d,%04d\n",classIds[idx],box.x,box.y,box.width,box.height);
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
int top = max(box.y, labelSize.height);
rectangle(frame, Point(box.x, top - labelSize.height),
Point(box.x + labelSize.width, top + baseLine),
Scalar::all(255), FILLED);
putText(frame, label, Point(box.x, top),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0), 1);
}
}
// 自定义 NMSBoxes 函数实现
void NMSBoxesCustom(const vector<Rect>& boxes, const vector<float>& scores, float scoreThreshold, float nmsThreshold, vector<int>& indices) {
// 创建一个向量,包含每个框的索引
vector<int> idxs;
for (size_t i = 0; i < scores.size(); ++i) {
if (scores[i] >= scoreThreshold) {
idxs.push_back(i);
}
}
// 如果没有满足条件的框,返回空的索引
if (idxs.empty()) {
return;
}
// 根据置信度分数对索引进行排序(从高到低)
sort(idxs.begin(), idxs.end(), [&scores](int i1, int i2) {
return scores[i1] > scores[i2];
});
vector<bool> suppressed(idxs.size(), false);
// 进行 NMS 处理
for (size_t i = 0; i < idxs.size(); ++i) {
if (suppressed[i]) {
continue;
}
int idx_i = idxs[i];
indices.push_back(idx_i);
Rect box_i = boxes[idx_i];
for (size_t j = i + 1; j < idxs.size(); ++j) {
if (suppressed[j]) {
continue;
}
int idx_j = idxs[j];
Rect box_j = boxes[idx_j];
// 计算 IoU(交并比)
float iou = (box_i & box_j).area() / float((box_i | box_j).area());
// 如果 IoU 大于阈值,抑制当前框
if (iou > nmsThreshold) {
suppressed[j] = true;
}
}
}
}
EOF
g++ yolov5_onnxruntime.cpp -o yolov5_onnxruntime `pkg-config --cflags --libs opencv4` \
-I onnxruntime-linux-x64-1.19.2/include -L onnxruntime-linux-x64-1.19.2/lib -lonnxruntime \
-Wl,-rpath onnxruntime-linux-x64-1.19.2/lib
./yolov5_onnxruntime yolov5m.onnx coco.names input.mp4