可以使用自己訓練的模型:模型文件的後綴名是 .weights,對應的配置文件後綴名是 .cfg。如果沒有自己訓練的模型,可以到 YOLO 官網下載預訓練好的模型。
自己訓練模型可以參考darknet-YOLO系列博客。
int main()
{
std::string modelPath = "yolov3.weights";
std::string configPath = "yolov3.cfg";
std::string labelPath = "coco.names";
std::string imagePath = "test.jpg"
int networkW = 416;
int networkH = 416;
float thresh = 0.5;
float nms = 0.45;
dnn::Net net= dnn::readNet(modelPath , configPath );//我用的OpenCV4.1.1版本,調用dnn::readNetFromDarknet會報錯,調用dnn::readNet可以正常識別
vector<String> labels;
int err = readClassNamesFromFile(labelPath , labels);
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
Mat srcImg = imread(imagePath);
Mat inputBlob = dnn::blobFromImage(srcImg, 1.0/255, Size(networkW, networkH), Scalar(), false, false);//這裏因爲訓練時對圖像做了歸一化,所以在推理的時候也要對圖像進行歸一化
net.setInput(blob);
vector<Mat> outs;
net.forward(outs, outNames);
postprocess(srcImg, outs, net, thresh, nms);
return 0;
}
int readClassNamesFromFile(String fileName, vector<String>& classNames)
{
ifstream fp(fileName, ios::in);
if (!fp.is_open())
{
cout << "can not open file " << fileName << endl;
return -1;
}
String name;
while (!fp.eof())
{
getline(fp, name);
if (name.length())
{
classNames.push_back(name);
}
}
fp.close();
return 0;
}
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net, float thresh, float nms)
{
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<Rect> boxes;
if (outLayerType == "Region")
{
for (size_t i = 0; i < outs.size(); ++i)
{
//網絡輸出的數據是一個NxC矩陣向量,N是檢測到的目標數量,C的類別數 + 4
//開始的4個數據是[center_x, center_y, width, height]
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > thresh)
{
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
std::vector<int> indices;
NMSBoxes(boxes, confidences, thresh, nms, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
drawPrediction(classIds[idx], confidences[idx], box.x, box.y,
box.x + box.width, box.y + box.height, frame);
}
}
int drawPrediction(const vector<String>& labelNames, int classId, float conf, int left, int top, int right, int bottom, Mat& img)
{
rectangle(img, Point(left, top), Point(right, bottom), cv::Scalar(255, 0, 0), 2);
if (labelNames.empty())
{
cout << "labelNames is empty!" << endl;
return -1;
}
if (classId >= (int)labelNames.size())
{
cout << "classId is out of boundary!" << endl;
return -1;
}
String label = labelNames[classId];
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
putText(img, label, Point(left, top), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(255, 0, 0), 1);
return 0;
}