//!
//! \brief Uses a Caffe parser to build the MNIST network and marks its outputs.
//!
//! \param parser  Caffe parser used to parse the prototxt / caffemodel files.
//! \param network Network definition the parsed model is written into.
//!
void SampleMNIST::constructNetwork(
    SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
    // Parse the Caffe model and weight files; the result is written into `network`.
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(
        mParams.prototxtFileName.c_str(),
        mParams.weightsFileName.c_str(),
        *network,
        nvinfer1::DataType::kFLOAT);

    // Mark the model outputs.
    for (auto& s : mParams.outputTensorNames)
    {
        network->markOutput(*blobNameToTensor->find(s.c_str()));
    }

    // Add mean subtraction to the beginning of the network.
    nvinfer1::Dims inputDims = network->getInput(0)->getDimensions();
    mMeanBlob = SampleUniquePtr<nvcaffeparser1::IBinaryProtoBlob>(
        parser->parseBinaryProto(mParams.meanFileName.c_str()));
    nvinfer1::Weights meanWeights{
        nvinfer1::DataType::kFLOAT, mMeanBlob->getData(), inputDims.d[1] * inputDims.d[2]};

    // For this sample, a large range based on the mean data is chosen and applied to the head
    // of the network. After the mean subtraction occurs, the range is expected to be between
    // -127 and 127, so the rest of the network is given a generic range.
    // The preferred method is to use scales computed from a representative data set and apply
    // each one individually per tensor. The range here is large enough for the network, but is
    // chosen for example purposes only.
    float maxMean = samplesCommon::getMaxValue(
        static_cast<const float*>(meanWeights.values), samplesCommon::volume(inputDims));

    // Add a constant layer holding the per-pixel mean image (1 x H x W).
    auto mean = network->addConstant(
        nvinfer1::Dims3(1, inputDims.d[1], inputDims.d[2]), meanWeights);
    mean->getOutput(0)->setDynamicRange(-maxMean, maxMean);
    network->getInput(0)->setDynamicRange(-maxMean, maxMean);

    // Insert the mean-subtraction element-wise op between the input and the first layer.
    auto meanSub = network->addElementWise(
        *network->getInput(0), *mean->getOutput(0), ElementWiseOperation::kSUB);
    meanSub->getOutput(0)->setDynamicRange(-maxMean, maxMean);
    network->getLayer(0)->setInput(0, *meanSub->getOutput(0));

    // Apply a generic per-tensor scale so activations map into [-1, 1].
    samplesCommon::setAllTensorScales(network.get(), 127.0f, 127.0f);
}
2.4. 模型推理
主要工作:就是将转换好的模型在 TensorRT engine 上跑一遍。
里面用到的组件比较多(CUDA stream、buffer 管理等),细节可先不深究。
主要通过 infer() 函数完成,本函数的主要操作就是:
读取输入数据(processInput)。
通过 cuda stream / buffer 等进行推理。
判断输出结果是否正确(verifyOutput)。
bool SampleMNIST::infer(){// 实现具体推理过程// 缓存对象管理// Create RAII buffer manager object
samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);// 创建上下文auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());if(!context){returnfalse;}// 随机选择一个数字// Pick a random digit to try to infersrand(time(NULL));constint digit =rand()%10;// 读取输入数据到缓存对象中// 即将 digit 写入 buffers 中,名字为 mParams.inputTensorNames[0]// Read the input data into the managed buffers// There should be just 1 input tensorassert(mParams.inputTensorNames.size()==1);if(!processInput(buffers, mParams.inputTensorNames[0], digit)){returnfalse;}// 创建 cuda 流,准备执行推理// Create CUDA stream for the execution of this inference.
cudaStream_t stream;CHECK(cudaStreamCreate(&stream));// 异步将数据从主机输入缓冲区(buffer)复制到设备输入缓冲区(stream)// Asynchronously copy data from host input buffers to device input buffers
buffers.copyInputToDeviceAsync(stream);// 异步将推理任务加入队列中// Asynchronously enqueue the inference workif(!context->enqueue(mParams.batchSize, buffers.getDeviceBindings().data(), stream,nullptr)){returnfalse;}// 异步将模型结果从设备(stream)保存到主机缓冲区(buffers)// Asynchronously copy data from device output buffers to host output buffers
buffers.copyOutputToHostAsync(stream);// 等待工作结束,关闭stream// Wait for the work in the stream to completecudaStreamSynchronize(stream);// Release streamcudaStreamDestroy(stream);// 得到结果,判断结果是否准确// 即从 buffer 中获取名为 mParams.outputTensorNames[0] 的结果,判断与digit是否相同// Check and print the output of the inference// There should be just one output tensorassert(mParams.outputTensorNames.size()==1);bool outputCorrect =verifyOutput(buffers, mParams.outputTensorNames[0], digit);return outputCorrect;}
读取输入数据
这部分没啥好说的。
//!
//! \brief Reads the chosen digit's PGM file into the engine's host input buffer.
//!
//! \param buffers         Buffer manager owning the host/device bindings.
//! \param inputTensorName Name of the input tensor to fill.
//! \param inputFileIdx    Digit index (0-9); the file "<idx>.pgm" is loaded.
//!
//! \return true always (file-location errors surface inside locateFile/readPGMFile).
//!
bool SampleMNIST::processInput(
    const samplesCommon::BufferManager& buffers, const std::string& inputTensorName,
    int inputFileIdx) const
{
    const int inputH = mInputDims.d[1];
    const int inputW = mInputDims.d[2];

    // Read the digit file selected by the caller.
    // (A redundant srand() reseed was removed here: this function never calls rand(),
    //  and the caller in infer() already seeds the RNG.)
    std::vector<uint8_t> fileData(inputH * inputW);
    readPGMFile(locateFile(std::to_string(inputFileIdx) + ".pgm", mParams.dataDirs),
        fileData.data(), inputH, inputW);

    // Print an ASCII-art representation of the digit.
    gLogInfo << "Input:\n";
    for (int i = 0; i < inputH * inputW; i++)
    {
        gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % inputW) ? "" : "\n");
    }
    gLogInfo << std::endl;

    // Copy the pixel bytes into the host input buffer as floats.
    float* hostInputBuffer = static_cast<float*>(buffers.getHostBuffer(inputTensorName));
    for (int i = 0; i < inputH * inputW; i++)
    {
        hostInputBuffer[i] = float(fileData[i]);
    }

    return true;
}
验证输出结果
还会输出可视化结果,有种梦回当年的感觉。
//!
//! \brief Verifies the inference output and prints a histogram of the class probabilities.
//!
//! \param buffers          Buffer manager owning the host/device bindings.
//! \param outputTensorName Name of the output tensor (10 class probabilities).
//! \param groundTruthDigit The digit that was fed to the network.
//!
//! \return true if the most probable digit equals the ground truth with probability > 0.9.
//!
bool SampleMNIST::verifyOutput(
    const samplesCommon::BufferManager& buffers, const std::string& outputTensorName,
    int groundTruthDigit) const
{
    // Host buffer holding the 10 per-digit probabilities.
    const float* prob = static_cast<const float*>(buffers.getHostBuffer(outputTensorName));

    // Print a histogram of the output distribution while tracking the argmax.
    gLogInfo << "Output:\n";
    float val{0.0f};
    int idx{0};
    const int kDIGITS = 10;
    for (int i = 0; i < kDIGITS; i++)
    {
        if (val < prob[i])
        {
            val = prob[i];
            idx = i;
        }
        // One '*' per ~10% of probability mass, rounded to nearest.
        gLogInfo << i << ": " << std::string(int(std::floor(prob[i] * 10 + 0.5f)), '*') << "\n";
    }
    gLogInfo << std::endl;

    // Correct only if the argmax matches ground truth with confidence above 0.9.
    return (idx == groundTruthDigit && val > 0.9f);
}