//! \brief Uses a Caffe parser to create the MNIST network, marks its outputs,
//!        and prepends a mean-subtraction stage to the network input.
void SampleMNIST::constructNetwork(
    SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
    // Parse the Caffe prototxt (structure) and caffemodel (weights) into `network`.
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(
        mParams.prototxtFileName.c_str(),
        mParams.weightsFileName.c_str(),
        *network,
        nvinfer1::DataType::kFLOAT);

    // Mark the requested tensors as network outputs.
    for (auto& s : mParams.outputTensorNames)
    {
        network->markOutput(*blobNameToTensor->find(s.c_str()));
    }

    // Add mean subtraction to the beginning of the network.
    nvinfer1::Dims inputDims = network->getInput(0)->getDimensions();
    mMeanBlob = SampleUniquePtr<nvcaffeparser1::IBinaryProtoBlob>(
        parser->parseBinaryProto(mParams.meanFileName.c_str()));
    nvinfer1::Weights meanWeights{
        nvinfer1::DataType::kFLOAT, mMeanBlob->getData(), inputDims.d[1] * inputDims.d[2]};

    // For this sample, a large range based on the mean data is chosen and applied to
    // the head of the network. After the mean subtraction occurs, the range is
    // expected to be between -127 and 127, so the rest of the network is given a
    // generic range. The preferred method is to use scales computed from a
    // representative data set and apply each one individually per tensor; the range
    // here is large enough for the network but chosen for example purposes only.
    float maxMean = samplesCommon::getMaxValue(
        static_cast<const float*>(meanWeights.values), samplesCommon::volume(inputDims));

    // Add the per-pixel mean image as a constant layer.
    auto mean = network->addConstant(
        nvinfer1::Dims3(1, inputDims.d[1], inputDims.d[2]), meanWeights);
    mean->getOutput(0)->setDynamicRange(-maxMean, maxMean);
    network->getInput(0)->setDynamicRange(-maxMean, maxMean);

    // Subtract the mean image from the network input.
    auto meanSub = network->addElementWise(
        *network->getInput(0), *mean->getOutput(0), ElementWiseOperation::kSUB);
    meanSub->getOutput(0)->setDynamicRange(-maxMean, maxMean);

    // Rewire the original first layer to consume the mean-subtracted tensor.
    network->getLayer(0)->setInput(0, *meanSub->getOutput(0));

    // Apply a uniform scale so activations map roughly into [-1, 1].
    samplesCommon::setAllTensorScales(network.get(), 127.0f, 127.0f);
}
2.4. 模型推理
主要工作:就是將轉換好的模型在 TensorRT engine 上跑一遍。
裏面用到的各種東西比較多,目前也看不懂。
主要通過 infer() 函數完成,本函數的主要操作就是:
讀取輸入數據(processInput)。
通過 cuda stream / buffer 等進行推理。
判斷輸出結果是否正確(verifyOutput)。
bool SampleMNIST::infer(){// 實現具體推理過程// 緩存對象管理// Create RAII buffer manager object
samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);// 創建上下文auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());if(!context){returnfalse;}// 隨機選擇一個數字// Pick a random digit to try to infersrand(time(NULL));constint digit =rand()%10;// 讀取輸入數據到緩存對象中// 即將 digit 寫入 buffers 中,名字爲 mParams.inputTensorNames[0]// Read the input data into the managed buffers// There should be just 1 input tensorassert(mParams.inputTensorNames.size()==1);if(!processInput(buffers, mParams.inputTensorNames[0], digit)){returnfalse;}// 創建 cuda 流,準備執行推理// Create CUDA stream for the execution of this inference.
cudaStream_t stream;CHECK(cudaStreamCreate(&stream));// 異步將數據從主機輸入緩衝區(buffer)複製到設備輸入緩衝區(stream)// Asynchronously copy data from host input buffers to device input buffers
buffers.copyInputToDeviceAsync(stream);// 異步將推理任務加入隊列中// Asynchronously enqueue the inference workif(!context->enqueue(mParams.batchSize, buffers.getDeviceBindings().data(), stream,nullptr)){returnfalse;}// 異步將模型結果從設備(stream)保存到主機緩衝區(buffers)// Asynchronously copy data from device output buffers to host output buffers
buffers.copyOutputToHostAsync(stream);// 等待工作結束,關閉stream// Wait for the work in the stream to completecudaStreamSynchronize(stream);// Release streamcudaStreamDestroy(stream);// 得到結果,判斷結果是否準確// 即從 buffer 中獲取名爲 mParams.outputTensorNames[0] 的結果,判斷與digit是否相同// Check and print the output of the inference// There should be just one output tensorassert(mParams.outputTensorNames.size()==1);bool outputCorrect =verifyOutput(buffers, mParams.outputTensorNames[0], digit);return outputCorrect;}
讀取輸入數據
這部分沒啥好說的。
//! \brief Reads the PGM image for digit \p inputFileIdx into the host input
//!        buffer named \p inputTensorName, printing an ASCII preview.
//! \return true on success (file lookup failures surface via locateFile/readPGMFile).
bool SampleMNIST::processInput(
    const samplesCommon::BufferManager& buffers, const std::string& inputTensorName, int inputFileIdx) const
{
    const int inputH = mInputDims.d[1];
    const int inputW = mInputDims.d[2];

    // Note: the original called srand() here, but this function never calls
    // rand() — the digit index is chosen by the caller — so the redundant
    // reseed of the global RNG has been removed.

    // Read the digit file selected by the caller.
    std::vector<uint8_t> fileData(inputH * inputW);
    readPGMFile(locateFile(std::to_string(inputFileIdx) + ".pgm", mParams.dataDirs),
        fileData.data(), inputH, inputW);

    // Print ASCII representation of the digit (darker pixels -> denser glyphs).
    gLogInfo << "Input:\n";
    for (int i = 0; i < inputH * inputW; i++)
    {
        gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % inputW) ? "" : "\n");
    }
    gLogInfo << std::endl;

    // Copy the raw 8-bit pixels into the float host input buffer.
    float* hostInputBuffer = static_cast<float*>(buffers.getHostBuffer(inputTensorName));
    for (int i = 0; i < inputH * inputW; i++)
    {
        hostInputBuffer[i] = float(fileData[i]);
    }

    return true;
}
驗證輸出結果
還會輸出可視化結果,有種夢迴當年的感覺。
//! \brief Checks the 10-class output distribution against the ground-truth
//!        digit and prints a star histogram of the probabilities.
//! \return true if the argmax equals \p groundTruthDigit with probability > 0.9.
bool SampleMNIST::verifyOutput(
    const samplesCommon::BufferManager& buffers, const std::string& outputTensorName, int groundTruthDigit) const
{
    // Probabilities for the 10 digits, read from the named host output buffer.
    const float* prob = static_cast<const float*>(buffers.getHostBuffer(outputTensorName));

    // Print histogram of the output distribution while tracking the argmax.
    gLogInfo << "Output:\n";
    float val{0.0f};
    int idx{0};
    const int kDIGITS = 10;
    for (int i = 0; i < kDIGITS; i++)
    {
        if (val < prob[i])
        {
            val = prob[i];
            idx = i;
        }
        // One star per 10% of probability mass, rounded to nearest.
        gLogInfo << i << ": " << std::string(int(std::floor(prob[i] * 10 + 0.5f)), '*') << "\n";
    }
    gLogInfo << std::endl;

    // Correct only when the most likely digit matches AND is confidently > 0.9.
    return (idx == groundTruthDigit && val > 0.9f);
}