tensorflow c++ API預測多張圖片batch inference

經過前幾篇的折騰與各種查資料後，單張圖片預測的代碼如下：

//one image prediction ---single image
int mainsingle()
{
	  Session* session;
	  Status status = NewSession(SessionOptions(), &session);

	  const std::string graph_fn = "/media/root/Ubuntu311/projects/Ecology_projects/JPMVCNN_AlgaeAnalysisMathTestDemo/model-0723/model.meta";
	  MetaGraphDef graphdef;
	  Status status_load = ReadBinaryProto(Env::Default(), graph_fn, &graphdef); //從meta文件中讀取圖模型;
	  if (!status_load.ok()) {
	        std::cout << "ERROR: Loading model failed..." << graph_fn << std::endl;
	        std::cout << status_load.ToString() << "\n";
	        return -1;
	  }

	  Status status_create = session->Create(graphdef.graph_def()); //將模型導入會話Session中;
	  if (!status_create.ok()) {
	        std::cout << "ERROR: Creating graph in session failed..." << status_create.ToString() << std::endl;
	        return -1;
	  }
	  cout << "Session successfully created.Load model successfully!"<< endl;

	  // 讀入預先訓練好的模型的權重
	  const std::string checkpointPath = "/media/root/Ubuntu311/projects/Ecology_projects/JPMVCNN_AlgaeAnalysisMathTestDemo/model-0723/model";
	  Tensor checkpointPathTensor(DT_STRING, TensorShape());
	  checkpointPathTensor.scalar<std::string>()() = checkpointPath;
	  status = session->Run(
			  {{ graphdef.saver_def().filename_tensor_name(), checkpointPathTensor },},
			  {},{graphdef.saver_def().restore_op_name()},nullptr);
	  if (!status.ok())
	  {
		  throw runtime_error("Error loading checkpoint from " + checkpointPath + ": " + status.ToString());
	  }
	  cout << "Load weights successfully!"<< endl;


	  //read image for prediction...
	  char srcfile[200];
	  double alltime=0.0;
	  for(int numingroup=0;numingroup<1326;numingroup++)
	  {
		  sprintf(srcfile, "/media/root/Ubuntu311/projects/Ecology_projects/copy/cnn-imgs96224/%d.JPG",numingroup);
		  cv::Mat srcimg=cv::imread(srcfile,0);
		  if(!srcimg.data)
		  {
			  continue;
		  }

		  Tensor resized_tensor(DT_FLOAT, TensorShape({1,96,224,1}));
		  float *imgdata = resized_tensor.flat<float>().data();
		  cv::Mat cameraImg(96, 224, CV_32FC1, imgdata);
		  srcimg.convertTo(cameraImg, CV_32FC1);
		  //對圖像做預處理
		  cameraImg=cameraImg/255;
		  std::cout <<"Read image successfully: "<< resized_tensor.DebugString()<<endl;

		   vector<std::pair<string, Tensor> > inputs;
		   std::string Input1Name = "input";
		   inputs.push_back(std::make_pair(Input1Name, resized_tensor));
		   Tensor is_training_val(DT_BOOL,TensorShape());
		   is_training_val.scalar<bool>()()=false;
		   std::string Input2Name = "is_training";
		   inputs.push_back(std::make_pair(Input2Name, is_training_val));

		   vector<tensorflow::Tensor> outputs;
		   string output="output";

		   cv::TickMeter timer;
		   timer.start();
		   Status status_run = session->Run(inputs, {output}, {}, &outputs);
		   if (!status_run.ok()) {
			   std::cout << "ERROR: RUN failed..."  << std::endl;
			   std::cout << status_run.ToString() << "\n";
			   return -1;
		   }

		   timer.stop();
		   cout<<"single image inference time is: "<<timer.getTimeSec()<<" s."<<endl;
		   alltime+=(timer.getTimeSec());
	       timer.reset();

		  Tensor t = outputs[0];
		  int ndim2 = t.shape().dims();
		  auto tmap = t.tensor<float, 2>();  // Tensor Shape: [batch_size, target_class_num]
		  int output_dim = t.shape().dim_size(1);
		  std::vector<double> tout;

		  // Argmax: Get Final Prediction Label and Probability
		  int output_class_id = -1;
		  double output_prob = 0.0;
		  for (int j = 0; j < output_dim; j++)
		  {
				std::cout << "Class " << j << " prob:" << tmap(0, j) << "," << std::endl;
				if (tmap(0, j) >= output_prob) {
						output_class_id = j;
						output_prob = tmap(0, j);
				}
		  }
		  std::cout << "Final class id: " << output_class_id << std::endl;
		  std::cout << "Final class prob: " << output_prob << std::endl;
	  }

	  cout<<"all image have been predicted and time is: "<<alltime<<endl;

	return 0;
}

我測了下預測時間每張圖幾乎0.02秒：

下面是分成多個batch進行預測：

//batch inference...
int mainbatchinference()
{
	  Session* session;
	  Status status = NewSession(SessionOptions(), &session);

	  const std::string graph_fn = "/media/root/Ubuntu311/projects/Ecology_projects/JPMVCNN_AlgaeAnalysisMathTestDemo/model-0723/model.meta";
	  MetaGraphDef graphdef;
	  Status status_load = ReadBinaryProto(Env::Default(), graph_fn, &graphdef); //從meta文件中讀取圖模型;
	  if (!status_load.ok()) {
	        std::cout << "ERROR: Loading model failed..." << graph_fn << std::endl;
	        std::cout << status_load.ToString() << "\n";
	        return -1;
	  }

	  Status status_create = session->Create(graphdef.graph_def()); //將模型導入會話Session中;
	  if (!status_create.ok()) {
	        std::cout << "ERROR: Creating graph in session failed..." << status_create.ToString() << std::endl;
	        return -1;
	  }
	  cout << "Session successfully created.Load model successfully!"<< endl;

	  // 讀入預先訓練好的模型的權重
	  const std::string checkpointPath = "/media/root/Ubuntu311/projects/Ecology_projects/JPMVCNN_AlgaeAnalysisMathTestDemo/model-0723/model";
	  Tensor checkpointPathTensor(DT_STRING, TensorShape());
	  checkpointPathTensor.scalar<std::string>()() = checkpointPath;
	  status = session->Run(
			  {{ graphdef.saver_def().filename_tensor_name(), checkpointPathTensor },},
			  {},{graphdef.saver_def().restore_op_name()},nullptr);
	  if (!status.ok())
	  {
		  throw runtime_error("Error loading checkpoint from " + checkpointPath + ": " + status.ToString());
	  }
	  cout << "Load weights successfully!"<< endl;


	  int cnnrows=96;
	  int cnncols=224;
	  //read image for prediction...
	  char srcfile[200];
	  const int imgnum=1326;

	  const int batch=32;
	  double alltime=0.0;
	  //all image inference...
	  for(int imgind=0;imgind<imgnum/batch;imgind++)
	  {
		  //a batch inference...
		  tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ batch, cnnrows, cnncols, 1 }));
		  auto input_tensor_mapped = input_tensor.tensor<float, 4>();

		  int batchind=0;
		  int imgrealind=imgind*batch;
		  for(;batchind!=batch;batchind++)
		  {
			  sprintf(srcfile, "/media/root/Ubuntu311/projects/Ecology_projects/copy/cnn-imgs96224/%d.JPG",imgrealind);
			  cv::Mat srcimg=cv::imread(srcfile,0);
			  if(!srcimg.data)
			  {
				  continue;
			  }
			  cv::Mat cameraImg(cnnrows, cnncols, CV_32FC1);
			  srcimg.convertTo(cameraImg, CV_32FC1);
			  cameraImg=cameraImg/255;

			  //convert batch cv image to tensor
		      for (int y = 0; y < cnnrows; ++y)
		      {
				  const float* source_row = (float*)cameraImg.data + (y * cnncols);
				  for (int x = 0; x < cnncols; ++x)
				  {
						const float* source_pixel = source_row + x;
						input_tensor_mapped(batchind, y, x, 0) = *source_pixel;
				  }
		      }
		      imgrealind++;
		  //a batch image transfer done...
		  }

		  vector<std::pair<string, Tensor> > inputs;
		  std::string Input1Name = "input";
		  inputs.push_back(std::make_pair(Input1Name, input_tensor));
		  Tensor is_training_val(DT_BOOL,TensorShape());
		  is_training_val.scalar<bool>()()=false;
		  std::string Input2Name = "is_training";
		  inputs.push_back(std::make_pair(Input2Name, is_training_val));

		  vector<tensorflow::Tensor> outputs;
		  string output="output";
		  cv::TickMeter timer;
	      timer.start();
	      Status status_run = session->Run(inputs, {output}, {}, &outputs);
	      if (!status_run.ok()) {
		   std::cout << "ERROR: RUN failed..."  << std::endl;
		   std::cout << status_run.ToString() << "\n";
		   return -1;
	      }

	      timer.stop();
	      cout<<"time of this batch inference is: "<<timer.getTimeSec()<<" s."<<endl;
	      alltime+=(timer.getTimeSec());
	      timer.reset();

	      auto finalOutputTensor  = outputs[0].tensor<float, 2>();
	      int output_dim = outputs[0].shape().dim_size(1);
	      for(int b=0; b<batch;b++)
	      {
			  for(int i=0; i<output_dim; i++)
			  {
				  cout << b << "the probability for class "<<i<<" is "<< finalOutputTensor(b, i) <<endl;
			  }
	      }
	  //all images inference done...
	  }

	  cout<<"all image have been predicted and time is: "<<alltime<<endl;

	return 0;
}

batch inference的時間是：

已對比測試過，多張預測batch inference與single image inference預測結果一致，證明代碼正確。

但是之前stackoverflow上有人說batch inference比single image inference快，所以我才嘗試batch inference的，但是我測出來並不快！！！

他說他single inference是0.02秒，batch=1560的inference只要0.03秒，相當於提速了1560×0.02/0.03≈1000倍！！！但是我這裏並沒有什麼提速的效果。（注意：batch size不能隨便設置，要與training時的batch size一樣。）

關於預測時間這個問題我已在 https://stackoverflow.com/questions/57460782/batch-inference-is-as-slow-as-single-image-inference-in-tensorflow-c 和 https://github.com/tensorflow/tensorflow/issues/31572 上提問了，目前沒有有效答覆。

另外谷歌上關於tensorflow C++預測時間的帖子我都看了，目前還是沒有找到提速的方法。

https://fast-depth-coding.readthedocs.io/en/latest/tf-speed.html 這個人的效果就很好，不知道他說的Running all samples in one session是不是我這樣，我也是隻用了一個session。然而我並沒有看到速度很快。他加了優化後也有很大提速，然而我這裏還是並沒有。

網上也有很多說tensorflow C++預測慢的 https://github.com/tensorflow/tensorflow/issues/10669

煩躁。

依舊放一張小不點的照片鎮樓

tensorflow c++ API預測多張圖片batch inference

DAPPER 事務 TRANSACTION

基於細胞自動機Cellular Automata(CA)的區域生長

學習CUDA準備Win10+cmake+opencv4.1+opencv_contrib4.1+CUDA+VS2015

cv::cuda與CUDA的NPP庫、距離變換和分水嶺並行版本嘗試

opencv4.1無法加載python-cnn模型，編譯第三方庫libtensorflow_cc.so巨坑

python/c++讀/存16bit圖像 tensorflow--different inference result in python/c++

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結