cuda學習--並行化實現圖像的RGB轉灰度圖

原創

2018-10-20 09:55

#include <iostream>
#include <string>
#include <cassert>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>

// Wraps a CUDA runtime call: forwards the call's result to check() together
// with the stringified call expression (#val) and the current file and line,
// so that a failure report names the exact call site.
#define checkCudaErrors(val) check((val),#val,__FILE__,__LINE__)

// Host-side RGBA version of the input image; written by cv::cvtColor in preProcess().
cv::Mat imageRGBA;
// Host-side single-channel (CV_8UC1) output image; created in preProcess().
cv::Mat imageGrey;

// Device (GPU) buffers: assigned at the end of preProcess(), released by cleanup().
uchar4 *d_rgbaImage__;
uchar  *d_greyImage__;

// Number of rows (height in pixels) of the global imageRGBA.
size_t numRows() {
	return static_cast<size_t>(imageRGBA.rows);
}
// Number of columns (width in pixels) of the global imageRGBA.
size_t numCols() {
	return static_cast<size_t>(imageRGBA.cols);
}

// Terminates the program when a CUDA call did not return cudaSuccess.
//   err  - status value returned by the call
//   func - source text of the call (stringified by checkCudaErrors)
//   file - source file of the call site
//   line - source line of the call site
// On failure the location and cudaGetErrorString(err) are printed to stderr
// and the process exits with status 1; on success nothing happens.
template<typename T>
void check(T err, const char* const func, const char* const file, const int line) {
	const bool failed = (err != cudaSuccess);
	if (!failed) {
		return;
	}

	std::cerr << "CUDA error at:" << file << ":" << line << std::endl
	          << cudaGetErrorString(err) << " " << func << std::endl;
	exit(1);
}

// Image pre-processing: loads the input file and stages everything the kernel needs.
//
// Reads `filename` with OpenCV, converts it from BGR to RGBA into the global
// imageRGBA, creates the global imageGrey with the same dimensions, allocates
// the two device buffers, zero-fills the grey one and uploads the RGBA pixels.
//
// Outputs (all written through the pointer arguments):
//   inputImage  - host pointer to the first RGBA pixel of imageRGBA
//   greyImage   - host pointer to the first byte of imageGrey
//   d_rgbaImage - device buffer holding a copy of the RGBA pixels
//   d_greyImage - device buffer for the grey result, zero-filled
// The two device pointers are also recorded in d_rgbaImage__ / d_greyImage__
// so that cleanup() can free them.
//
// Exits the process with status 1 if the file cannot be read, if either host
// image is not stored contiguously, or (via checkCudaErrors) if a CUDA call fails.
void preProcess(uchar4 **inputImage, unsigned char **greyImage, uchar4 **d_rgbaImage,
	unsigned char **d_greyImage, const std::string &filename) {
	// NOTE(review): cudaFree(0) frees nothing; presumably it is here to force
	// CUDA context creation before any real work - confirm.
	checkCudaErrors(cudaFree(0));

	// Read the file as a 3-channel colour image. cv::IMREAD_COLOR replaces the
	// legacy CV_LOAD_IMAGE_COLOR macro (same value), which newer OpenCV
	// releases no longer expose through <opencv2/opencv.hpp>.
	cv::Mat image;
	image = cv::imread(filename.c_str(), cv::IMREAD_COLOR);
	if (image.empty()) {
		std::cerr << "Couldn't open file:" << filename << std::endl;
		exit(1);
	}

	// OpenCV loads pixels in BGR order; convert to RGBA for the kernel.
	// cv::COLOR_BGR2RGBA replaces the legacy CV_BGR2RGBA macro (same value).
	cv::cvtColor(image, imageRGBA, cv::COLOR_BGR2RGBA);

	// Grey output: same dimensions as the input, one byte per pixel.
	imageGrey.create(image.rows, image.cols, CV_8UC1);

	// The raw pointers handed out below are used as flat arrays, which is only
	// valid when each Mat stores its rows back to back.
	if (!imageRGBA.isContinuous() || !imageGrey.isContinuous()) {
		std::cerr << "Images aren't continuous!! Exiting." << std::endl;
		exit(1);
	}

	// inputImage points at the RGBA pixel data of imageRGBA
	*inputImage = (uchar4 *)imageRGBA.ptr<unsigned char>(0);
	// greyImage points at the pixel data of imageGrey
	*greyImage = imageGrey.ptr<unsigned char>(0);

	// Allocate device memory for both images
	const size_t numPixels = numRows()*numCols();
	checkCudaErrors(cudaMalloc(d_rgbaImage, sizeof(uchar4)*numPixels));
	checkCudaErrors(cudaMalloc(d_greyImage, sizeof(unsigned char)*numPixels));
	// Zero the grey buffer on the device
	checkCudaErrors(cudaMemset(*d_greyImage, 0, numPixels * sizeof(unsigned char)));

	// Upload the RGBA pixels from the host image to the device buffer
	checkCudaErrors(cudaMemcpy(*d_rgbaImage, *inputImage, sizeof(uchar4)*numPixels, cudaMemcpyHostToDevice));

	// Remember the device pointers for cleanup()
	d_rgbaImage__ = *d_rgbaImage;
	d_greyImage__ = *d_greyImage;
}

// Converts an RGBA image to 8-bit greyscale, one output byte per pixel.
//
// Launch layout: a 1-D grid (only blockIdx.x / gridDim.x are used) of 1-D or
// 2-D blocks; threads inside a block are flattened as
// threadIdx.y*blockDim.x + threadIdx.x. No shared memory is used.
//
// Each thread starts at its flattened global index and then advances by the
// total number of launched threads, so every pixel is written even when the
// grid holds fewer threads than there are pixels. All index arithmetic is
// 64-bit so that numRows*numCols cannot overflow int on very large images.
//
//   rgbaImage - device pointer to numRows*numCols pixels (x=R, y=G, z=B; w is not read)
//   greyImage - device pointer to numRows*numCols output bytes
//   numRows, numCols - image dimensions in pixels
__global__
void rgba_to_greyscale(const uchar4* const rgbaImage, unsigned char* const greyImage, int numRows, int numCols) {
	const long long numPixels = static_cast<long long>(numRows) * numCols;
	const long long threadsPerBlock = static_cast<long long>(blockDim.x) * blockDim.y;
	const long long stride = threadsPerBlock * gridDim.x;
	const long long first = blockIdx.x * threadsPerBlock + threadIdx.y * blockDim.x + threadIdx.x;

	for (long long pixel = first; pixel < numPixels; pixel += stride) {
		const uchar4 rgba = rgbaImage[pixel];
		// Weighted sum of R, G and B; the float result is truncated to a byte,
		// exactly as the implicit conversion did before.
		greyImage[pixel] = static_cast<unsigned char>(.299f*rgba.x + .587f*rgba.y + .114f*rgba.z);
	}
}

// Writes the grey image to disk.
//   output_file - destination path handed to cv::imwrite
//   data_ptr    - host pointer to numRows()*numCols() grey bytes
// A failed write is reported on stderr instead of being silently ignored; the
// function still returns normally so the caller can release its resources.
void postProcess(const std::string& output_file, unsigned char* data_ptr) {
	// Mat header over the caller's buffer, sized like the input image.
	cv::Mat output(numRows(), numCols(), CV_8UC1, (void*)data_ptr);
	if (!cv::imwrite(output_file.c_str(), output)) {
		std::cerr << "Couldn't write file:" << output_file << std::endl;
	}
}

// Releases the device buffers recorded by preProcess().
// Each cudaFree result goes through checkCudaErrors like every other CUDA call
// in this file, and the globals are reset afterwards so that a repeated call
// passes a null pointer instead of freeing the same allocation twice.
void cleanup() {
	checkCudaErrors(cudaFree(d_rgbaImage__));
	d_rgbaImage__ = NULL;
	checkCudaErrors(cudaFree(d_greyImage__));
	d_greyImage__ = NULL;
}

// Entry point: loads a fixed input image, converts it to greyscale on the GPU
// and writes the result to a fixed output path. Command-line arguments are
// accepted but not used.
int main(int argc, char* argv[]) {
	// Input image path
	std::string input_file = "E:/code/study_cuda/study_reduce/study_reduce/cinque_terre_small.jpg";
	// Output image path
	std::string output_file = "E:/code/study_cuda/study_reduce/study_reduce/cinque_terre_small_togray.jpg";

	// RGBA image: host pointer and device pointer
	uchar4 *h_rgbaImage, *d_rgbaImage;
	// Grey image: host pointer and device pointer
	unsigned char *h_greyImage, *d_greyImage;

	// Load the image, allocate the device buffers and upload the RGBA pixels
	// to d_rgbaImage
	preProcess(&h_rgbaImage, &h_greyImage, &d_rgbaImage, &d_greyImage, input_file);

	const size_t numPixels = numRows()*numCols();

	// 16x16 threads per block on a 1-D grid. The block count is rounded UP
	// using the block's full thread count (256), so the final, partially
	// filled block is launched too and no tail pixels are skipped.
	const int thread = 16;
	const size_t threadsPerBlock = static_cast<size_t>(thread) * thread;
	const unsigned int grid = static_cast<unsigned int>((numPixels + threadsPerBlock - 1) / threadsPerBlock);
	const dim3 blockSize(thread, thread);
	const dim3 gridSize(grid);
	rgba_to_greyscale <<<gridSize, blockSize >>> (d_rgbaImage, d_greyImage,
		static_cast<int>(numRows()), static_cast<int>(numCols()));
	// A kernel launch returns no status; an invalid configuration is reported here
	checkCudaErrors(cudaGetLastError());

	// Block the CPU until the kernel has finished; errors raised while the
	// kernel was running are returned by this call
	checkCudaErrors(cudaDeviceSynchronize());

	// Copy the grey result from the device back to the host image
	checkCudaErrors(cudaMemcpy(h_greyImage, d_greyImage, sizeof(unsigned char)*numPixels, cudaMemcpyDeviceToHost));

	// Write the image file
	postProcess(output_file, h_greyImage);

	// Release device memory
	cleanup();
	return 0;
}

僅記錄學習過程

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

cuda學習--並行化實現圖像的RGB轉灰度圖

如何在低代碼平臺中引用 JavaScript ？

探究職業發展的關鍵：能力模型解讀

高效率使用windows

如何使用 JavaScript 獲取當前頁面幀率 FPS

工程款拖欠，農民工怎麼了？就得一直忍着委屈求全嗎？

HarmonyOS 實現下拉刷新，上拉加載更多

語音信號處理中的“窗函數”

智能決策新時代：可視化大屏是否能夠超越傳統白板？

解密Prompt系列28. LLM Agent之金融領域摸索：FinMem & FinAgent

分享幾個.NET開源的AI和LLM相關項目框架

opencv(四) 改變jpg的dpi

opencv（三）指定位置替換圖片

改變jpg的dpi

opencv（一）旋轉90,180,270度

opecv(二) M存入大於255的數值

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結