CUDA 圖形學 Surface Reference實驗

下面是對Surface Reference的實驗,代碼改自 CUDA C PROGRAMMING GUIDE (PG-02829-001_v10.0 | October 2018) p54,因爲這裏比較貼近計算機圖形學,故移到圖形學中去。

 

©版權所有!

/*
	下面是對CUDA的Surface Reference的實驗,實驗環境VS2017,CUDA 10,GTX 1060
	作者:呂翔宇,部分代碼改自CUDA手冊
	E-mail:[email protected]
	2019.3.4 19:01
*/
// NOTE(review): both macros below are normally predefined by the toolchain
// (__CUDACC__ by nvcc, __cplusplus by any C++ compiler). Defining them by hand
// looks like an editor/IntelliSense workaround; redefining __cplusplus with an
// empty value can break headers that test `#if __cplusplus >= ...`.
// TODO confirm whether these are still needed and consider removing them.
#define __CUDACC__
#define __cplusplus
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include<iostream>
#include<cmath>
#include<cstdlib>
#include<cuda.h>

//Two-dimensional surface references at file scope.
//main() binds each one to a CUDA array; copyKernel reads from inputSurfRef
//and writes to outputSurfRef.
surface<void, 2> inputSurfRef;
surface<void, 2> outputSurfRef;

//Simple copy kernel: each thread copies one uchar4 element from inputSurfRef
//to outputSurfRef at the same (x, y) position.
//  width, height - extent of the region to copy, in elements.
//Expects a 2D launch; threads that fall outside width x height do nothing.
__global__ void copyKernel(int width, int height) {
	//Global element coordinates of this thread
	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

	//Guard: leave early when the thread is outside the requested region
	if (!(x < width && y < height)) {
		return;
	}

	//The x coordinate given to the surface functions is scaled by 4,
	//the size in bytes of one uchar4 element
	const unsigned int byteX = x * 4;

	uchar4 texel;
	//Read from the input surface
	surf2Dread(&texel, inputSurfRef, byteX, y);
	//Write the same value to the output surface
	surf2Dwrite(texel, outputSurfRef, byteX, y);
}

//Host-side helper: print a readable message and terminate the program when a
//CUDA runtime call did not return cudaSuccess.
//  status - return code of the CUDA call
//  what   - short description of the call, used in the error message
static void checkCuda(cudaError_t status, const char *what) {
	if (status != cudaSuccess) {
		std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
		std::exit(EXIT_FAILURE);
	}
}

//Host code: fills a 16x16 buffer with random values, copies it through two
//surface-bound CUDA arrays with copyKernel, then prints the data read back
//from the output array. Returns 0 on success; exits with EXIT_FAILURE if an
//allocation or any CUDA call fails.
int main() {
	std::ios::sync_with_stdio(false);//Habitually disable stdio sync; safe to ignore.
	//The channel format below is 4 x 8-bit unsigned (uchar4), so one element
	//occupies the same 4 bytes as one unsigned int on the host side.
	//Experiment parameters
	const int width = 16;
	const int height = 16;
	const size_t rowBytes = sizeof(unsigned int)*width;
	const size_t size = rowBytes*height;
	unsigned int *h_data = (unsigned int*)malloc(size);
	if (h_data == NULL) {
		std::cerr << "malloc failed" << std::endl;
		return EXIT_FAILURE;
	}

	//Initialise the host buffer and print it
	std::cout << "初始數據:\n";
	for (int i = 0; i < height; i++) {
		for (int j = 0; j < width; j++) {
			h_data[i*width + j] = rand() % 100;
			std::cout << h_data[i*width + j] << "\t";
		}
		std::cout << "\n";
	}

	//Allocate the CUDA arrays on the device (surface load/store enabled)
	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
	cudaArray *cuInputArray;
	checkCuda(cudaMallocArray(&cuInputArray, &channelDesc, width, height, cudaArraySurfaceLoadStore),
		"cudaMallocArray(input)");
	cudaArray *cuOutputArray;
	checkCuda(cudaMallocArray(&cuOutputArray, &channelDesc, width, height, cudaArraySurfaceLoadStore),
		"cudaMallocArray(output)");

	//Upload the host data into the input array; the 2D copy states the host
	//row pitch explicitly
	checkCuda(cudaMemcpy2DToArray(cuInputArray, 0, 0, h_data, rowBytes, rowBytes, height, cudaMemcpyHostToDevice),
		"cudaMemcpy2DToArray");

	//Bind the CUDA arrays to the surface references
	checkCuda(cudaBindSurfaceToArray(inputSurfRef, cuInputArray), "cudaBindSurfaceToArray(input)");
	checkCuda(cudaBindSurfaceToArray(outputSurfRef, cuOutputArray), "cudaBindSurfaceToArray(output)");

	//Launch configuration: 16x16 threads per block, grid rounded up to cover the data
	dim3 dimBlock(16, 16);
	dim3 dimGrid(
		(width + dimBlock.x - 1) / dimBlock.x,
		(height + dimBlock.y - 1) / dimBlock.y
	);

	copyKernel<<<dimGrid, dimBlock>>>(width, height);
	//A launch does not return an error itself: query the launch status, then
	//synchronise so that execution errors are reported here as well
	checkCuda(cudaGetLastError(), "copyKernel launch");
	checkCuda(cudaDeviceSynchronize(), "copyKernel execution");

	//Read back and print the result.
	//The host buffer is cleared first so stale input cannot pass for output.
	memset(h_data, 0, size);
	//A cudaArray is opaque and cannot be the source of a plain cudaMemcpy;
	//it has to be read with an array-aware copy.
	checkCuda(cudaMemcpy2DFromArray(h_data, rowBytes, cuOutputArray, 0, 0, rowBytes, height, cudaMemcpyDeviceToHost),
		"cudaMemcpy2DFromArray");
	std::cout << "處理後數據:\n";
	for (int i = 0; i < height; i++) {
		for (int j = 0; j < width; j++) {
			//Print what the kernel wrote; the buffer is not modified here
			std::cout << h_data[i*width + j] << "\t";
		}
		std::cout << "\n";
	}

	//Release device and host memory
	checkCuda(cudaFreeArray(cuInputArray), "cudaFreeArray(input)");
	checkCuda(cudaFreeArray(cuOutputArray), "cudaFreeArray(output)");
	free(h_data);

	system("pause");
	return 0;
}

結果

???

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章