下面是對Surface Reference的實驗,代碼改自 CUDA C PROGRAMMING GUIDE (PG-02829-001_v10.0 | October 2018) p54,因爲這裏比較貼近計算機圖形學,故移到圖形學中去。
©版權所有!
/*
下面是對CUDA的Surface Reference的實驗,實驗環境VS2017,CUDA 10,GTX 1060
作者:呂翔宇,部分代碼改自CUDA手冊
E-mail:[email protected]
2019.3.4 19:01
*/
#define __CUDACC__
#define __cplusplus
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include<cuda.h>
#include<cmath>
#include<cstdlib>
#include<cstring>
#include<iostream>
// Two-dimensional surface references used by copyKernel: one is read from,
// the other is written to. The host binds each of them to a CUDA array with
// cudaBindSurfaceToArray before launching the kernel.
// NOTE(review): surface references are a legacy API (superseded by surface
// objects in newer toolkits) -- confirm the target CUDA version still supports them.
surface<void, 2> inputSurfRef;
surface<void, 2> outputSurfRef;
// Simple copy kernel: every thread copies one 4-byte texel (read as a uchar4)
// from inputSurfRef to outputSurfRef.
//
// width, height: extent of the surfaces in texels. Threads of a 2D launch whose
// coordinates fall outside this extent return without touching either surface.
__global__ void copyKernel(int width, int height) {
    // Texel coordinates handled by this thread.
    const unsigned int col = threadIdx.x + blockDim.x * blockIdx.x;
    const unsigned int row = threadIdx.y + blockDim.y * blockIdx.y;

    // Skip threads in the grid tail that lie outside the surface.
    if (col >= width || row >= height) {
        return;
    }

    // The x coordinate of a surface access is given in bytes, so scale the
    // texel index by the 4-byte texel size.
    const unsigned int byteOffsetX = col * 4;

    uchar4 texel;
    // Read one texel from the input surface ...
    surf2Dread(&texel, inputSurfRef, byteOffsetX, row);
    // ... and store it at the same position of the output surface.
    surf2Dwrite(texel, outputSurfRef, byteOffsetX, row);
}
// Aborts the program with a readable message when a CUDA runtime call fails.
// status: return code of the call; what: short description of the attempted step.
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        std::cerr << "CUDA error during " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Host code: fills a 16x16 image with random values, uploads it to a CUDA
// array, copies it to a second CUDA array through the surface references with
// copyKernel, reads the result back and prints both images.
// Returns 0 on success; exits with EXIT_FAILURE if an allocation or CUDA call fails.
int main() {
    // Decouple C++ streams from C stdio (a habit of the original author).
    std::ios::sync_with_stdio(false);

    // Experiment parameters. Each element is an unsigned int on the host and
    // a 4 x 8-bit unsigned texel (cudaChannelFormatKindUnsigned) on the device.
    const int width = 16;
    const int height = 16;
    const size_t rowBytes = sizeof(unsigned int) * width;
    const size_t size = rowBytes * height;

    unsigned int *h_data = (unsigned int *)malloc(size);
    if (h_data == NULL) {
        std::cerr << "Host allocation of " << size << " bytes failed" << std::endl;
        return EXIT_FAILURE;
    }

    // Initialise and print the input image.
    std::cout << "初始數據:\n";
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            h_data[i * width + j] = rand() % 100;
            std::cout << h_data[i * width + j] << "\t";
        }
        std::cout << "\n";
    }

    // Allocate the CUDA arrays on the device; the surface load/store flag is
    // required for arrays that will be bound to a surface.
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
    cudaArray *cuInputArray = NULL;
    checkCuda(cudaMallocArray(&cuInputArray, &channelDesc, width, height,
                              cudaArraySurfaceLoadStore),
              "cudaMallocArray(input)");
    cudaArray *cuOutputArray = NULL;
    checkCuda(cudaMallocArray(&cuOutputArray, &channelDesc, width, height,
                              cudaArraySurfaceLoadStore),
              "cudaMallocArray(output)");

    // Upload the host image into the input array (row width is given in bytes).
    checkCuda(cudaMemcpy2DToArray(cuInputArray, 0, 0, h_data, rowBytes,
                                  rowBytes, height, cudaMemcpyHostToDevice),
              "cudaMemcpy2DToArray");

    // Bind the CUDA arrays to the surface references used by the kernel.
    checkCuda(cudaBindSurfaceToArray(inputSurfRef, cuInputArray),
              "cudaBindSurfaceToArray(input)");
    checkCuda(cudaBindSurfaceToArray(outputSurfRef, cuOutputArray),
              "cudaBindSurfaceToArray(output)");

    // Launch configuration: 16x16 threads per block, enough blocks (ceil-div)
    // to cover the whole image.
    dim3 dimBlock(16, 16);
    dim3 dimGrid(
        (width + dimBlock.x - 1) / dimBlock.x,
        (height + dimBlock.y - 1) / dimBlock.y
    );
    copyKernel<<<dimGrid, dimBlock>>>(width, height);
    // A launch does not return a status itself: query launch errors first,
    // then synchronise to surface errors raised while the kernel was running.
    checkCuda(cudaGetLastError(), "copyKernel launch");
    checkCuda(cudaDeviceSynchronize(), "copyKernel execution");

    // Clear the host buffer so that the printout below can only come from the device.
    std::memset(h_data, 0, size);
    // A cudaArray is opaque and cannot be the source of a plain cudaMemcpy;
    // it has to be read back with an array copy function.
    checkCuda(cudaMemcpy2DFromArray(h_data, rowBytes, cuOutputArray, 0, 0,
                                    rowBytes, height, cudaMemcpyDeviceToHost),
              "cudaMemcpy2DFromArray");

    // Print the data produced by the kernel (it must equal the input image).
    std::cout << "處理後數據:\n";
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            std::cout << h_data[i * width + j] << "\t";
        }
        std::cout << "\n";
    }

    // Release device and host memory.
    checkCuda(cudaFreeArray(cuInputArray), "cudaFreeArray(input)");
    checkCuda(cudaFreeArray(cuOutputArray), "cudaFreeArray(output)");
    free(h_data);

    // Flush explicitly: stdio sync is disabled and lines end with "\n", so the
    // buffered output might otherwise not be visible while the pause is waiting.
    std::cout << std::flush;
    system("pause");
    return 0;
}
結果
???