// The grid is 1D and each block is 2D.
#include<opencv2\opencv.hpp>
#include<cuda_runtime.h>
#include<stdio.h>
#include "device_launch_parameters.h"
using namespace cv;
// Inverts an image in place: every channel value v of every pixel becomes 255 - v.
//
// Launch layout: the flat pixel index is built from blockIdx.x together with
// the x/y extents of the block, i.e. a 1D grid of 2D blocks. Threads whose
// index falls at or beyond width*height do nothing.
//
//   dev_c   pixel buffer (read and written by the kernel)
//   width   image width in pixels
//   height  image height in pixels
__global__ void revImg(uchar3* dev_c,int width,int height)
{
    // Number of threads in one block and this thread's rank inside its block.
    const int threadsPerBlock = blockDim.x * blockDim.y;
    const int rankInBlock = threadIdx.y * blockDim.x + threadIdx.x;

    // Flat pixel index handled by this thread.
    const int pixel = blockIdx.x * threadsPerBlock + rankInBlock;

    // Surplus threads in the last block have no pixel to process.
    if (pixel >= width * height) {
        return;
    }

    // Work on a local copy, then write the inverted pixel back.
    uchar3 value = dev_c[pixel];
    value.x = 255 - value.x;
    value.y = 255 - value.y;
    value.z = 255 - value.z;
    dev_c[pixel] = value;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Loads an image from disk, shows it, inverts its colours on the GPU with
// revImg, and shows the result.
// Returns 0 on success and 1 if the image cannot be loaded, has an unexpected
// pixel format, or any CUDA call fails.
int main()
{
    Mat image = imread("E:/code/study_cuda/study_reduce/study_reduce/cinque_terre_small.jpg");
    // imread returns an empty Mat when the file is missing or unreadable.
    if (image.empty()) {
        fprintf(stderr, "failed to load the input image\n");
        return 1;
    }
    // The buffer is copied to the device as an array of uchar3, so it must be
    // 8-bit, 3-channel, and stored without row padding.
    if (image.type() != CV_8UC3) {
        fprintf(stderr, "unsupported image format: expected 8-bit 3-channel\n");
        return 1;
    }
    if (!image.isContinuous()) {
        image = image.clone();
    }
    imshow("src", image);

    int width, height;
    width = image.size().width;
    height = image.size().height;
    printf("width=%d height=%d", width, height);

    int size = width * height;
    size_t bytes = (size_t)size * sizeof(uchar3);

    uchar3 *dev_c = NULL;
    bool ok = cudaOk(cudaMalloc((void**)&dev_c, bytes), "cudaMalloc");
    ok = ok && cudaOk(cudaMemcpy(dev_c, image.data, bytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy (host to device)");

    if (ok) {
        int thread = 16;
        // Each block holds thread*thread threads; round the block count up so
        // that the trailing pixels are covered as well.
        int threadsPerBlock = thread * thread;
        int grid = (size + threadsPerBlock - 1) / threadsPerBlock;
        dim3 dimGrid(grid);
        dim3 dimBlock(thread,thread);
        revImg << <dimGrid, dimBlock >> > (dev_c,width,height);
        // A kernel launch returns nothing; configuration errors are reported here.
        ok = cudaOk(cudaGetLastError(), "revImg launch");
    }

    // The blocking copy also waits for the kernel and reports execution errors.
    ok = ok && cudaOk(cudaMemcpy(image.data, dev_c, bytes, cudaMemcpyDeviceToHost),
                      "cudaMemcpy (device to host)");

    // Release the device buffer on every path; cudaFree(NULL) is a no-op.
    cudaFree(dev_c);

    if (!ok) {
        return 1;
    }

    imshow("gpu", image);
    waitKey(0);
    return 0;
}