CUDA Julia & ripple
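Two small kernels in one Visual Studio console project: a Julia-set renderer and a ripple pattern, following the classic samples from NVIDIA's CUDA by Example book. Instead of the book's OpenGL bitmap helpers, the results are copied back to the host and displayed with OpenCV.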

// chaterConsole.cpp : Defines the entry point for the console application.
//


#include "stdafx.h"


/*
* Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation.
* Any use, reproduction, disclosure, or distribution of this software
* and related documentation without an express license agreement from
* NVIDIA Corporation is strictly prohibited.
*
* Please refer to the applicable NVIDIA end user license agreement (EULA)
* associated with this source code for terms and conditions that govern
* your use of this NVIDIA software.
*
*/




#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>

#define DIM 1024


// Minimal complex-number type for device code. Note that __device__ belongs
// on the member functions, not on the struct declaration itself.
struct cuComplex {
    float r;
    float i;
    __device__ cuComplex(float a, float b) : r(a), i(b) {}
    __device__ float magnitude2(void) {
        return r * r + i * i;
    }
    __device__ cuComplex operator*(const cuComplex& a) {
        return cuComplex(r * a.r - i * a.i, i * a.r + r * a.i);
    }
    __device__ cuComplex operator+(const cuComplex& a) {
        return cuComplex(r + a.r, i + a.i);
    }
};
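// julia() below decides, per pixel, whether the point jx + jy*i belongs to the
// (filled) Julia set of c = -0.8 + 0.156i: it iterates z = z * z + c and treats
// the point as inside if |z|^2 never exceeds 1000 within 200 iterations.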


__device__ int julia(int x, int y) {
    const float scale = 1.5f;
    float jx = scale * (float)(DIM / 2 - x) / (DIM / 2);
    float jy = scale * (float)(DIM / 2 - y) / (DIM / 2);

    cuComplex c(-0.8f, 0.156f);
    cuComplex a(jx, jy);

    for (int i = 0; i < 200; i++) {
        a = a * a + c;
        if (a.magnitude2() > 1000)
            return 0;  // orbit escaped: the point is outside the set
    }

    return 1;  // orbit stayed bounded for 200 iterations: inside the set
}


__global__ void drawJuliaKernel(int *ptr)
{
    // map from blockIdx to pixel position: one block per pixel
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;

    // 255 * juliaValue goes into channel 0; with OpenCV's default BGR
    // layout this renders points inside the set in blue
    int juliaValue = julia(x, y);
    ptr[offset * 3 + 0] = 255 * juliaValue;
    ptr[offset * 3 + 1] = 0;
    ptr[offset * 3 + 2] = 0;
}


__global__ void drawRippleKernel(int *ptr) {
    // map from threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // now calculate the value at that position
    float fx = x - DIM / 2;
    float fy = y - DIM / 2;
    float d = sqrtf(fx * fx + fy * fy);
    unsigned char grey = (unsigned char)(128.0f + 127.0f * cosf(d / 10.0f) / (d / 10.0f + 1.0f));
    ptr[offset * 3 + 0] = grey;
    ptr[offset * 3 + 1] = grey;
    ptr[offset * 3 + 2] = grey;
}
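// How the ripple intensity is computed: grey = 128 + 127 * cos(d/10) / (d/10 + 1),
// a cosine wave in the radial distance d from the image centre, attenuated by a
// term that grows with d so the rings fade out towards the border. Writing the
// same value to all three channels of the interleaved buffer gives greyscale.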




// Helper function that launches both drawing kernels on the GPU and copies
// the resulting images back to host memory.
cudaError_t drawWithCuda(int *c, int *a, unsigned int size)
{
    int *dev_c = 0;
    int *dev_a = 0;
    cudaError_t cudaStatus;
    // Launch configurations, declared up front so no goto below jumps over
    // their initialization (standard C++ forbids that).
    dim3 grid(DIM, DIM);             // Julia: one block per pixel, one thread per block
    dim3 blocks(DIM / 16, DIM / 16); // ripple: DIM/16 x DIM/16 blocks...
    dim3 threads(16, 16);            // ...of 16x16 threads each

    // Choose which GPU to run on; change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate one GPU buffer per image (Julia set and ripple).
    cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // Launch the two kernels.
    drawJuliaKernel<<<grid, 1>>>(dev_c);
    drawRippleKernel<<<blocks, threads>>>(dev_a);

    // Check for any errors launching the kernels.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernels to finish, and returns
    // any errors encountered during the run.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching the kernels!\n", cudaStatus);
        goto Error;
    }

    // Copy the Julia image from the GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Copy the ripple image from the GPU buffer to host memory.
    cudaStatus = cudaMemcpy(a, dev_a, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    cudaFree(dev_a);
    cudaFree(dev_c);

    return cudaStatus;
}
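// Aside: the repeated "cudaStatus = ...; if (cudaStatus != cudaSuccess) goto Error;"
// blocks above could be collapsed with a small helper macro. A sketch (CUDA_CHECK
// is my own name, not part of the CUDA runtime API):
//
//   #define CUDA_CHECK(call)                                          \
//       do {                                                          \
//           cudaError_t err_ = (call);                                \
//           if (err_ != cudaSuccess) {                                \
//               fprintf(stderr, "%s failed: %s\n", #call,             \
//                       cudaGetErrorString(err_));                    \
//               return err_;                                          \
//           }                                                         \
//       } while (0)
//
// Each check then becomes a one-liner such as CUDA_CHECK(cudaSetDevice(0)),
// at the cost of moving the cudaFree cleanup out of the Error label and into
// the caller (or an RAII wrapper).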






int main()
{
    const int pixSize = DIM * DIM;
    const int arraySize = DIM * DIM * 3;

    int *c = (int *)malloc(arraySize * sizeof(int));
    if (c) {
        std::cout << "Memory allocated at " << c << std::endl;
    } else {
        std::cout << "Not enough memory!" << std::endl;
        return 1;
    }

    int *a = (int *)malloc(arraySize * sizeof(int));
    if (a) {
        std::cout << "Memory allocated at " << a << std::endl;
    } else {
        std::cout << "Not enough memory!" << std::endl;
        free(c);
        return 1;
    }

    // Render the Julia set and the ripple pattern on the GPU.
    cudaError_t cudaStatus = drawWithCuda(c, a, arraySize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "drawWithCuda failed!");
        return 1;
    }


    cv::Mat colorImg = cv::Mat::zeros(DIM, DIM, CV_8UC3);
    cv::Mat rippleImg = cv::Mat::zeros(DIM, DIM, CV_8UC3);
    //colorImg = cv::imread("kinect 1.bmp", CV_LOAD_IMAGE_COLOR);

    // Narrow each int channel value down into the uchar data of the Mats.
    uchar* p_mat = colorImg.data;
    uchar* p_ripple = rippleImg.data;

    for (int i = 0; i < pixSize; i++)
    {
        *p_mat++ = c[i * 3 + 0];
        *p_mat++ = c[i * 3 + 1];
        *p_mat++ = c[i * 3 + 2];

        *p_ripple++ = a[i * 3 + 0];
        *p_ripple++ = a[i * 3 + 1];
        *p_ripple++ = a[i * 3 + 2];
        /*std::cout << c[i * 3 + 0] << " " << c[i * 3 + 1] << " " << c[i * 3 + 2] << " " << std::endl;*/
    }

    cv::imshow("color image", colorImg);
    cv::imshow("ripple image", rippleImg);
    cv::waitKey(3000);


    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }

    free(a);
    free(c);

    return 0;
}
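One thing worth tightening: each colour channel is stored in a 32-bit int on the device and only narrowed to uchar on the host, so the GPU buffers and the device-to-host copies are four times larger than the final images, and a per-pixel conversion loop is needed. A minimal sketch of the alternative, writing straight into an unsigned char buffer shaped like the CV_8UC3 Mat (drawRippleKernelU8 and dev_img are placeholder names, not from the program above):

__global__ void drawRippleKernelU8(unsigned char *ptr) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    float fx = x - DIM / 2;
    float fy = y - DIM / 2;
    float d = sqrtf(fx * fx + fy * fy);
    unsigned char grey = (unsigned char)(128.0f + 127.0f * cosf(d / 10.0f) / (d / 10.0f + 1.0f));
    ptr[offset * 3 + 0] = grey;
    ptr[offset * 3 + 1] = grey;
    ptr[offset * 3 + 2] = grey;
}

// Host side, replacing the int buffer and the conversion loop:
//   unsigned char *dev_img = 0;
//   cudaMalloc((void**)&dev_img, DIM * DIM * 3);
//   drawRippleKernelU8<<<blocks, threads>>>(dev_img);
//   cudaMemcpy(rippleImg.data, dev_img, DIM * DIM * 3, cudaMemcpyDeviceToHost);
//   cudaFree(dev_img);

Because cv::Mat::zeros allocates continuous data, the cudaMemcpy can target rippleImg.data directly, and both device memory use and PCIe traffic drop to a quarter.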


