本文介紹在C/C++中使用OpenCL的基本步驟,系統是Windows 10,IDE是Visual Studio 2017。
step1:先安裝Windows版的OpenCL SDK,然後新建一個工程,在屬性管理器裏爲這個工程添加OpenCL的頭文件目錄(包含目錄)和庫目錄,並在鏈接器輸入中加入OpenCL.lib。
step2:導入頭文件
#include <math.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <CL/cl.h>
#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp> // The kernel in this example operates on an image, so OpenCV is required
using namespace cv;
using namespace std;
#pragma warning( disable : 4996 )
step3:獲取平臺和上下文
/// Creates an OpenCL context for the GPU devices of the first available platform.
///
/// @return The new context, or NULL when no platform is found or the context
///         cannot be created (a diagnostic is written to std::cerr in both
///         cases). This function never releases the context, so the caller is
///         responsible for calling clReleaseContext on it.
cl_context CreateContext()
{
    cl_uint numPlatforms = 0;
    cl_platform_id firstPlatformId = NULL;

    // Request at most one platform id; numPlatforms receives the number available.
    cl_int errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
    if (errNum != CL_SUCCESS || numPlatforms == 0)
    {
        std::cerr << "Failed to find any OpenCL platforms." << std::endl;
        return NULL;
    }

    // Zero-terminated property list that binds the context to the chosen platform.
    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)firstPlatformId,
        0
    };

    cl_context context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
                                                 NULL, NULL, &errNum);
    if (context == NULL)
    {
        // Typical cause: the first platform exposes no GPU device.
        std::cerr << "Failed to create an OpenCL GPU context (error " << errNum << ")." << std::endl;
        return NULL;
    }
    return context;
}
step4:創建設備並創建命令隊列
/// Picks the first device of `context` and creates a command queue on it.
///
/// @param context  Context whose device list is queried.
/// @param device   Out-parameter; receives the first device of the context once
///                 the device list has been read successfully.
/// @return The new command queue, or NULL on failure (a diagnostic is written
///         to std::cerr). This function never releases the queue, so the caller
///         is responsible for calling clReleaseCommandQueue on it.
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
    // Size in bytes of the context's device list. Starts at 0 so that a failed
    // query can never be mistaken for a huge device list.
    size_t deviceBufferSize = 0;
    cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to query the context's device list size (error " << errNum << ")." << std::endl;
        return NULL;
    }
    if (deviceBufferSize < sizeof(cl_device_id))
    {
        std::cerr << "No devices available." << std::endl;
        return NULL;
    }

    // The vector owns the temporary device list, so it is freed on every return path.
    std::vector<cl_device_id> devices(deviceBufferSize / sizeof(cl_device_id));
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices.data(), NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to get the context's device IDs (error " << errNum << ")." << std::endl;
        return NULL;
    }

    // Uncomment to print the name of the selected device:
    //char name_data[100];
    //clGetDeviceInfo(devices[0], CL_DEVICE_NAME, sizeof(name_data), name_data, NULL);
    //printf("device:%s\n", name_data);

    // Use the first device in the list.
    *device = devices[0];
    cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &errNum);
    if (commandQueue == NULL)
    {
        std::cerr << "Failed to create a command queue for device 0 (error " << errNum << ")." << std::endl;
        return NULL;
    }
    return commandQueue;
}
step5:構建並且編譯program
/// Reads OpenCL C source from `fileName`, creates a program object from it and
/// builds it for the devices of `context`.
///
/// @param context   Context the program is created in.
/// @param device    Device whose build log is printed when the build fails.
/// @param fileName  Path of the kernel source file.
/// @return The built program, or NULL when the file cannot be opened, the
///         program object cannot be created, or the build fails (a diagnostic
///         is written to std::cerr in each case). On success the caller is
///         responsible for calling clReleaseProgram.
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return NULL;
    }

    // Slurp the whole file into one string; srcStr points into srcStdStr, which
    // stays alive until the end of this function.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    cl_int errNum = CL_SUCCESS;
    cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, &errNum);
    if (program == NULL)
    {
        std::cerr << "Failed to create CL program from source (error " << errNum << ")." << std::endl;
        return NULL;
    }

    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Fetch the compiler output so kernel syntax errors are visible.
        size_t logSize = 0;
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
        std::string buildLog(logSize, '\0');
        if (logSize > 0)
        {
            clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  logSize, &buildLog[0], NULL);
        }
        std::cerr << "Failed to build CL program (error " << errNum << "):" << std::endl;
        std::cerr << buildLog.c_str() << std::endl;

        // Do not hand an unbuilt program back to the caller.
        clReleaseProgram(program);
        return NULL;
    }
    return program;
}
step6:依次調用上面的函數創建上下文、命令隊列和program,最後創建kernel
// 一、選擇OpenCL平臺並創建一個上下文
cl_context context = CreateContext();
// 二、 創建設備並創建命令隊列
cl_command_queue commandQueue = CreateCommandQueue(context, &device);
cl_event histEvent = 0;
//創建和構建程序對象
cl_program program = CreateProgram(context, device, "main.cl");
cl_kernel kernel = clCreateKernel(program, "kernel_rgb2gray", NULL);
step7:新建文件main.cl,在裏面新建一個核函數
// Copies every input byte into the matching element of the int result buffer.
//
// Expected launch: a 2-D global range where dimension 0 spans the image width
// and dimension 1 spans the image height, one work-item per pixel.
//
// rgbImage: input pixels, one unsigned char per work-item.
// result:   output buffer, one int per work-item.
__kernel void kernel_rgb2gray(__global unsigned char * rgbImage,
                              __global int* result)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    // The row stride is taken from the launch dimensions, so the kernel needs
    // no separate width/height argument.
    int width = get_global_size(0);
    // Row-major pixel index.
    int index = y * width + x;
    result[index] = rgbImage[index];
}
step8:爲核函數設置參數
Mat image = imread("D://b.jpg");
int imgSize = image.rows * image.cols;
Mat srcImage;
cvtColor(image, srcImage, CV_BGR2GRAY);
cl_mem memResult = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(int)*imgSize, NULL, NULL);
cl_mem memRgbImage = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
sizeof(uchar) * 3 * imgSize, srcImage.data, NULL);
errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memRgbImage);
errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memResult);
step9:執行核函數
int img_h = img.rows;
int img_w = img.cols;
size_t globalThreads[2] = { img_w, img_h };
//執行內核函數
errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 2, 0, globalThreads, NULL,
0, NULL, &histEvent);
step10:獲取執行結果
int result* = new int[imgSize];
errNum = clEnqueueReadBuffer(commandQueue, memResult, CL_TRUE, 0,
sizeof(float)*img_h*img_w, result, 1, &histEvent, NULL);