1.獲取指定系統上可用的計算平臺,使用函數:
cl_int clGetPlatformIDs( |
cl_uint num_entries, |
cl_platform_id *platforms, | |
cl_uint *num_platforms) |
eg:
cl_int err;
cl_uint nPlatform;
cl_platform_id *listPlatform;
cl_device_id *listDevice;
err = clGetPlatformIDs(0, NULL, &nPlatform);
if(err < 0) {
perror("Couldn't find any platforms.");
exit(1);
}
listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id));
clGetPlatformIDs(nPlatform, listPlatform, NULL);
clGetPlatformIDs 由應用程序調用兩次。
第一次調用:將NULL 和 cl_uint 指針(&nPlatform)分別傳遞給形參 *platforms 和 *num_platforms,獲取platform 數量;以便分配空間來存儲平臺信息。
第二次調用:將 num_entries 設爲 nPlatform,並將已分配足夠空間的 cl_platform_id 指針(listPlatform)傳遞給形參 platforms,由具體實現把各平臺的 ID 寫入該數組。
2.獲取某平臺上可用的設備列表(設備的名稱、類型、設備廠商等相關信息需再通過 clGetDeviceInfo 查詢),使用函數:
cl_int clGetDeviceIDs( |
cl_platform_id platform, |
cl_device_type device_type, | |
cl_uint num_entries, | |
cl_device_id *devices, | |
cl_uint *num_devices) |
eg:
err = clGetDeviceIDs(listPlatform[i], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice);
if(err < 0) {
perror("Couldn't find any devices");
exit(1);
}
listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id));
clGetDeviceIDs(listPlatform[i], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL);
clGetDeviceIDs由應用程序調用兩次,同 clGetPlatformIDs。
3.上下文(Context ),使用函數:
cl_context clCreateContext(
const cl_context_properties *properties,
cl_uint num_devices,
const cl_device_id *devices,
void (CL_CALLBACK *pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data),
void *user_data,
cl_int *errcode_ret)
eg:
context = clCreateContext(properties,1,&device,NULL,NULL,&error);
if(error != CL_SUCCESS){
LOGD("create context fail !\n");
exit(1);
}
作用:
1)協調主機同設備之間的交互機制;
2)管理設備上可用的內存對象;
3)跟蹤針對每個設備新建的kernel 和程序;
4.命令隊列(Command Queue),使用函數:
cl_command_queue clCreateCommandQueue( |
cl_context context, |
cl_device_id device, | |
cl_command_queue_properties properties, | |
cl_int *errcode_ret) |
eg:
queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
if(error != CL_SUCCESS){
LOGD("create command queue fail !\n");
exit(1);
}
作用:
1)通過提交命令到命令隊列開始與設備進行通信;
2)一種主機端向設備端發送請求的機制;
3)每個命令隊列只關聯一個設備;一個設備可以擁有多個命令隊列,但要向某個設備提交命令,至少需要爲它創建一個命令隊列;
5.事件(Event),任何以clEnqueue 開頭的API 調用,都會產生一個事件:
作用:
1)表示依賴;
2)提供程序剖析機制;
6.內存(buffer)對象,使用函數:
cl_mem clCreateBuffer ( |
cl_context context, |
cl_mem_flags flags, | |
size_t size, | |
void *host_ptr, | |
cl_int *errcode_ret) |
eg:
cl_mem inbuf_a ;
cl_mem inbuf_b ;
cl_mem outbuf_r ;
inbuf_a = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_a,&error);
if(error!=CL_SUCCESS){
LOGD("create buffer inbuf_a fail !\n");
exit(1);
}
inbuf_b = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_b,&error);
if(error!=CL_SUCCESS){
LOGD("create buffer inbuf_b fail !\n");
exit(1);
}
outbuf_r = clCreateBuffer(context,CL_MEM_WRITE_ONLY,mem_size,NULL,&error);
if(error!=CL_SUCCESS){
LOGD("create buffer outbuf_r fail !\n");
exit(1);
}
作用:
1)新建 buffer 對象,用於存放與上下文相關聯的設備所需的輸入、輸出數據;
7.OpenCL C代碼,是一組稱爲kernel 的函數的集合,kernel 是在設備上執行的函數:
eg:
const char *kernelSourceCode2 = KERNEL(
__kernel void warp(__global uint *buffer)
{
size_t gidx = get_global_id(0);
size_t gidy = get_global_id(1);
buffer[gidx + 4 * gidy] = (1 << gidx) | (0x10 << gidy);
}
);
或者:
const char* program_src = ""
"__kernel void vector_add_gpu (__global const float* src_a,\n"
" __global const float* src_b,\n"
" __global float* res,\n"
" const int num)\n"
"{\n"
" int idx = get_global_id(0);\n"
" if(idx<num){"
" res[idx]=src_a[idx]+src_b[idx];\n"
" }\n"
"}\n"
;
步驟如下:
1)源代碼以字符串形式存儲;
2)通過調用函數 clCreateProgramWithSource 將其轉換成一個cl_program對象;
cl_program clCreateProgramWithSource ( |
cl_context context, |
cl_uint count, | |
const char **strings, | |
const size_t *lengths, | |
cl_int *errcode_ret) |
eg:
cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
if(error!=CL_SUCCESS){
LOGD("create program fail !\n");
exit(1);
}
3)通過調用函數 clBuildProgram 在多個支持OpenCL 的設備上編譯程序對象;
cl_int clBuildProgram ( |
cl_program program, |
cl_uint num_devices, | |
const cl_device_id *device_list, | |
const char *options, | |
void (*pfn_notify)(cl_program, void *user_data), | |
void *user_data) |
eg:
error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
if(error!=CL_SUCCESS){
LOGD("build program fail !\n");
clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
LOGD("build_log : %s\n",build_log);
exit(1);
}
4)先通過調用函數 clCreateKernel 從已編譯的程序對象中取得kernel 對象(即下例中的 vector_add_kernel),再通過調用函數 clSetKernelArg 逐個設置kernel 的參數(內存對象及標量值);
cl_int clSetKernelArg ( |
cl_kernel kernel, |
cl_uint arg_index, | |
size_t arg_size, | |
const void *arg_value) |
eg:
error = clSetKernelArg(vector_add_kernel,0,sizeof(cl_mem),&inbuf_a);
error |= clSetKernelArg(vector_add_kernel,1,sizeof(cl_mem),&inbuf_b);
error |= clSetKernelArg(vector_add_kernel,2,sizeof(cl_mem),&outbuf_r);
error |= clSetKernelArg(vector_add_kernel,3,sizeof(cl_int),&size);
if(error!=CL_SUCCESS){
LOGD("set kernel arg fail !\n");
exit(1);
}
5)通過調用 clEnqueueNDRangeKernel 開始執行kernel 程序;
cl_int clEnqueueNDRangeKernel ( |
cl_command_queue command_queue, |
cl_kernel kernel, | |
cl_uint work_dim, | |
const size_t *global_work_offset, | |
const size_t *global_work_size, | |
const size_t *local_work_size, | |
cl_uint num_events_in_wait_list, | |
const cl_event *event_wait_list, | |
cl_event *event) |
eg:
local_ws = 256;
nMaxWorkItemSizes[0]=256
global_ws = shrRoundUp(local_ws,size);
LOGD("local_ws=%d,global_ws=%d\n",local_ws,global_ws);
error = clEnqueueNDRangeKernel(queue,vector_add_kernel,1,NULL,&global_ws,&local_ws,0,NULL,NULL);
if(error!=CL_SUCCESS){
LOGD("enqueue kernel fail !\n");
exit(1);
}
參數解釋:
1)cl_command_queue command_queue :加入隊列
2)cl_kernel kernel :kernel句柄
3)cl_uint work_dim :項目維度(類似於for語句的幾重循環)
4)const size_t *global_work_offset :全局索引的起始偏移量,傳 NULL 表示從 0 開始(類似於for語句的循環從哪裏開始)
5)const size_t *global_work_size :總的項目數(類似於for語句總的執行次數)
6)const size_t *local_work_size :每個工作組內的項目數(總的項目數必須能被它整除)
7)cl_uint num_events_in_wait_list:等待事件數
8)const cl_event *event_wait_list:等待事件
9)cl_event *event:事件
8.將內存對象的結果回讀到Host,使用函數:
cl_int clEnqueueReadBuffer ( |
cl_command_queue command_queue, |
cl_mem buffer, | |
cl_bool blocking_read, | |
size_t offset, | |
size_t cb, | |
void *ptr, | |
cl_uint num_events_in_wait_list, | |
const cl_event *event_wait_list, | |
cl_event *event) |
eg:
error = clEnqueueReadBuffer(queue,outbuf_r,CL_TRUE,0,mem_size,vect_c,0,NULL,NULL);
if (error != CL_SUCCESS) {
LOGD("Error: Read buffer queue\n");
exit(1);
}
9.資源回收,使用函數:
status = clReleaseKernel(kernel);
status = clReleaseProgram(program);
status = clReleaseMemObject(outputBuffer);
status = clReleaseCommandQueue(commandQueue);
status = clReleaseContext(context);
10.基本API: