OpenCL的使用

例子:對 res 數組的每個元素,計算它與周圍相鄰元素(3×3 鄰域,超出邊界的部分不計)的整數平均值,結果存入 ans。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include<iostream>
#include"CL/cl.h"
#define SIZE_X 10
#define SIZE_Y 10
// OpenCL C source of the GetAverage kernel, kept as a string so the host can
// compile it at run time.
//
// Each work-item (x, y) of a 2D NDRange writes to ans[x + y * x_size] the
// integer average of res over its 3x3 neighbourhood (the cell itself plus its
// eight neighbours); neighbours that fall outside the NDRange are skipped.
// The grid extent is taken from get_global_size(), so the global work size
// must equal the dimensions of the res/ans arrays.
//
// The offset table is declared __constant because OpenCL C 1.x requires every
// program-scope variable to live in the constant address space; a plain
// "const int" at program scope is rejected by conformant compilers.
const char* KernelAverage = 
	"__constant int dir[9][2] = { { 1 , 0 } , { 0 , 1 } , { -1 , 0 } , { 0 , -1 } , { 1 , 1 } , { 1 , -1 } , { -1 , 1 } , { -1 , -1 } , { 0 , 0 } };\n"
	"__kernel void GetAverage( __global int *res , __global int *ans ) \n"
	"{\n"
	"  const int x = (int)get_global_id( 0 );\n"
	"  const int y = (int)get_global_id( 1 );\n"
	"  const int x_size = (int)get_global_size( 0 );\n"
	"  const int y_size = (int)get_global_size( 1 );\n"
	"  int cnt = 0 , sum = 0;\n"
	"  for( int i = 0 ; i < 9 ; ++i )\n"
	"  {\n"
	"    const int tx = x + dir[i][0];\n"
	"    const int ty = y + dir[i][1];\n"
	"    if( tx < 0 || ty < 0 || tx >= x_size || ty >= y_size )\n"
	"    { continue; }\n"
	"    sum += res[tx + ty * x_size];\n"
	"    cnt++;\n"
	"  }\n"
	"  ans[x + y * x_size] = ( cnt != 0 ) ? ( sum / cnt ) : 0;\n"
	"}\n";

int main() {
	cl_int status = 0;
	cl_uint numPlatforms;
	/////////////////////// Get the number of the Platforms //////////////////////
	status = clGetPlatformIDs( 0 , NULL , &numPlatforms );
	if( status != CL_SUCCESS ) { 
		printf( "Error:Getting Platforms.(clGetPlatformsIDs)\n" );
		return EXIT_FAILURE;
	}
	//////////////////////////////////////////////////////////////////////////////

	if( numPlatforms <= 0 )
	{ printf("The number of the platform is ZERO\n"); return 0; }


	cl_platform_id *platforms = new cl_platform_id[numPlatforms];
	/////////////////////// Get the lists of the platforms ////////////////////////
	status = clGetPlatformIDs( numPlatforms , platforms , NULL );
	if ( status != CL_SUCCESS)
	{
		printf( "Error:Getting Platform Ids.(clGetPlatformsIDs)\n" );
		return -1;
	}
	//////////////////////// Get the AMD platform //////////////////////////////////
	cl_platform_id platform;
	for( unsigned int i = 0 ; i < numPlatforms ; ++i ) {
		char pff[100];
		status = clGetPlatformInfo( platforms[i] , CL_PLATFORM_VENDOR , sizeof( pff ) , pff , NULL );
		platform = platforms[i];
		if( !strcmp( pff , "Avanced Micro Devices , Inc." ) )
		{ break; }
	}
	delete []platforms;
	/////////////////////// Get the platform  context ////////////////////////////////
	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM , ( cl_context_properties )platform , 0 };
	cl_context_properties *cprops = ( NULL == platform ) ? NULL : cps;
	cl_context context = clCreateContextFromType( cprops , CL_DEVICE_TYPE_CPU , NULL , NULL , &status );
	if( status != CL_SUCCESS )
	{
		printf( "Error: Creating Context.( clCreateContextFromType )\n");
		return EXIT_FAILURE;
	}
	size_t deviceListSize;
	status = clGetContextInfo( context , CL_CONTEXT_DEVICES , 0 , NULL , &deviceListSize );
	if( status != CL_SUCCESS )
	{
		printf( "Error: Getting Context Info device list size. clGetContextInfo\n");
		return EXIT_FAILURE;
	}
	cl_device_id *devices = ( cl_device_id * )malloc( deviceListSize );
	if( devices == NULL )
	{
		printf( "Error: No devices found.\n");
		return EXIT_FAILURE;
	}
	status = clGetContextInfo( context , CL_CONTEXT_DEVICES , deviceListSize , devices , NULL );
	if( status != CL_SUCCESS )
	{
		printf( "Error: Getting Context Info device list , clGetContextInfo\n");
		return EXIT_FAILURE;
	}
	//////////////////////// Get the source code /////////////////////////////////////
	size_t sourceSize[] = { strlen( KernelAverage ) };
	//clCreateProgramWithBinary( context , 1 ,devices , sourceSize , , ,);
	cl_program program = clCreateProgramWithSource( context , 1 , &KernelAverage , sourceSize , &status );
	if( status != CL_SUCCESS ) {
		printf( "Error: Loading Binary into cl_program( clCreateProgramWithBinary )\n");
		return EXIT_FAILURE;
	}
	///////////////////////// Build Code /////////////////////////////////////////////
	status = clBuildProgram( program , 1 , devices , NULL , NULL , NULL );
	if( status != CL_SUCCESS ) {
		printf( "Error: Building Program( clBuildProgram )\n  %d" , status );
		if( status == CL_BUILD_PROGRAM_FAILURE ) {
			cl_int logStatus;
			char * buildLog = NULL;
			size_t buildLogSize = 0;
			logStatus = clGetProgramBuildInfo( program , devices[0] , CL_PROGRAM_BUILD_LOG , buildLogSize , buildLog , &buildLogSize );

			buildLog = ( char* )malloc( buildLogSize );
			memset( buildLog , 0 , buildLogSize );
			logStatus = clGetProgramBuildInfo( program , devices[0] , CL_PROGRAM_BUILD_LOG , buildLogSize , buildLog , NULL );

			std::cout << " \n\t\t\tBUILD LOG\n";
			std::cout << buildLog << std::endl;
			free( buildLog );
		}
		return EXIT_FAILURE;
	}
	//////////////////////// Create Kernel ///////////////////////////////////////////
	cl_kernel kernel = clCreateKernel( program , "GetAverage" , &status );
	if( status != CL_SUCCESS ){
		printf( "Error: Creating Kernel from program.(clCreateKernel)\n");
		return EXIT_FAILURE;
	}
	//////////////////////////// Create Command Queue /////////////////////////////////
	cl_command_queue commandQueue = clCreateCommandQueue( context , devices[0] , 0 , &status );
	if( status != CL_SUCCESS ) {
		printf( "Creating Command Queue.(clCreateCommandQueue)\n");
		return EXIT_FAILURE;
	}

	int ans[SIZE_Y][SIZE_X] = { 0 };
	int res[SIZE_Y][SIZE_X] = { 1 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
				  1 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
				  9 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
				  7 , 3 , 5 , 9 , 9 , 9 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 9 , 9 , 9 , 7 , 5 , 3 , 3 ,
				  1 , 3 , 5 , 9 , 9 , 9 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 };
	//memset( ans , 0 , sizeof( int ) * SIZE_X * SIZE_Y );
	
	///////////////////////// Create Argument /////////////////////////////////
	cl_mem resBuffer = clCreateBuffer( context , CL_MEM_USE_HOST_PTR , SIZE_X * SIZE_Y * sizeof( int ) ,
		res , &status );
	cl_mem ansBuffer = clCreateBuffer( context , CL_MEM_USE_HOST_PTR , SIZE_X * SIZE_Y * sizeof( int ) ,
		ans , &status );
	if( status != CL_SUCCESS ) {
		printf( "Error: clCreateBuffer.\n");
		return EXIT_FAILURE;
	}
	status = clSetKernelArg( kernel , 0 , sizeof( cl_mem ) ,( void* )&resBuffer );
	status = clSetKernelArg( kernel , 1 , sizeof( cl_mem ) ,( void* )&ansBuffer );
	if( status != CL_SUCCESS ) {
		printf( "Error: Setting kernel argument.\n");
		return EXIT_FAILURE;
	}

	//////////////////// Implement kernel ////////////////////////////////////////////
	size_t globalThread[] = { 10 , 10 };
	size_t localThread[] = { 2 , 2 };
	status = clEnqueueNDRangeKernel( commandQueue , kernel , 2 , NULL , globalThread , localThread , 0 , NULL , NULL );
	if( status != CL_SUCCESS ) {
		printf( "Error: Enqueueing kernel\n");
		return EXIT_FAILURE;
	}
	///////////////////////// Wait for kernel to finish ////////////////////////////////////
	status = clFinish( commandQueue );
	if( status != CL_SUCCESS ) {
		printf( "Error: Finish command queue\n");
		return EXIT_FAILURE;
	}
	//////////////////////////////////////////////////////////////////////////////////
	printf("out:\n");
	for( int i = 0 ; i < 10 ; ++i ) {
		for( int j = 0 ; j < 10 ; ++j ) {
			printf( "%3d " , ans[i][j] );
		}
        printf("\n");
	}
	return 0;
}


注意:在 OpenCL 中,應儘可能減少對 global 類型數據的存取,多使用 local 和 private;頻繁存取 global 數據會明顯影響性能。



發佈了69 篇原創文章 · 獲贊 4 · 訪問量 12萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章