#include "device_functions.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "stdlib.h"
#include <iostream>
using namespace std;
// Number of elements in the array that is summed. In this file it is also the
// size of the shared buffer inside sumReduced and the threads-per-block count
// used for the kernel launch in main.
#define LENG 117
// Prints the number of CUDA devices plus, for device 0, the maximum threads
// per block and the maximum grid dimensions.
//
// Returns: maxThreadsPerBlock of device 0, or 0 when no device is available
//          or a runtime query fails (the failure is reported on stderr).
int getThreadNum()
{
    int count = 0; // number of CUDA-capable devices
    const cudaError_t countErr = cudaGetDeviceCount(&count);
    std::cout << "gpu 的個數:" << count << '\n';
    if (countErr != cudaSuccess)
    {
        std::cerr << "cudaGetDeviceCount failed: " << cudaGetErrorString(countErr) << std::endl;
        return 0;
    }
    if (count <= 0)
    {
        // Nothing to query; reading device properties would be meaningless.
        return 0;
    }

    cudaDeviceProp prop;
    // Second argument selects which device to describe; only device 0 is inspected.
    const cudaError_t propErr = cudaGetDeviceProperties(&prop, 0);
    if (propErr != cudaSuccess)
    {
        // prop is not valid here, so do not print or return any of its fields.
        std::cerr << "cudaGetDeviceProperties failed: " << cudaGetErrorString(propErr) << std::endl;
        return 0;
    }

    cout << "最大線程數:" << prop.maxThreadsPerBlock << endl;
    cout << "最大網格類型:" << prop.maxGridSize[0] << '\t' << prop.maxGridSize[1] << '\t' << prop.maxGridSize[2] << endl;
    return prop.maxThreadsPerBlock;
}
// Checks the status code returned by a CUDA runtime call; in this file it
// wraps the cudaMalloc calls.
//
// err: status code (enum cudaError) returned by the runtime call.
//
// Does nothing when err is cudaSuccess. Otherwise prints the failure message
// followed by the runtime's description of the error and terminates the
// process with EXIT_FAILURE, as the message announces, so the caller never
// continues with an invalid device pointer.
void GetCudaCalError(cudaError err)
{
    if (err == cudaSuccess)
    {
        return;
    }
    cout << "分配內存失敗!程序結束!" << " (" << cudaGetErrorString(err) << ")" << endl;
    exit(EXIT_FAILURE);
}
// Sums the first countNum floats of aGpu within one thread block and writes
// the total to sumGpu[0].
//
// Launch layout: indexing uses threadIdx.x only, so this is meant for a single
//   1-D block (every block of a larger grid would redo the same work). Any
//   block size is accepted: when there are fewer usable threads than elements,
//   each thread first accumulates several elements.
// Shared memory: LENG floats, statically allocated.
//
// aGpu:     device pointer to at least countNum floats (only read).
// sumGpu:   device pointer to at least one float; receives the sum.
// countNum: number of elements to add; a value <= 0 produces 0.
__global__ void sumReduced(float*aGpu, float*sumGpu, int countNum)
{
    const int tid = threadIdx.x;
    __shared__ float sData[LENG];

    // Number of shared slots actually used: one per thread, capped by the
    // size of the shared buffer so no thread can index past it.
    const int slots = ((int)blockDim.x < LENG) ? (int)blockDim.x : LENG;

    if (tid < slots)
    {
        // Strided accumulation so every element below countNum is counted
        // exactly once, and slots beyond countNum hold a well-defined 0.
        float partial = 0.0f;
        for (int k = tid; k < countNum; k += slots)
        {
            partial += aGpu[k];
        }
        sData[tid] = partial;
    }
    // All loads must be visible before any thread reads a neighbour's slot.
    __syncthreads();

    // Pairwise reduction that works for any length: each round folds the upper
    // half [half, live) onto the lower half. For an odd length the middle
    // element has no partner and is simply carried into the next round.
    for (int live = slots; live > 1; )
    {
        const int half = (live + 1) / 2; // ceil(live / 2) in integer math
        if (tid < half && tid + half < live)
        {
            sData[tid] += sData[tid + half];
        }
        // Barrier is outside the branch so every thread of the block reaches it.
        __syncthreads();
        live = half;
    }

    if (tid == 0)
    {
        sumGpu[0] = sData[0];
    }
}
// Host driver: fills an array with a[i] = i * (i + 1), sums it on the GPU with
// sumReduced, prints that result, then prints a CPU sum of the same data for
// comparison.
//
// Returns 0 on success, EXIT_FAILURE when an allocation, copy or the kernel
// launch failed (in which case no GPU result is printed).
int main()
{
    float a[LENG];
    for (int i = 0; i < LENG; ++i)
    {
        a[i] = i * (i + 1);
    }

    // Device buffers; kept null until allocated so the cudaFree calls below
    // are safe on every path.
    float *aGpu = 0;
    float *sumGpu = 0;
    float asum = 0;

    // Each step runs only if all previous ones succeeded.
    cudaError_t status = cudaMalloc(&aGpu, LENG * sizeof(float));
    GetCudaCalError(status);
    if (status == cudaSuccess)
    {
        status = cudaMalloc(&sumGpu, 1 * sizeof(float));
        GetCudaCalError(status);
    }
    if (status == cudaSuccess)
    {
        // Upload the input values.
        status = cudaMemcpy(aGpu, a, LENG * sizeof(float), cudaMemcpyHostToDevice);
    }
    if (status == cudaSuccess)
    {
        // One block of LENG threads: one thread per array element.
        sumReduced<<<1, LENG>>>(aGpu, sumGpu, LENG);
        // A launch does not return a status; configuration errors show up here.
        status = cudaGetLastError();
    }
    if (status == cudaSuccess)
    {
        // Blocking copy back to the host; it also reports errors raised while
        // the kernel was executing.
        status = cudaMemcpy(&asum, sumGpu, 1 * sizeof(float), cudaMemcpyDeviceToHost);
    }

    if (status == cudaSuccess)
    {
        cout << "cuda數組和爲:" << asum << endl;
    }
    else
    {
        cerr << "CUDA error: " << cudaGetErrorString(status) << endl;
    }

    // cudaFree on a null pointer performs no operation.
    cudaFree(aGpu);
    cudaFree(sumGpu);

    // CPU reference sum of the same data.
    float testSum = 0;
    for (int i = 0; i < LENG; ++i)
    {
        testSum += a[i];
    }
    cout << "for數組和爲:" << testSum << endl;

    return (status == cudaSuccess) ? 0 : EXIT_FAILURE;
}
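// Title of the source article: "CUDA reduction sum, final version".
// The remaining text is comment-section boilerplate from the web page this
// listing was copied from: "Post a comment" / "All comments" /
// "No one has commented yet. Want to be the first? Enter your comment in the
// box above and click Post."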