CUDA束表決函數
束表決函數:簡單的理解就是在一個warp內進行表決
__all(int predicate):將predicate與0進行比較,如果當前線程所在的warp中所有線程對應的predicate都不爲0,則返回1,否則返回0。
__any(int predicate):將predicate與0進行比較,如果當前線程所在的warp中至少有一個線程對應的predicate不爲0,則返回1,否則返回0。
__ballot(int predicate):將predicate與0進行比較,返回一個32位整數;如果當前線程所在的warp中第N個線程對應的predicate不爲0,則返回值的第N位被置爲1,否則該位爲0。
//
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Warp vote "all": each in-range thread stores 1 in b[tid] when every in-range
// lane of its warp satisfies a[tid] > 100, and 0 otherwise.
//
// Launch layout: threadIdx.x alone is used as the element index (blockIdx is
// ignored), so the kernel is meant for a single 1-D block.
// Preconditions: a and b each hold at least n ints.
// Uses the *_sync vote intrinsic; the legacy mask-less __all is not available
// on Volta and newer architectures.
__global__ void vote_all(int *a, int *b, int n)
{
    int tid = threadIdx.x;

    // Build the participation mask before any lane exits, so the vote below
    // names exactly the lanes that pass the bounds check.
    unsigned int mask = __ballot_sync(0xffffffffu, tid < n);

    // Valid indices are 0 .. n-1; the previous `tid > n` test let thread n
    // read a[n] and write b[n].
    if (tid >= n)
    {
        return;
    }

    int temp = a[tid];
    b[tid] = __all_sync(mask, temp > 100);
}
// Warp vote "any": each in-range thread stores 1 in b[tid] when at least one
// in-range lane of its warp satisfies a[tid] > 100, and 0 otherwise.
//
// Launch layout: threadIdx.x alone is used as the element index (blockIdx is
// ignored), so the kernel is meant for a single 1-D block.
// Preconditions: a and b each hold at least n ints.
// Uses the *_sync vote intrinsic; the legacy mask-less __any is not available
// on Volta and newer architectures.
__global__ void vote_any(int *a, int *b, int n)
{
    int tid = threadIdx.x;

    // Build the participation mask before any lane exits, so out-of-range
    // lanes cannot take part in the vote below.
    unsigned int mask = __ballot_sync(0xffffffffu, tid < n);

    // Valid indices are 0 .. n-1; the previous `tid > n` test let thread n
    // read a[n], vote with it, and write b[n].
    if (tid >= n)
    {
        return;
    }

    int temp = a[tid];
    b[tid] = __any_sync(mask, temp > 100);
}
// Warp ballot: each in-range thread stores in b[tid] a 32-bit mask whose bit k
// is set when lane k of the same warp is in range and satisfies a[tid] > 100.
// The unsigned ballot is stored into an int, so a mask with bit 31 set reads
// back as a negative number.
//
// Launch layout: threadIdx.x alone is used as the element index (blockIdx is
// ignored), so the kernel is meant for a single 1-D block.
// Preconditions: a and b each hold at least n ints.
// Uses the *_sync vote intrinsic; the legacy mask-less __ballot is not
// available on Volta and newer architectures.
__global__ void vote_ballot(int *a, int *b, int n)
{
    int tid = threadIdx.x;

    // Build the participation mask before any lane exits, so out-of-range
    // lanes cannot contribute bits to the ballot below.
    unsigned int mask = __ballot_sync(0xffffffffu, tid < n);

    // Valid indices are 0 .. n-1; the previous `tid > n` test let thread n
    // read a[n], add its bit to the ballot, and write b[n].
    if (tid >= n)
    {
        return;
    }

    int temp = a[tid];
    b[tid] = __ballot_sync(mask, temp > 100);
}
// Aborts with a file/line diagnostic when a CUDA runtime call fails, so an
// early error is not silently carried into later calls.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Signature shared by vote_all, vote_any and vote_ballot.
typedef void (*vote_kernel_t)(int *, int *, int);

// Prints `label`, then the n values back-to-back with a line break before
// every per_line-th value, then a final newline.
static void print_results(const char *label, const int *values, int n, int per_line)
{
    printf("%s", label);
    for (int i = 0; i < n; ++i)
    {
        if (!(i % per_line))
        {
            printf("\n");
        }
        printf("%d", values[i]);
    }
    printf("\n");
}

// Launches `kernel` on one block of n threads, copies d_b back into h_b and
// prints it under `label`.
static void run_vote(vote_kernel_t kernel, const char *label,
                     int *d_a, int *d_b, int *h_b, int n, int per_line)
{
    kernel<<<1, n>>>(d_a, d_b, n);
    CUDA_CHECK(cudaGetLastError());  // reports an invalid launch configuration
    // The blocking copy also waits for the kernel and reports execution errors.
    CUDA_CHECK(cudaMemcpy(h_b, d_b, n * sizeof(int), cudaMemcpyDeviceToHost));
    print_results(label, h_b, n, per_line);
}

// Demo driver: fills a[i] = i for 256 elements, runs the three warp-vote
// kernels on that input and prints each result array, 10 values per line.
int main()
{
    const int n = 256;  // one element per thread in a single block
    const int m = 10;   // values printed per output line
    const size_t nsize = n * sizeof(int);

    int *h_a = (int *)malloc(nsize);
    int *h_b = (int *)malloc(nsize);
    if (h_a == NULL || h_b == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        free(h_a);
        free(h_b);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < n; ++i)
    {
        h_a[i] = i;
    }
    memset(h_b, 0, nsize);

    int *d_a = NULL;
    int *d_b = NULL;
    CUDA_CHECK(cudaMalloc(&d_a, nsize));
    CUDA_CHECK(cudaMalloc(&d_b, nsize));
    CUDA_CHECK(cudaMemcpy(d_a, h_a, nsize, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemset(d_b, 0, nsize));

    run_vote(vote_all, "vote_all():", d_a, d_b, h_b, n, m);
    run_vote(vote_any, "vote_any():", d_a, d_b, h_b, n, m);
    run_vote(vote_ballot, "vote_ballot():", d_a, d_b, h_b, n, m);

    // Release device and host buffers; the original version leaked all four.
    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    free(h_a);
    free(h_b);
    return 0;
}
#include <iostream>
using namespace std;
// Builds a 32-bit mask with bits start..31 set and prints it as a signed int
// (the same way the ballot results are printed with %d).
//
// NOTE(review): this is a second `main`; it looks like a standalone snippet and
// cannot be compiled into the same translation unit as another `main`.
int main()
{
    const int start = 10;
    // Accumulate in an unsigned value: `1 << 31` on a signed int shifts into
    // the sign bit, which is not portable across C++ standard versions.
    unsigned int state = 0;
    for (int i = start; i < 32; ++i)
    {
        state |= (1u << i);
    }
    // Reinterpret as int so the printed value stays the signed form (-1024).
    std::cout << static_cast<int>(state) << std::endl;
}
置位可以用或操作符“|”實現:y = x | (1 << n) 對x的第n位進行置位
清除可以用與操作符“&”實現:y = x & (~(1 << n)) 對x的第n位進行清除
取反可以用異或操作符“^”實現:y = x ^ (1 << n) 對x的第n位進行取反
Bit提取操作: bit = (x >> n) & 1; 取出x的第n位(也可以寫成 bit = (x & (1 << n)) >> n)