更多darknet源代码学习笔记,参看:darknet源码学习:预测分类函数float *network_predict_gpu(network net, float *input)
将主机(host)端数组 x 中的 n 个 float 数据通过 cudaMemcpyAsync 拷贝到 GPU 设备内存 x_gpu 中:
/*
 * Copy n floats from the host buffer x into the device buffer x_gpu.
 *
 * The transfer is issued with cudaMemcpyAsync on the stream returned by
 * get_cuda_stream(); this function itself performs no explicit
 * synchronization. The status returned by the copy call is handed to
 * CHECK_CUDA.
 *
 *   x_gpu  destination pointer (device side of the host-to-device copy)
 *   x      source pointer (host side of the host-to-device copy)
 *   n      number of float elements to transfer
 */
void cuda_push_array(float *x_gpu, float *x, size_t n)
{
    /* Transfer length in bytes, not elements. */
    const size_t num_bytes = n * sizeof(float);

    /* A blocking cudaMemcpy was used here previously; the copy is now
       enqueued on the stream provided by get_cuda_stream(). */
    cudaError_t copy_status = cudaMemcpyAsync(x_gpu,
                                              x,
                                              num_bytes,
                                              cudaMemcpyHostToDevice,
                                              get_cuda_stream());

    CHECK_CUDA(copy_status);
}
cudaDeviceSynchronize vs cudaThreadSynchronize vs cudaStreamSynchronize