CUDA Python -- 編程基礎以及圖像處理(代碼)

在這裏插入圖片描述
一個 Grid 按維度分成多個 Block,個數爲 gridDim.x * gridDim.y
遍歷: blockIdx.x , blockIdx.y

一個 Block 按維度分成多個 Thread,個數爲 blockDim.x * blockDim.y
Thread 是最小的運行單元

遍歷:threadIdx.x , threadIdx.y

圖像處理中,每個像素對應到一個 thread。
從 thread 映射到圖像像素(pixel)的方式:

ix = blockIdx.x*blockDim.x + threadIdx.x;
iy = blockIdx.y*blockDim.y + threadIdx.y;

# -*- coding: utf-8 -*-

import cv2
import numpy as np
from numba import cuda
import time
import math

#GPU function
@cuda.jit
def process_gpu(img,channels):
    """CUDA kernel: brighten one pixel of an image in place.

    Each thread handles the pixel at (tx, ty), where tx indexes the first
    axis of img and ty the second. Every processed channel value v becomes
    v * 2.0 + 30, limited to the range [0, 255].

    Args:
        img: Array indexed as img[tx, ty, c]; modified in place.
        channels: Number of channels to process for each pixel.
    """
    tx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
    ty = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y

    # The launch grid is rounded up to whole blocks, so when the image size
    # is not a multiple of the block size some threads land past the image
    # edge. Those threads must not touch memory.
    if tx >= img.shape[0] or ty >= img.shape[1]:
        return

    for c in range(channels):
        color = img[tx, ty, c] * 2.0 + 30
        if color > 255:
            img[tx, ty, c] = 255
        elif color < 0:
            img[tx, ty, c] = 0
        else:
            img[tx, ty, c] = color

#cpu function
            
def process_cpu(img,channels):
    """Brighten an image in place on the CPU, one element at a time.

    Every processed value v becomes v * 2.0 + 30, limited to the range
    [0, 255]. The explicit loops are kept so this serves as the plain-Python
    baseline for process_gpu.

    Args:
        img: Array whose shape unpacks as (rows, cols, channels); modified
            in place.
        channels: Number of leading channels to process for each pixel.
            Values larger than the channel count of img are capped to it.

    Raises:
        ValueError: If img.shape does not have exactly three entries.
    """
    rows, cols, depth = img.shape
    # Honor the caller's channel count (as process_gpu does) instead of
    # silently overwriting it, but never index past the channels that exist.
    channels = min(channels, depth)
    for i in range(rows):
        for j in range(cols):
            for c in range(channels):
                color = img[i, j, c] * 2.0 + 30
                if color > 255:
                    img[i, j, c] = 255
                elif color < 0:
                    img[i, j, c] = 0
                else:
                    img[i, j, c] = color
 
if __name__ == "__main__":
    # Load the input image.
    img = cv2.imread("DSC00070.jpg")
    if img is None:
        # cv2.imread reports an unreadable or missing file by returning None
        # instead of raising, so fail here with a clear message.
        raise FileNotFoundError("Could not read image: DSC00070.jpg")
    rows, cols, channels = img.shape

    # CPU run: work on a copy so the original pixels stay available for the
    # GPU run below.
    dst_cpu = img.copy()
    start_cpu = time.perf_counter()
    process_cpu(dst_cpu, channels)
    end_cpu = time.perf_counter()
    time_cpu = end_cpu - start_cpu
    print(f"CPU process time: {time_cpu}")

    # GPU run: copy the image to the device and launch one thread per pixel.
    dImg = cuda.to_device(img)
    threadsperblock = (16, 16)
    # Round up so the grid covers images whose size is not a multiple of the
    # block size.
    blockspergrid_x = int(math.ceil(rows / threadsperblock[0]))
    blockspergrid_y = int(math.ceil(cols / threadsperblock[1]))
    blockspergrid = (blockspergrid_x, blockspergrid_y)

    # Synchronize before and after the launch so the timed interval covers
    # the whole kernel execution. This is the first call of process_gpu, so
    # the interval also includes whatever one-time setup that call performs.
    cuda.synchronize()
    start_gpu = time.perf_counter()
    process_gpu[blockspergrid, threadsperblock](dImg, channels)
    cuda.synchronize()
    end_gpu = time.perf_counter()
    dst_gpu = dImg.copy_to_host()
    time_gpu = end_gpu - start_gpu
    print(f"GPU process time: {time_gpu}")

    cv2.imwrite("result_cpu.jpg", dst_cpu)
    cv2.imwrite("result_gpu.jpg", dst_gpu)
    print("Done.")
    

結果:

CPU process time: 523.7896401882172
GPU process time: 0.17055010795593262
Done.

原圖:
在這裏插入圖片描述
處理後結果:
在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章