先上代碼:
對於函數saxpy來說,其虛參x、y帶有device屬性,編譯器因此知道傳進來的是設備數組,故在調用處不需要host_data導語。
-
! openacc_main.f90
-
program main
  ! Driver: runs y = y + alpha*x through the CUDA Fortran wrapper saxpy while
  ! the arrays are kept inside an OpenACC data region.
  use saxpy_mod, only: saxpy
  implicit none

  integer, parameter :: N = 2**20      ! number of elements in each vector
  real, parameter :: alpha = 2.0       ! scalar multiplier handed to saxpy

  real, dimension(N) :: X, Y

  X(:) = 1.0
  Y(:) = 0.0

  ! copyin(x): x is only read, so it is transferred host -> device.
  ! copy(y):   y is updated, so it is transferred in and copied back at the
  !            end of the region.
  !$acc data copy(y) copyin(x)
  call saxpy(N, alpha, x, y)
  !$acc end data

  ! With the initial values above, each element should be 0.0 + 2.0*1.0.
  print *, y(1)
end program main
-
! kernels.cuf
-
module saxpy_mod
  ! Single-precision y = y + a*x: a CUDA Fortran kernel plus the host-side
  ! wrapper that launches it.
  implicit none

contains

  ! Device kernel: each thread updates at most one element of y.
  !   n : number of leading elements to process (passed by value)
  !   a : scalar multiplier (passed by value)
  !   x : input vector
  !   y : vector updated in place
  ! Note: n is not checked against the extents of x and y here.
  attributes(global) subroutine saxpy_kernel(n, a, x, y)
    integer, value :: n
    real, value :: a
    real, intent(in) :: x(:)
    real, intent(inout) :: y(:)
    integer :: i

    ! 1-based global index of this thread (blockIdx and threadIdx start at 1).
    i = threadIdx%x + (blockIdx%x - 1) * blockDim%x

    ! Threads past the end of the data in the last block do nothing.
    if (i <= n) y(i) = y(i) + a * x(i)
  end subroutine saxpy_kernel

  ! Host wrapper: launches saxpy_kernel on device-resident x and y.
  !   n : number of leading elements to process
  !   a : scalar multiplier
  !   x : device array, read only
  !   y : device array, updated in place
  ! The dummies carry the device attribute, so callers hand over device data.
  subroutine saxpy(n, a, x, y)
    use cudafor
    integer, intent(in) :: n
    real, intent(in) :: a
    real, device, intent(in) :: x(:)
    real, device, intent(inout) :: y(:)

    integer, parameter :: threads_per_block = 256
    integer :: num_blocks

    ! Nothing to do; also avoids launching with an empty grid.
    if (n <= 0) return

    ! Ceiling division so every element 1..n is covered by a thread.
    ! Written as (n-1)/t + 1 to avoid integer overflow for very large n.
    num_blocks = (n - 1) / threads_per_block + 1

    call saxpy_kernel<<<num_blocks, threads_per_block>>>(n, a, x, y)
  end subroutine saxpy

end module saxpy_mod