CUDA FORTRAN 测试程序 – 我的随笔小记

module simpleOps_m
contains
  !> CUDA kernel: add the scalar b to every element of the 2-D array a.
  !!
  !! Each thread updates at most one element a(i,j), where (i,j) is the
  !! thread's global 2-D index computed from the launch configuration.
  !!
  !! a : 2-D array, updated in place.
  !! b : increment, passed by value.
  attributes(global) subroutine inc(a, b)
    implicit none
    real :: a(:,:)
    real, value :: b
    integer :: i, j, n(2)

    ! Global 1-based thread index in each dimension:
    !   threadIdx%x, threadIdx%y : index of the thread within its block
    !   blockDim%x,  blockDim%y  : number of threads per block
    !   blockIdx%x,  blockIdx%y  : index of the block within the grid
    i = (blockIdx%x-1)*blockDim%x + threadIdx%x
    j = (blockIdx%y-1)*blockDim%y + threadIdx%y

    ! Per-dimension extents. size(a) without a dim argument would return
    ! the total element count and make the bounds check below ineffective.
    n(1) = size(a, 1)
    n(2) = size(a, 2)

    ! Threads that fall outside the array (when the grid over-covers it)
    ! must not write.
    if (i <= n(1) .and. j <= n(2)) a(i,j) = a(i,j) + b
  end subroutine inc
end module simpleOps_m

!> Host driver: fills a 1024x512 matrix with 1, adds 3 to every element on
!! the device with the inc kernel, copies the result back and prints the
!! maximum deviation from the expected value 4.
program incTest
  use cudafor
  use simpleOps_m
  implicit none
  ! Matrix with 1024 rows and 512 columns.
  integer, parameter :: nx=1024, ny=512
  real :: a(nx,ny), b
  real, device :: a_d(nx,ny)
  type(dim3) :: grid, tBlock
  integer :: ierr

  a = 1; b = 3

  ! 32 threads per block along the first dimension, 8 along the second.
  tBlock = dim3(32,8,1)
  ! Enough blocks to cover the whole matrix: 32 blocks along the first
  ! dimension and 64 along the second for the sizes above.
  ! The block counts must not exceed the device's maximum grid dimensions.
  grid = dim3(ceiling(real(nx)/tBlock%x), ceiling(real(ny)/tBlock%y), 1)

  ! Host -> device copy.
  a_d = a

  ! Launch syntax: kernel<<<dimGrid,dimBlock>>>(arguments)
  call inc<<<grid,tBlock>>>(a_d, b)

  ! A failed launch would otherwise go unnoticed and the result below
  ! would be reported as if the kernel had run.
  ierr = cudaGetLastError()
  if (ierr /= cudaSuccess) then
    write(*,*) 'Kernel launch failed: ', cudaGetErrorString(ierr)
    stop 1
  end if

  ! Device -> host copy.
  a = a_d
  write(*,*) 'Max error: ', maxval(abs(a-4))
end program incTest

From：http://blog.163.com/zpfzcjndx@126/blog/static/635456812014121104122345/

Post Views: 1,223

打赏赞

一	二	三	四	五	六	日
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30	31