北京并行科技GPU-CUDA Fortran计算平台配置

启用MPI OpenMP

module load nvhpc/nvhpc/21.7_cuda11.0_openmpi4.0.5_gcc7.5_qd

调用ifort 编译器

module load intel/parallelstudio/2017.1.5

Step1 抢占一个GPU计算节点

[scv1172@ln01 xyqtest2022]$ salloc --gpus=1

salloc: Pending job allocation 308347

salloc: job 308347 queued and waiting for resources

salloc: job 308347 has been allocated resources

salloc: Granted job allocation 308347

salloc: Waiting for resource configuration

salloc: Nodes g0001 are ready for job (找到的计算节点为g0001)

Step2 用ssh命令登录分配到的计算节点 g0001

[scv1172@ln01 xyqtest2022]$ ssh g0001

Warning: Permanently added 'g0001,192.168.11.1' (ECDSA) to the list of known hosts.

[scv1172@g0001 ~]$

以上结果表明已成功登录g0001节点

Step3 查询NVHPC：用 module ava命令查找是否安装nvhpc，以及版本

[scv1172@g0001 ~]$ module ava nvhpc

------------------- /data/apps/modulefiles --------------------------

nvhpc/nvhpc/20.7_cuda10.1_openmpi3.1.5

nvhpc/nvhpc/20.7_cuda10.2_openmpi3.1.5

nvhpc/nvhpc/21.7_cuda11.0_openmpi4.0.5_gcc7.5_qd

nvhpc/nvhpc_byo_compiler/20.7_cuda10.1

nvhpc/nvhpc_byo_compiler/20.7_cuda10.2

nvhpc/nvhpc_byo_compiler/21.7_cuda11.0_gcc7.5

nvhpc/nvhpc_nompi/20.7_cuda10.1

nvhpc/nvhpc_nompi/20.7_cuda10.2

nvhpc/nvhpc_nompi/21.7_cuda11.0_gcc7.5_qd

Step 4 用module load加载相应版本的nvhpc

[scv1172@g0001 ~]$ module load nvhpc/nvhpc_nompi/21.7_cuda11.0_gcc7.5_qd

Step 5 用which命令检查nvhpc的加载是否成功

[scv1172@g0001 xyqtest2022]$ which nvfortran

/data/apps/nvhpc/nvhpc_21.7/Linux_x86_64/21.7/compilers/bin/nvfortran

[scv1172@g0001 xyqtest2022]$

以上结果表明相应版本的nvhpc加载成功

利用以上5步即可完成对CUDA GPU计算节点的配置

测试代码(命名为test.cuf)~~~~~~

program deviceQuery
  ! Lists the CUDA devices visible to this process and prints, for each one,
  ! its name, compute capability, multiprocessor count, thread limits,
  ! global memory size and execution-configuration limits.
  !
  ! Exit behaviour:
  !   - If the device count cannot be obtained, the CUDA error text is
  !     printed and the program stops with a non-zero code.
  !   - If no device is found, a message is printed and the program stops.
  !   - If the properties of one device cannot be read, the CUDA error text
  !     is printed and that device is skipped.
  use cudafor
  implicit none

  type (cudaDeviceProp) :: prop
  integer :: nDevices, i, ierr

  ! Number of CUDA-capable devices
  nDevices = 0
  ierr = cudaGetDeviceCount(nDevices)
  if (ierr /= cudaSuccess) then
    ! Report the runtime error instead of silently treating it as "0 devices"
    write(*,"('cudaGetDeviceCount failed: ',a)") trim(cudaGetErrorString(ierr))
    stop 1
  end if

  if (nDevices == 0) then
    write(*,"(/,'No CUDA devices found ',/)")
    stop
  else if (nDevices == 1) then
    write(*,"(/,'One CUDA device found ',/)")
  else
    write(*,"(/,i0,' CUDA devices found ',/)") nDevices
  end if

  ! Loop over devices (CUDA device numbers are zero-based)
  do i = 0, nDevices - 1
    write(*,"('Device Number: ',i0)") i
    ierr = cudaGetDeviceProperties(prop, i)
    if (ierr /= cudaSuccess) then
      ! prop is not valid on failure, so do not print it
      write(*,"('cudaGetDeviceProperties failed: ',a)") &
           trim(cudaGetErrorString(ierr))
      cycle
    end if

    ! General device info
    write(*,"(' Device Name: ',a)") trim(prop%name)
    write(*,"(' Compute Capability: ',i0,'.',i0)") &
         prop%major, prop%minor
    write(*,"(' Number of Multiprocessors: ',i0)") &
         prop%multiProcessorCount
    write(*,"(' Max Threads per Multiprocessor: ',i0)") &
         prop%maxThreadsPerMultiprocessor
    ! totalGlobalMem is divided by 1024**3 to print the size in GB
    write(*,"(' Global Memory (GB): ',f9.3,/)") &
         prop%totalGlobalMem/1024.0**3

    ! Execution Configuration
    write(*,"(' Execution Configuration Limits ')")
    write(*,"(' Max Grid Dims: ',2(i0,' x '),i0)") &
         prop%maxGridSize
    write(*,"(' Max Block Dims: ',2(i0,' x '),i0)") &
         prop%maxThreadsDim
    write(*,"(' Max Threads per Block: ',i0 ,/)") &
         prop%maxThreadsPerBlock
  end do
end program deviceQuery

~编译运行~~~~~

[scv1172@g0001 xyqtest2022]$ nvfortran *.cuf -o out

[scv1172@g0001 xyqtest2022]$ ./out
One CUDA device found
Device Number: 0
Device Name: Tesla V100-SXM2-32GB
Compute Capability: 7.0
Number of Multiprocessors: 80
Max Threads per Multiprocessor: 2048
Global Memory (GB): 31.749
Execution Configuration Limits
Max Grid Dims: 2147483647 x 65535 x 65535
Max Block Dims: 1024 x 1024 x 64
Max Threads per Block: 1024

Post Views: 970

打赏赞

一	二	三	四	五	六	日
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30	31