▶ 书上第十三章,用一系列步骤优化一个云水参数化方案。用于熟悉 Fortran 以及 OpenACC 在其下的表现
● 代码,文件较多,放在一起了
! main.f90
! Driver program: calls initialize(), runs the timed time loop that calls
! physics() and write_output(ntstep) for every step up to nstop, then prints
! the timers and calls cleanup().
! NOTE(review): in this listing several statements share one line (some of
! them after a "!" and therefore inside a comment) and some integer literals
! are missing (the value of itimloop, the lower bound of the ntstep loop).
! This looks like damage from pasting the code -- restore it from the original
! sources before compiling.
PROGRAM main
USE m_config, ONLY: nstop
USE m_physics, ONLY: physics
USE m_io, ONLY: write_output
USE m_setup, ONLY: initialize, cleanup
USE m_timing, ONLY: start_timer, end_timer, print_timers IMPLICIT NONE INTEGER :: ntstep
! itimloop is the timer id passed to start_timer/end_timer around the time loop.
INTEGER, parameter :: itimloop = CALL initialize() ! initialize timers and device WRITE(*,"(A)") "Start of time loop"
CALL start_timer(itimloop, "Time loop") DO ntstep = , nstop ! compute
CALL physics()
CALL write_output( ntstep )
END DO CALL end_timer( itimloop )
WRITE(*,"(A)") "End of time loop" CALL print_timers()
CALL cleanup() END PROGRAM main ! m_config.f90: run parameters
! Compile-time run parameters: grid dimensions, number of time steps and the
! output interval.
! NOTE(review): every parameter below has lost its value in this listing
! (nothing follows the "="); take the numbers from the original sources.
MODULE m_config
INTEGER, parameter :: nx = ! number of grid points in longitude
INTEGER, parameter :: ny = ! number of grid points in latitude
INTEGER, parameter :: nz = ! number of grid points in altitude
INTEGER, parameter :: nstop = ! number of time steps
INTEGER, parameter :: nout = ! output interval END MODULE m_config ! m_fields.f90: field variables
! Global 3-D model fields shared by the setup, physics and output modules.
! The arrays are declared here only; they are allocated and released elsewhere.
MODULE m_fields
  IMPLICIT NONE

  ! REAL*8 matches the double-precision (D0) literals used throughout the code.
  REAL*8, ALLOCATABLE :: qv(:,:,:)   ! water vapour content
  REAL*8, ALLOCATABLE :: t(:,:,:)    ! temperature

END MODULE m_fields

! m_io.f90: input/output routines
! Diagnostic output for the serial version.
MODULE m_io
  USE m_config, ONLY: nout, nx, ny, nz
  USE m_fields, ONLY: qv
  IMPLICIT NONE

CONTAINS

  ! Print the domain mean of qv for the given time step.
  !   ntstep : current time step; nothing is printed unless it is a multiple
  !            of nout.
  SUBROUTINE write_output(ntstep)
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: ntstep   ! current time step
    INTEGER :: i, j, k
    REAL*8  :: qv_mean              ! mean water vapour content (scalar)

    ! Skip time steps that are not output steps.
    IF (MOD(ntstep, nout) /= 0) RETURN

    ! Accumulate over the whole (nx,ny,nz) domain; the explicit loop nest is
    ! kept (rather than SUM) so the summation order stays well defined.
    qv_mean = 0.0D0
    DO k = 1, nz
      DO j = 1, ny
        DO i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        END DO
      END DO
    END DO
    qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))

    WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  END SUBROUTINE write_output

END MODULE m_io

! m_parametrizations.f90: parametrization schemes
! Two parametrization schemes operating point-wise on 3-D fields indexed
! (i,j,k): saturation_adjustment and microphysics.
! NOTE(review): this listing has lost numeric tokens -- the byte count after
! "REAL*", parts of the cs*/cm*/t0 constants (e.g. the exponent in "1.0D-"),
! the lower bounds of the DO loops and the offset in "k-". Statements also
! share lines. Restore from the original sources before compiling.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! Updates qv in place from the local temperature and then sets qc = cs4*qv
! at every grid point.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! parametrization scheme 1
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions
REAL*, INTENT(IN) :: t(:,:,:) ! temperature
REAL*, INTENT(OUT) :: qc(:,:,:) ! cloud water content
REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapour content
INTEGER :: i, j, k DO k = , nlev
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
! Sets qv from the qv value at another level plus a temperature term, then
! rescales t using qc and the new qv.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) ! parametrization scheme 2
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k DO k = , nlev
DO j = , npy
DO i = , npx
! NOTE(review): the index "k-" has lost its offset; presumably the previous
! level (k-1), which would make the k loop order-dependent and require the
! loop to start above the first level -- confirm against the original.
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_physics.f90: runs the parametrizations
! Runs the physical parametrizations on the global fields (serial version).
MODULE m_physics
  USE m_config, ONLY: nx, ny, nz
  USE m_fields, ONLY: qv, t
  USE m_parametrizations, ONLY: saturation_adjustment, microphysics
  IMPLICIT NONE

CONTAINS

  ! Apply both parametrizations, in order, to the module fields t and qv.
  ! qc is produced by saturation_adjustment and consumed by microphysics; it
  ! lives only for the duration of one call.
  SUBROUTINE physics()
    IMPLICIT NONE
    REAL*8 :: qc(nx,ny,nz)   ! cloud water content, temporary

    CALL saturation_adjustment(nx, ny, nz, t, qc, qv)   ! first parametrization
    CALL microphysics(nx, ny, nz, t, qc, qv)            ! second parametrization
  END SUBROUTINE physics

END MODULE m_physics

! m_timing.f90: timers
! Simple wall-clock timers built on SYSTEM_CLOCK. Each timer is addressed by
! an integer id in the range accepted by the checks below and carries a text
! tag that print_timers uses as its label.
! Behaviour visible in this code:
!   * start_timer stops the program if the id's tag is already set, and only
!     init_timers clears tags, so each id can be started once per init.
!   * end_timer assigns (does not accumulate) the elapsed time to rtimer(id).
!   * icountmax is queried but not used by the routines below, so a counter
!     wrap-around between start and end is not corrected.
! NOTE(review): this listing has lost numeric tokens (value of ntimer, the
! byte count after "REAL*", the length in "CHARACTER()", the zero/one
! constants in the comparisons and the ms factor) and several statements
! share a line, some of them behind a "!". Restore from the original sources
! before compiling.
MODULE m_timing
IMPLICIT NONE INTEGER, PARAMETER :: ntimer= ! number of timers
REAL* :: rtimer(ntimer) ! elapsed time per timer
CHARACTER() :: timertag(ntimer) ! timer tags
INTEGER :: icountold(ntimer), & ! tick (start of timer section)
icountrate, & ! countrate of SYSTEM_CLOCK()
icountmax ! maximum counter value of SYSTEM_CLOCK() CONTAINS
SUBROUTINE init_timers() ! initialize the timers
IMPLICIT NONE rtimer(:) = .0D0
timertag(:) = ""
icountold(:) = CALL SYSTEM_CLOCK( COUNT_RATE=icountrate, COUNT_MAX=icountmax )
! start_timer(id, tag): validate id and tag, store the tag and record the
! current clock count for this id.
END SUBROUTINE init_timers SUBROUTINE start_timer(id, tag) ! start timing
IMPLICIT NONE INTEGER, INTENT(IN) :: id
CHARACTER(*), INTENT(IN) :: tag IF (id < .OR. id > ntimer) THEN ! check the timer id range
WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceeds maximum timer number", ntimer
STOP
END IF IF (LEN_TRIM(timertag(id)) /= ) THEN ! check whether the timer was already started
WRITE(*,"(A,I4)") "Error: timer already started previously, id:", id
STOP
END IF IF (LEN_TRIM(tag) == ) THEN ! check that the tag is not empty
WRITE(*,"(A,I4)") "Error: empty tag provided, id:", id
STOP
END IF timertag(id) = TRIM(tag) ! store the tag
!$acc wait CALL SYSTEM_CLOCK( COUNT=icountold(id) ) ! start timing
! end_timer(id): validate id, require that start_timer set a tag for it, and
! store the elapsed count divided by the count rate in rtimer(id).
END SUBROUTINE start_timer SUBROUTINE end_timer(id) ! stop timing
IMPLICIT NONE INTEGER, INTENT(IN) :: id
INTEGER :: icountnew IF (id < .OR. id > ntimer) THEN ! check the timer id range
WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceed max timer number", ntimer
STOP
END IF IF (LEN_TRIM(timertag(id)) == ) THEN ! check whether the timer was started
WRITE(*,"(A,I4)") "Error: Need to call start_timer before end_timing, id:", id
STOP
END IF
!$acc wait CALL SYSTEM_CLOCK( COUNT=icountnew ) ! read the current count and compute the elapsed time
rtimer(id) = ( REAL(icountnew - icountold(id), KIND(rtimer(id))) ) / REAL(icountrate, KIND(rtimer(id)))
! print_timers(): list tag and time (labelled "ms") of every timer whose
! stored value is positive.
END SUBROUTINE end_timer SUBROUTINE print_timers() ! print the timers
IMPLICIT NONE INTEGER :: id WRITE(*,"(A)") "----------------------------"
WRITE(*,"(A)") "Timers:"
WRITE(*,"(A)") "----------------------------"
DO id = , ntimer
IF ( rtimer(id) > .0D0 ) THEN
WRITE(*,"(A15,A2,F8.2,A)") timertag(id), ": ", rtimer(id)*.0D3, " ms"
END IF
END DO
WRITE(*,"(A)") "----------------------------"
END SUBROUTINE print_timers END MODULE m_timing ! m_setup.f90: initialization and cleanup
! Setup and teardown for the serial version: initialize() reports whether the
! build uses OpenACC, resets the timers, allocates and fills the global fields
! t and qv (timed with timer itiminit) and, in OpenACC builds, calls
! initialize_gpu(); cleanup() deallocates t and qv.
! NOTE(review): this listing has lost numeric tokens (value of itiminit, loop
! lower bounds, parts of the constants in the t/qv formulas, the size of temp
! and the values used in initialize_gpu) and several statements and
! preprocessor lines share a line. Restore from the original sources before
! compiling.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer IMPLICIT NONE CONTAINS
SUBROUTINE initialize() ! initialize timers and device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit = ! timer id
INTEGER :: i, j, k #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#endif WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) DO k =, nz
DO j = , ny
DO i = , nx
! Initial fields: a constant times (offset + amplitude * COS(...)) of the
! index sum i+j+k.
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO #ifdef _OPENACC
CALL initialize_gpu()
#endif CALL end_timer( itiminit )
! initialize_gpu(): fill a small integer array inside an "!$acc parallel loop"
! and check its sum; prints "GPU initialized" on success and stops otherwise.
END SUBROUTINE initialize SUBROUTINE initialize_gpu()! run a small kernel on the GPU to initialize it
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()! free the memory of t and qv
IMPLICIT NONE DEALLOCATE( t, qv )
END SUBROUTINE cleanup END MODULE m_setup
● OpenMP 优化,改了 m_io.f90,m_parametrizations.f90,m_setup.f90
! m_io.f90
! Diagnostic output for the OpenMP version.
MODULE m_io
  USE m_config, ONLY: nout, nx, ny, nz
  USE m_fields, ONLY: qv
  IMPLICIT NONE

CONTAINS

  ! Print the domain mean of qv for the given time step.
  !   ntstep : current time step; nothing is printed unless it is a multiple
  !            of nout.
  SUBROUTINE write_output(ntstep)
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: ntstep   ! current time step
    INTEGER :: i, j, k
    REAL*8  :: qv_mean              ! mean water vapour content (scalar)

    ! Skip time steps that are not output steps.
    IF (MOD(ntstep, nout) /= 0) RETURN

    qv_mean = 0.0D0
    DO k = 1, nz
      ! One parallel region per level: the j loop is shared among the threads
      ! and the per-thread partial sums are combined by the reduction.
      !$OMP PARALLEL DO PRIVATE(i,j) SHARED(k,qv) REDUCTION(+:qv_mean)
      DO j = 1, ny
        DO i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        END DO
      END DO
    END DO
    qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))

    WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  END SUBROUTINE write_output

END MODULE m_io

! m_parametrizations.f90
! OpenMP version of the two parametrization schemes. In both routines the
! text shows an "!$OMP PARALLEL" region around the k loop and an
! "!$OMP DO PRIVATE(i,j)" worksharing directive on the j loop of each level.
! NOTE(review): this listing has lost numeric tokens (the byte count after
! "REAL*", parts of the cs*/cm*/t0 constants, loop lower bounds, the offset in
! "k-"), and the "!$OMP PARALLEL" directives sit at the end of a declaration
! line, where they read as ordinary comments. Restore from the original
! sources before compiling.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! Updates qv in place from the local temperature t and then sets
! qc = cs4*qv at every grid point.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(IN) :: t(:,:,:)
REAL*, INTENT(OUT) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$OMP PARALLEL
DO k = , nlev
!$OMP DO PRIVATE(i,j)
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
!$OMP END PARALLEL
! Sets qv from the qv value at another level plus a temperature term, then
! rescales t using qc and the new qv.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$OMP PARALLEL
DO k = , nlev
! The worksharing loop is on j, inside the k loop; no NOWAIT is given.
!$OMP DO PRIVATE(i,j)
DO j = , npy
DO i = , npx
! NOTE(review): the index "k-" has lost its offset; presumably the previous
! level (k-1), i.e. each level depends on the one computed before it --
! confirm against the original.
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
!$OMP END PARALLEL
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_setup.f90
! Setup and teardown for the OpenMP version. Same structure as the serial
! setup module, plus a message reporting the number of OpenMP threads when
! the build defines _OPENMP and does not define _OPENACC.
! NOTE(review): this listing has lost numeric tokens (value of itiminit, the
! thread number compared in the OMP_GET_THREAD_NUM() test, loop lower bounds,
! parts of the constants in the t/qv formulas, the size of temp and the
! values used in initialize_gpu) and several statements and preprocessor
! lines share a line. Restore from the original sources before compiling.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer IMPLICIT NONE CONTAINS
SUBROUTINE initialize() ! initialize timers and device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit = ! timer ID
INTEGER :: i, j, k ! loop indices
! The OpenMP runtime functions are declared as external INTEGER functions.
INTEGER :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#ifdef _OPENMP
! Only the thread selected by the test below prints the thread count.
!$OMP PARALLEL
IF (OMP_GET_THREAD_NUM()==) THEN
WRITE(*,"(A,I4,A)") "Running with OpenMP with ", OMP_GET_NUM_THREADS(), " threads"
END IF
!$OMP END PARALLEL
#endif
#endif
WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) DO k =, nz
DO j = , ny
DO i = , nx
! Initial fields: a constant times (offset + amplitude * COS(...)) of the
! index sum i+j+k.
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO #ifdef _OPENACC
CALL initialize_gpu()
#endif CALL end_timer( itiminit )
! initialize_gpu(): fill a small integer array inside an "!$acc parallel loop"
! and check its sum; prints "GPU initialized" on success and stops otherwise.
END SUBROUTINE initialize SUBROUTINE initialize_gpu()
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
! cleanup(): deallocate the global fields t and qv.
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()
IMPLICIT NONE DEALLOCATE( t, qv )
END SUBROUTINE cleanup END MODULE m_setup
● OpenACC 优化,改了 m_io.f90,m_parametrizations.f90,m_physics.f90,m_setup.f90。书上的优化 04 涉及算法改动,没有参与比较
! m_io.f90
! Diagnostic output for the OpenACC version: the mean is reduced on the
! device from the device copy of qv.
MODULE m_io
  USE m_config, ONLY: nout, nx, ny, nz
  USE m_fields, ONLY: qv
  IMPLICIT NONE

CONTAINS

  ! Print the domain mean of qv for the given time step.
  !   ntstep : current time step; nothing is printed unless it is a multiple
  !            of nout.
  SUBROUTINE write_output(ntstep)
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: ntstep   ! current time step
    INTEGER :: i, j, k
    REAL*8  :: qv_mean              ! mean water vapour content (scalar)

    ! Skip time steps that are not output steps.
    IF (MOD(ntstep, nout) /= 0) RETURN

    ! qv must already be on the device; no transfer is requested here.
    !$acc data present(qv)
    qv_mean = 0.0D0
    !$acc parallel
    ! collapse(3) merges the three tightly nested loops into one iteration
    ! space that is spread over gangs and vector lanes.
    !$acc loop gang vector collapse(3) reduction(+:qv_mean)
    DO k = 1, nz
      DO j = 1, ny
        DO i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        END DO
      END DO
    END DO
    !$acc end parallel
    !$acc end data
    qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))

    WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  END SUBROUTINE write_output

END MODULE m_io

! m_parametrizations.f90
! OpenACC version of the two parametrization schemes. Both routines wrap
! their loop nest in "!$acc data present(t,qv,qc)" and an "!$acc parallel"
! region, i.e. they request no data transfers of their own.
! NOTE(review): this listing has lost numeric tokens (the byte count after
! "REAL*", parts of the cs*/cm*/t0 constants, loop lower bounds, the argument
! of "collapse()", the offset in "k-"), and in saturation_adjustment the
! "!$acc data" directive sits at the end of a declaration line, where it
! reads as an ordinary comment. Restore from the original sources before
! compiling.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! Updates qv in place from the local temperature t and then sets
! qc = cs4*qv at every grid point; the loop nest is collapsed and spread over
! gangs and vector lanes.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(IN) :: t(:,:,:)
REAL*, INTENT(OUT) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$acc data present(t,qv,qc)
!$acc parallel
!$acc loop gang vector collapse()
DO k = , nlev
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
!$acc end parallel
!$acc end data
! Sets qv from the qv value at another level plus a temperature term, then
! rescales t using qc and the new qv. The k loop is marked "seq"; j is
! mapped to gangs and i to vector lanes.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k
!$acc data present(t,qv,qc)
!$acc parallel
!$acc loop seq
DO k = , nlev
!$acc loop gang
DO j = , npy
!$acc loop vector
DO i = , npx
! NOTE(review): the index "k-" has lost its offset; presumably the previous
! level (k-1), which would explain why the k loop is kept sequential --
! confirm against the original.
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
!$acc end parallel
!$acc end data
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_physics.f90
! Runs the physical parametrizations on the global fields (OpenACC version).
! The scratch field qc is a module variable here so that its host and device
! storage is created once and reused by every call of physics().
MODULE m_physics
  USE m_config, ONLY: nx, ny, nz
  USE m_fields, ONLY: qv, t
  USE m_parametrizations, ONLY: saturation_adjustment, microphysics
  IMPLICIT NONE

  ! Cloud water content; allocated by init_physics, released by
  ! finalize_physics.
  REAL*8, ALLOCATABLE :: qc(:,:,:)

CONTAINS

  ! Apply both parametrizations, in order, to the module fields t and qv.
  SUBROUTINE physics()
    IMPLICIT NONE

    CALL saturation_adjustment(nx, ny, nz, t, qc, qv)
    CALL microphysics(nx, ny, nz, t, qc, qv)
  END SUBROUTINE physics

  ! Allocate qc on the host and create its (uninitialized) device copy.
  SUBROUTINE init_physics()
    IMPLICIT NONE

    ALLOCATE( qc(nx,ny,nz) )
    !$acc enter data create(qc)
  END SUBROUTINE init_physics

  ! Remove the device copy of qc first, then free the host array.
  ! The directive must start its own line to be recognised as a directive.
  SUBROUTINE finalize_physics()
    IMPLICIT NONE

    !$acc exit data delete(qc)
    DEALLOCATE(qc)
  END SUBROUTINE finalize_physics

END MODULE m_physics

! m_setup.f90
! Setup and teardown for the OpenACC version. Compared with the serial setup
! module, the text of initialize() additionally creates device copies of t
! and qv ("enter data create"), pushes the initialized host values to the
! device ("update device") and calls init_physics(); cleanup() deletes the
! device copies, deallocates t and qv and calls finalize_physics().
! NOTE(review): this listing has lost numeric tokens (value of itiminit, loop
! lower bounds, parts of the constants in the t/qv formulas, the size of temp
! and the values used in initialize_gpu), and some "!$acc" directives sit at
! the end of a statement line, where they read as ordinary comments. Restore
! from the original sources before compiling.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer
USE m_physics, ONLY: init_physics, finalize_physics
IMPLICIT NONE CONTAINS
SUBROUTINE initialize() ! initialize timers and device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit =
INTEGER :: i, j, k #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#endif WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) !$acc enter data create(t,qv)
! The fields are filled on the host; the device copies are refreshed by the
! "update device" directive after the loop nest.
DO k =, nz
DO j = , ny
DO i = , nx
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO
!$acc update device(t,qv) #ifdef _OPENACC
CALL initialize_gpu()
#endif
CALL init_physics()
CALL end_timer( itiminit )
! initialize_gpu(): fill a small integer array inside an "!$acc parallel loop"
! and check its sum; prints "GPU initialized" on success and stops otherwise.
END SUBROUTINE initialize SUBROUTINE initialize_gpu()
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
! cleanup(): release the device and host storage of t and qv, then let
! m_physics release its own storage.
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()
IMPLICIT NONE !$acc exit data delete(t,qv)
DEALLOCATE( t, qv )
CALL finalize_physics()
END SUBROUTINE cleanup END MODULE m_setup
● 所有的输出结果。单独编译一个模式(而不使用默认的 makefile)时,在命令 pgf90 中要使用参数 -Mpreprocess,意思是将预编译器作用到 fortran 文件中,否则 m_setup.f90 中的 # 预编译命令会被当成错误
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ make example_serial example_openmp example_openacc1 example_openacc2 example_openacc3 example_openacc4
make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
compiling m_config.f90
compiling m_fields.f90
compiling m_io.f90
compiling m_parametrizations.f90
compiling m_physics.f90
compiling m_timing.f90
compiling m_setup.f90
compiling main.f90
make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openmp' ... ! 类似上面的过程 make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openacc4'
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_serial/example_serial
Running without OpenACC
Initialize
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 17.28 ms
Time loop : 978.08 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openmp/example_openmp
Running without OpenACC
Running with OpenMP with threads
Initialize
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 17.96 ms
Time loop : 898.92 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc1/example_openacc1
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 191.11 ms
Time loop : 1044.35 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc2/example_openacc2
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 176.72 ms
Time loop : 142.11 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc3/example_openacc3
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 162.15 ms
Time loop : 121.77 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc4/example_openacc4
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 152.47 ms
Time loop : 166.53 ms
----------------------------
● 所有的结果在 nvprof 中的图形。三张图分别为 “仅计算优化无数据优化”,“计算优化与数据优化”,“手工优化变量”