PukiWiki - 6.データ可視化のバックアップ差分(No.3)

追加された行はこの色です。
削除された行はこの色です。
現在（&lastmod;）作成中です。
既に書いている内容も&color(#ff0000){大幅に変わる};可能性が高いので注意。


-------

神戸大学 大学院システム情報学研究科 計算科学専攻 陰山 聡

-------
#contents
-------



* 前回のレポート（並列化）の解説 [#i30cd8ba]


** 問題設定の復習 [#vd5147b0]

#ref(problem_thermal.jpg)
- 2次元正方形領域 [0,1]×[0,1] での熱伝導を考える
- 境界をすべて0℃に固定
- 領域全体に一定の熱を加える
- ⇒ このとき，十分な時間が経った後での温度分布はどうなるか？

** 非並列版コードの復習 [#f30f5e5d]
** 非並列版コードの復習 [#x3196833]

熱伝導とは注目する一点が、「近所」の温度と等しくなろうという傾向である。
⇒周囲の温度の平均値になろうとする。
熱源があれば、一定の割合で温度が上がろうとする。


ヤコビ法アルゴリズム。
#ref(jacobi_method.jpg)

 do j=1, m
     do i=1, m
         uij(n+1) = (ui-1,j(n) + ui+1,j(n) + ui,j-1(n) + ui,j+1(n) ) / 4 + fij
     end do
 end do

このヤコビ法に基づいたコード heat1.f90を前回の演習で見た。

** heat1_wi_comments.f90 （詳細コメント付きバージョン） [#eb9ddd53]
** heat1_wi_comments.f90 （詳細コメント付きバージョン） [#zb7771b2]

今回は、まずこのheat1.f90を少し丁寧に見る。

 ! 
 ! heat1_wi_comments.f90
 !
 !-----------------------------------------------------------------------
 ! Time development form of the thermal diffusion equation is
 !    \partial T(x,y,t) / \partial t = \nabla^2 T(x,y,t) + heat_source.
 ! In the stationary state, 
 !    \nabla^2 T(x,y) + heat_source = 0.
 ! The finite difference method with grid spacings dx and dy leads to
 !    (T(i+1,j)-2*T(i,j)+T(i-1,j))/dx^2 
 !  + (T(i,j+1)-2*T(i,j)+T(i,j-1))/dy^2 + heat_source = 0.
 ! When dx=dy=h,
 !    T(i,j) = (T(i+1,j)+T(i-1,j)+T(i,j+1)+T(i,j-1))/4+heat_source*h^2/4.
 ! This suggests a relaxation method called Jacobi method adopted in
 ! this code.
 !-----------------------------------------------------------------------
 
 ! 
 ! heat1_wi_comments.f90
 !
 !-----------------------------------------------------------------------
 ! Time development form of the thermal diffusion equation is
 !    \partial T(x,y,t) / \partial t = \nabla^2 T(x,y,t) + heat_source.
 ! In the stationary state, 
 !    \nabla^2 T(x,y) + heat_source = 0.
 ! The finite difference method with grid spacings dx and dy leads to
 !    (T(i+1,j)-2*T(i,j)+T(i-1,j))/dx^2 
 !  + (T(i,j+1)-2*T(i,j)+T(i,j-1))/dy^2 + heat_source = 0.
 ! When dx=dy=h,
 !    T(i,j) = (T(i+1,j)+T(i-1,j)+T(i,j+1)+T(i,j-1))/4+heat_source*h^2/4.
 ! This suggests a relaxation method called Jacobi method adopted in
 ! this code.
 !-----------------------------------------------------------------------
 
 program heat1_wi_comments
   implicit none
   integer, parameter :: m=31            ! mesh size
   integer            :: nmax=20000      ! max loop counter
   integer :: i,j,n
   integer, parameter :: SP = kind(1.0)
   integer, parameter :: DP = selected_real_kind(2*precision(1.0_SP))
   real(DP), dimension(:,:), allocatable :: u    ! temperature field
   real(DP), dimension(:,:), allocatable :: un   ! work array
   real(DP) :: heat=1.0_DP       ! heat source term; uniform distribution.
   real(DP) :: h                 ! grid size
 
   !                    |<----- 1.0 ----->|
   !               j=m+1+-------u=0-------+  ---
   !               j=m  |                 |   ^
   !                .   |                 |   | 
   !                .   |     uiform      |   | 
   !                   u=0    heat       u=0 1.0   
   ! j-direction    .   |     source      |   | 
   !(y-direction)  j=3  |                 |   | 
   !     ^         j=2  |                 |   v 
   !     |         j=1  +-------u=0-------+  ---
   !     |             i=0 1 2 ...      i=m+1
   !     |
   !     +-------> i-direction (x-direction)
   !
   ! when m = 7, 
   !       |<-------------------- 1.0 -------------------->|
   !       |                                               |
   !       |  h     h     h     h     h     h     h     h  |
   !       |<--->|<--->|<--->|<--->|<--->|<--->|<--->|<--->|
   !       +-----+-----+-----+-----+-----+-----+-----+-----+
   !     i=0     1     2     3     4     5     6     7     8 
   !
 
   if ( mod(m,2)==0 ) then
      print *, 'm must be odd to have a grid on the center.'
      stop
   end if
 
   allocate( u(0:m+1,0:m+1) )    ! memory allocation of 2-D array.
                                 ! you can access each element of u
                                 ! as u(i,j), where 0<=i<=m+1 
                                 ! and  0<=j<=m+1.
 
   allocate( un(m,m) )           ! another memory allocation. in
                                 ! this case un(i,j) with
                                 ! 1<=i<=m and 1<=j<=m.
 
   h = 1.0_DP/(m+1)              ! grid spacing.
 
   u(:,:) = 0.0_DP  ! initial temperature is zero all over the square.
 
   do n = 1 , nmax     ! relaxation loop 
     do j = 1 , m      ! in the y-direction
       do i = 1 , m
         un(i,j)=(u(i-1,j)+u(i+1,j)+u(i,j-1)+u(i,j+1))/4.0_DP+heat*h*h
       end do
     end do                      ! same as the following do-loops:
     u(1:m,1:m) = un(1:m,1:m)    ! do j = 1 , m
                                 !   do i = 1 , m
                                 !     u(i,j) = un(i,j)
                                 !   end do
                                 ! end do
     if ( mod(n,100)==0 ) print *, n, u(m/2+1,m/2+1)  ! temperature at
   end do                                             ! the center
 end program heat1_wi_comments

 
** 再びheat1.f90（コメントなしコード） [#f1a80a08]
** 再びheat1.f90（コメントなしコード） [#sf947cfe]

次に詳しいコメントをはずしてもう一度もとのheat1.f90を見てみる。
（ただし、先週のコードから本質的でないところを少しだけ修正している。）

 program heat1
   implicit none
   integer, parameter :: m=31, nmax=20000
   integer :: i,j,n
   integer, parameter :: SP = kind(1.0)
   integer, parameter :: DP = selected_real_kind(2*precision(1.0_SP))
   real(DP), dimension(:,:), allocatable :: u, un
   real(DP) :: h, heat=1.0_DP
 
   if ( mod(m,2)==0 ) then
      print *, 'm must be odd to have a grid on the center.'
      stop
   end if
 
   allocate(u(0:m+1,0:m+1))
   allocate(un(m,m))
 
   h=1.0_DP/(m+1)
   u(:,:) = 0.0_DP
 
   do n=1, nmax
     do j=1, m
       do i=1, m
         un(i,j)=(u(i-1,j)+u(i+1,j)+u(i,j-1)+u(i,j+1))/4.0_DP+heat*h*h
       end do
     end do
     u(1:m,1:m)=un(1:m,1:m)
     if (mod(n,100)==0) print *, n, u(m/2+1,m/2+1)
   end do
 end program heat1



** 先週の課題（演習3-3） [#f55184af]
- heat1.f90 を MPI を用いて並列化せよ
// - 4 または 8 プロセスで実行し，heat1.f90 と出力結果が同じであることを確認せよ
// - 余裕があれば，プロセス数を 1，2，4，8，16 と変えて実行し，計算時間の変化を調べよ。また，加速率を求めよさらに余裕があれば，問題サイズ m を 100，200 と大きくして同様の実験を行い，加速率を調べよ

** 解答例（heat2.f90） [#jbb4d6d8]
** 解答例（heat2.f90） [#lea21b6e]

 program heat2
   use mpi
   implicit none
   integer, parameter :: m=31, nmax=20000
   integer :: i,j,jstart,jend,n
   integer, parameter :: SP = kind(1.0)
   integer, parameter :: DP = selected_real_kind(2*precision(1.0_SP))
   real(DP), dimension(:,:), allocatable :: u, un
   real(DP) :: h, heat=1.0_DP
   integer ::  nprocs,myrank,ierr,left,right
   integer, dimension(MPI_STATUS_SIZE) :: istat
 
   if ( mod(m,2)==0 ) then
      print *, 'm must be odd to have a grid on the center.'
      stop
   end if
 
   call mpi_init(ierr)
   call mpi_comm_size(MPI_COMM_WORLD,nprocs,ierr)
   call mpi_comm_rank(MPI_COMM_WORLD,myrank,ierr)
 
   jstart=m*myrank/nprocs+1
   jend=m*(myrank+1)/nprocs
   allocate(u(0:m+1,jstart-1:jend+1))
   allocate(un(m,jstart:jend))
 !
 ! when m = 7, nprocs = 3
 ! 
 !     |<-------------------- 1.0 -------------------->|      
 !     |                                               |
 !     |  h  |  h     h     h     h     h     h     h  |
 !     |<--->|<--->|<--->|<--->|<--->|<--->|<--->|<--->|
 !     |     |     |     |     |     |     |     |     |
 !     +-----+-----+-----+-----+-----+-----+-----+-----+
 !   j=0     1     2     3     4     5     6     7     8
 !     +-----+-----+-----+-----+-----+-----+-----+-----+
 !     |  jstart   |     |           |     |     |     | 
 !     |     |   jend    |           |   jstart  |     | 
 !     o-----rank=0------o           |     |    jend   |
 !                 |     |           |     |     |     |
 !                 |   jstart      jend    |     |     |
 !                 |     |           |     |     |     |
 !                 o------- rank=1---------o     |     |
 !                                   |     |     |     | 
 !                                   o------rank=3-----o
 !
   h = 1.0_DP/(m+1)
 
   jstart = m*myrank/nprocs+1
   jend   = m*(myrank+1)/nprocs
 
   allocate( u(0:m+1,jstart-1:jend+1))
   allocate(un(    m,  jstart:jend  ))
 
   u(:,:) = 0.0_DP
 
   left=myrank-1
   if (myrank==0) left=nprocs-1
   right=myrank+1
   if (myrank==nprocs-1) right=0
   left = myrank-1
   if ( myrank==0 )        left  = nprocs-1
   right = myrank+1
   if ( myrank==nprocs-1 ) right = 0
 
   do n=1, nmax
     call mpi_sendrecv(u(1,jend),m,MPI_DOUBLE_PRECISION,right,100, &
 &                     u(1,jstart-1),m,MPI_DOUBLE_PRECISION,left,100, &
 &                     MPI_COMM_WORLD,istat,ierr)
     call mpi_sendrecv(u(1,jstart),m,MPI_DOUBLE_PRECISION,left,100, &
 &                     u(1,jend+1),m,MPI_DOUBLE_PRECISION,right,100, &
 &                     MPI_COMM_WORLD,istat,ierr)
     call mpi_sendrecv(u(1,jend),m,MPI_DOUBLE_PRECISION,right,100,       &
                       u(1,jstart-1),m,MPI_DOUBLE_PRECISION,left,100,    &
                       MPI_COMM_WORLD,istat,ierr)
     call mpi_sendrecv(u(1,jstart),m,MPI_DOUBLE_PRECISION,left,100,      &
                       u(1,jend+1),m,MPI_DOUBLE_PRECISION,right,100,     &
                       MPI_COMM_WORLD,istat,ierr)
 
     if (myrank==0) u(1:m,0)=0.0_DP
     if (myrank==nprocs-1) u(1:m,m+1)=0.0_DP
     if (myrank==0)        u(1:m,  0) = 0.0_DP
     if (myrank==nprocs-1) u(1:m,m+1) = 0.0_DP
 
     do j=jstart, jend
       do i=1, m
     do j = jstart , jend
       do i = 1 , m
         un(i,j)=(u(i-1,j)+u(i+1,j)+u(i,j-1)+u(i,j+1))/4.0_DP+heat*h*h
       end do
     end do
     u(1:m,jstart:jend)=un(1:m,jstart:jend)
 
     if (jstart<=m/2+1 .and. jend>=m/2+1) then
       if (mod(n,100)==0) print *, n, u(m/2+1,m/2+1)
     end if
   end do
   call mpi_finalize(ierr)
 end program heat2




** コード解説 （コメント付きソースコード heat2_wi_comments.f90） [#nc308a69]
** コード解説 （コメント付きソースコード heat2_wi_comments.f90） [#x1dffcc8]
上のheat2.f90を解説するために詳しいコメントをつけたソースコードheat2_wi_comments.f90を以下に示す。

 program heat2_wi_comments
   use mpi
   implicit none
   integer, parameter :: m=31            ! mesh size
   integer, parameter :: nmax=20000      ! max loop counter
   integer :: i,j,jstart,jend,n
   integer, parameter :: SP = kind(1.0)
   integer, parameter :: DP = selected_real_kind(2*precision(1.0_SP))
   real(DP), dimension(:,:), allocatable :: u  ! temperature field
   real(DP), dimension(:,:), allocatable :: un ! work array
   real(DP) :: heat=1.0_DP   ! heat source term. uniform distribution.
   real(DP) :: h             ! grid spacing; dx=dy=h
   integer  :: nprocs        ! number of MPI processes
   integer  :: myrank        ! my rank number
   integer  :: left, right   ! nearest neighbor processes
   integer, dimension(MPI_STATUS_SIZE) :: istat ! used for MPI
   integer  :: ierr          ! used for mpi routines
 
   if ( mod(m,2)==0 ) then   ! to print out the centeral temperature.
      print *, 'm must be odd to have a grid on the center.'
      stop
   end if
 
   call mpi_init(ierr)
   call mpi_comm_size(MPI_COMM_WORLD,nprocs,ierr)
   call mpi_comm_rank(MPI_COMM_WORLD,myrank,ierr)
 
   h = 1.0_DP/(m+1)          ! grid spacings. see the comment fig below.
 
   jstart = m*myrank/nprocs+1   ! you are in charge of this point
   jend   = m*(myrank+1)/nprocs ! ...to this points. see the fig below.
 
 !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 !
 ! when m = 7, nprocs = 3
 ! 
 !     |<-------------------- 1.0 -------------------->|      
 !     |                                               |
 !     |  h  |  h     h     h     h     h     h     h  |
 !     |<--->|<--->|<--->|<--->|<--->|<--->|<--->|<--->|
 !     |     |     |     |     |     |     |     |     |
 !     +-----+-----+-----+-----+-----+-----+-----+-----+
 !   j=0     1     2     3     4     5     6     7     8
 !     +-----+-----+-----+-----+-----+-----+-----+-----+
 !     |  jstart   |     |           |     |     |     | 
 !     |     |   jend    |           |   jstart  |     | 
 !     o-----rank=0------o           |     |    jend   |
 !                 |     |           |     |     |     |
 !                 |   jstart      jend    |     |     |
 !                 |     |           |     |     |     |
 !                 o------- rank=1---------o     |     |
 !                                   |     |     |     | 
 !                                   o------rank=3-----o
 !                                   o------rank=2-----o
 !
 !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 !
 ! Another example, when m=17, nprocs=6
 !
 !  j=0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18
 !    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
 !    |        |        |     |           |        |        |
 !    o==rank0=o        |     o===rank3===o        |        |
 !          |           |        |     |           |        |
 !          o===rank1===o        |     o===rank4===o        |
 !                   |           |              |           |
 !                   o===rank2===o              o===rank5===o
 ! each process      |           |
 ! has 2D array  u[i,jstart-1:jend+1] 
 !
 !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
   allocate( u(0:m+1,jstart-1:jend+1)) ! 2-D array for temperature.
   allocate(un(m,jstart:jend))         ! 2-D work array.
   u(:,:) = 0.0_DP
 
   left  = myrank-1  ! left neighbor.
   if (myrank==0)        left = nprocs-1  ! a dummy periodic distribution
   right = myrank+1  ! right neighbor.    ! for code simplicity. this has
   if (myrank==nprocs-1) right = 0        ! actually no effect.
 
   do n = 1 , nmax   ! main loop for the relaxation, to equilibrium.
     call mpi_sendrecv(u(1,jend),    m,MPI_DOUBLE_PRECISION,right,100,   &
                       u(1,jstart-1),m,MPI_DOUBLE_PRECISION,left,100,    &
     !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     !
     !                  jend                          jstart      
     !   <(left)>        |                              |    <(right)>
     !      - --o--o--o--o--o                        o--o--o--o--o-- -
     !              send |  ^ recv                   ^  |
     !                   |  |                        |  |
     !                 [2]  [3]                    [1]  [4]
     !                   |  |                        |  |
     !              recv v  | send                   |  v
     !                   o--o--o--o-- - - --o--o--o--o--o
     !                      |                        |
     !                    jstart                    jend
     !                               <(myself)>
     !                     
     !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     call mpi_sendrecv(u(1,jend),    m,MPI_DOUBLE_PRECISION,right,100,  & ! [1]
                       u(1,jstart-1),m,MPI_DOUBLE_PRECISION,left,100,   & ! [2]
                       MPI_COMM_WORLD,istat,ierr)
     call mpi_sendrecv(u(1,jstart),m,MPI_DOUBLE_PRECISION,left, 100,     &
                       u(1,jend+1),m,MPI_DOUBLE_PRECISION,right,100,     &
     call mpi_sendrecv(u(1,jstart),m,MPI_DOUBLE_PRECISION,left, 100,    & ! [3]
                       u(1,jend+1),m,MPI_DOUBLE_PRECISION,right,100,    & ! [4]
                       MPI_COMM_WORLD,istat,ierr)
 
     if (myrank==0)        u(1:m,0  ) = 0.0_DP   ! to keep the boundary 
     if (myrank==nprocs-1) u(1:m,m+1) = 0.0_DP   ! condition (temp=0).
 
     do j = jstart , jend   ! Jacobi method. 
       do i = 1 , m         ! no need to calculate the boundary i=0 and m+1.
         un(i,j)=(u(i-1,j)+u(i+1,j)+u(i,j-1)+u(i,j+1))/4.0_DP+heat*h*h
       end do
     end do
     u(1:m,jstart:jend)=un(1:m,jstart:jend) ! this is actually doubled do-loops.
 
     if (jstart<=m/2+1 .and. jend>=m/2+1) then         ! print out the temperature
       if (mod(n,100)==0) print *, n, u(m/2+1,m/2+1)   ! at the central grid point.
     end if
   end do
 
   deallocate(un,u)        ! or you can call deallocate for two times for un and u.
   call mpi_finalize(ierr)    
 end program heat2_wi_comments


** MPI通信の確認 (heat2_sendrecv_check.f90) [#i5bd3a90]

前回の演習で説明されたMPI_SENDRECVの挙動を改めて確認しよう。
そのためのソースコード（heat2_sendrecv_check.f90）を以下に用意した。
これは上記のheat2.f90からヤコビ法による熱伝導問題の解法部分を除き、
MPIの通信に関係する部分だけを抜き出したものである。
 !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 !
 ! when m = 7, nprocs = 3
 ! 
 !     |<-------------------- 1.0 -------------------->|      
 !     |                                               |
 !     |  h  |  h     h     h     h     h     h     h  |
 !     |<--->|<--->|<--->|<--->|<--->|<--->|<--->|<--->|
 !     |     |     |     |     |     |     |     |     |
 !     +-----+-----+-----+-----+-----+-----+-----+-----+
 !   j=0     1     2     3     4     5     6     7     8
 !     +-----+-----+-----+-----+-----+-----+-----+-----+
 !     |  jstart   |     |           |     |     |     | 
 !     |     |   jend    |           |   jstart  |     | 
 !     o==== rank=0 =====o           |     |    jend   |
 !                 |     |           |     |     |     |
 !                 |   jstart      jend    |     |     |
 !                 |     |           |     |     |     |
 !                 o======= rank=1 ========o     |     |
 !                                   |     |     |     | 
 !                                   o===== rank=2 ====o
 !
 !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 !
 ! Another example, when m=17, nprocs=6
 !
 !  j=0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18
 !    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
 !    |        |        |     |           |        |        |
 !    o==rank0=o        |     o===rank3===o        |        |
 !          |           |        |     |           |        |
 !          o===rank1===o        |     o===rank4===o        |
 !                   |           |              |           |
 !                   o===rank2===o              o===rank5===o
 ! each process      |           |
 ! has 2D array  u[i,jstart-1:jend+1] 
 !
 !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 !  
 program heat2_sendrecv_check
   use mpi
   implicit none
   integer, parameter :: m=17
   integer :: i,j,jstart,jend,n
   integer, parameter :: SP = kind(1.0)
   integer, parameter :: DP = selected_real_kind(2*precision(1.0_SP))
   real(DP), dimension(:,:), allocatable :: u, un
   integer ::  nprocs,myrank,ierr,left,right
   integer, dimension(MPI_STATUS_SIZE) :: istat
 
   if ( mod(m,2)==0 ) then
      print *, 'm must be odd to have a grid on the center.'
      stop
   end if
 
   call mpi_init(ierr)
   call mpi_comm_size(MPI_COMM_WORLD,nprocs,ierr)
   call mpi_comm_rank(MPI_COMM_WORLD,myrank,ierr)
 
   jstart = m*myrank/nprocs+1
   jend   = m*(myrank+1)/nprocs
 
   allocate( u(0:m+1,jstart-1:jend+1))
   allocate(un(    m,  jstart:jend  ))
 
   u(:,jstart-1:jend+1) = myrank
 
   call print('before',myrank, jstart, jend, u(m/2+1,:))
 
   left = myrank-1
   if ( myrank==0 )        left  = nprocs-1
   right = myrank+1
   if ( myrank==nprocs-1 ) right = 0
 
   !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   !
   !                  jend                          jstart      
   !   <(left)>        |                              |    <(right)>
   !      - --o--o--o--o--o                        o--o--o--o--o-- -
   !              send |  ^ recv                   ^  |
   !                   |  |                        |  |
   !                 [2]  [3]                    [1]  [4]
   !                   |  |                        |  |        
   !                   |  |                        |  |
   !              recv v  | send                   |  v
   !                   o--o--o--o-- - - --o--o--o--o--o
   !                      |                        |
   !                    jstart                    jend
   !                               <(myself)>
   !                     
   !- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
   call mpi_sendrecv(u(1,jend),    m,MPI_DOUBLE_PRECISION,right,100,   & ! [1]
                     u(1,jstart-1),m,MPI_DOUBLE_PRECISION, left,100,   & ! [2]
                     MPI_COMM_WORLD,istat,ierr)
   call mpi_sendrecv(u(1,jstart),m,MPI_DOUBLE_PRECISION, left,100,     & ! [3]
                     u(1,jend+1),m,MPI_DOUBLE_PRECISION,right,100,     & ! [4]
                     MPI_COMM_WORLD,istat,ierr)
 
 !   if (myrank==0)        u(1:m,  0) = 0.0_DP
 !   if (myrank==nprocs-1) u(1:m,m+1) = 0.0_DP
 
   call print(' after', myrank, jstart, jend, u(m/2+1,:))
 
   deallocate(u,un)
   call mpi_finalize(ierr)
 
 contains
 
   subroutine print(message,myrank,jstart,jend,u)
     character(len=*), intent(in)                     :: message
     integer,          intent(in)                     :: myrank
     integer,          intent(in)                     :: jstart
     integer,          intent(in)                     :: jend
     real(DP), dimension(jstart-1:jend+1), intent(in) :: u
 
     integer, dimension(0:m+1) :: iu          ! work array of integer
     integer                   :: rank, ierr
     character(len=100)        :: line        ! printed line on stdout
     character(len=20)         :: format      ! print format
     
     iu(:) = -1    ! initialization of the work integer array.
     iu(jstart-1:jend+1) = u(jstart-1:jend+1) ! copy the data (double to int).
 
     write(format,'(i3"(i3)")') m+2     ! you are not expected to 
     write(line,format) (iu(i),i=0,m+1) ! understand this trick.
 
     do rank = 0 , nprocs-1                   ! to avoid random printing
        call mpi_barrier(MPI_COMM_WORLD,ierr) ! by each process in random order,
        if ( rank==myrank ) then              ! we here put the loop and barrier.
           print *, message//':'//trim(line)
        end if
     end do
   end subroutine print
 
 end program heat2_sendrecv_check

このコード（heat2_sendrecv_check.f90）をscalarに投入するためのジョブスクリプトを
以下のようにすると、このコードのコメントにあるとおり、
グリッドサイズmが17で、プロセス数が6のジョブを投入できる。

* サンプルジョブスクリプト [#rfa41aaa]

 #PBS -l cputim_job=00:05:00
 #PBS -l memsz_job=2gb
 #PBS -l cpunum_job=1
 #PBS -T vltmpi
 #PBS -b 6
 #PBS -q PCL-A
 
 cd /home/users/yourID/YourDirectory
 mpirun_rsh -np 6 ${NQSII_MPIOPTS} ./a.out
 
これをたとえば heat2_sendrecv_check.jsという名前のファイルに保存して
 qsub heat2_sendrecv_check.js
とすればジョブが投入できる。

*  &color(#0000ff){【演習】}; [#n213fe1f] 
上のheat2_sendrecv_check.f90をマシン「scalar」で計算せよ。

** 結果例 [#y3738855]
 
 %NQSII(INFO): ------- Output from job:0000 -------
 [0]  before:  0  0  0  0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [0]   after:  5  0  0  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 %NQSII(INFO): ------- Output from job:0001 -------
 [1]  before: -1 -1  1  1  1  1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [1]   after: -1 -1  0  1  1  1  2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 %NQSII(INFO): ------- Output from job:0002 -------
 [2]  before: -1 -1 -1 -1 -1  2  2  2  2  2 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [2]   after: -1 -1 -1 -1 -1  1  2  2  2  3 -1 -1 -1 -1 -1 -1 -1 -1 -1
 %NQSII(INFO): ------- Output from job:0003 -------
 [3]  before: -1 -1 -1 -1 -1 -1 -1 -1  3  3  3  3  3 -1 -1 -1 -1 -1 -1
 [3]   after: -1 -1 -1 -1 -1 -1 -1 -1  2  3  3  3  4 -1 -1 -1 -1 -1 -1
 %NQSII(INFO): ------- Output from job:0004 -------
 [4]  before: -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  4  4  4  4  4 -1 -1 -1
 [4]   after: -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  3  4  4  4  5 -1 -1 -1
 %NQSII(INFO): ------- Output from job:0005 -------
 [5]  before: -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  5  5  5  5  5
 [5]   after: -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  4  5  5  5  0


* 可視化の必要性 [#re54a6a6]

* 可視化とは [#r232f3e1]
上の例では、MPIプロセスがもつ配列uの値をテキストデータとして書き出すことで、
プロセス間のMPI_SENDRECVによる通信の様子が分かりやすく表示された。
このように人間の視覚を使って情報を引き出す操作を一般に可視化という。

ここではスカラーデータの可視化のみ。
科学技術計算においては、可視化が様々な局面で重要な役割を果たす。
特に、計算結果として出力される数値データに潜む情報を引き出すための可視化―これをデータ可視化という―は、
不可欠と言ってよい。

データ可視化の身近な例は天気図であろう。

NKHラジオの「気象通報」という番組を知っているだろうか？
各地の風向、風力、気圧などの数字をアナウンサーが次々に読み上げていくラジオ番組である。 
これをただ耳で聞いているだけでは何もわからない。
しかしペンと地図を手にし、番組を聞きながら読み上げられる各地点に気圧などを書き込み（専用の地図を売っている）、
最後に同じ気圧になりそうな点を曲線（等高線）でつなげていくと、
低気圧や高気圧が図―天気図―としてあらわれる。
知識のある人が天気図を見れば、 明日の天気がだいたいわかる。

この天気図に入っている情報は、
もともとラジオのアナウンサーが読み上げる数値データに全て含まれているものである。
だが、そのような数値データの羅列をいくら注意深く聞いても、
あるいはそれを数表にして紙の上に書き、それをじっと睨んでみても、
明日の天気を知るのはほとんど不可能である。
紙の上に天気図という図にしてみてはじめてデータに隠されていた情報を引き出すことができる。
これが可視化である。


* 可視化の例 [#r232f3e1]

データの可視化には可視化の目的や、対象データの種類の応じて様々な方法が開発されている。
その中で、ここでは、スカラー場の可視化について簡単に説明する。

** 1次元グラフ [#s18621f8]

** 2次元等高線アルゴリズム [#c1baa40b]
xのある関数f(x)を直感的に理解するためには、グラフを書くのが一番である。
例えばxのx乗の関数、つまりf(x)=x^xはどんな形だろうか？
下のグラフを見ればわかる。

** 3次元等値面アルゴリズム [#ua86e1fc]
#ref(graph_1d.png)

** 2次元等高線 [#c1baa40b]

* コードのリファクタリング [#debf70a8]
xとyの関数、つまり2次元の関数f(x,y)の形を理解するには、等高線を描くのがよい。
地表面での大気の圧力pの分布p(x,y)の等高線は天気図でおなじみである。

今後の演習で行うコードの改訂
#ref(graph_2d.png)

- 1次元可視化機能の追加（中心点の温度の収束の様子を示す）
- 2次元可視化機能の追加（温度場全体の収束の様子をアニメーションで示す）
- 2次元領域分割による並列化
- 問題の3次元化
上の図では等高線の高さを色で表示してさらに分かりやすくしている。

ここで等高線の描画アルゴリズムを紹介しよう。
計算格子点上に定義されたデータから一本の等高線を描くには、下の図のように短い線分をつなげていけばよい。

#ref(contour_line01.png)

一つの線分は4つの計算格子点で定義された長方形領域（セル）の中で直線を描く。
例えばf(x,y)=1.0の値の等高線を描く場合を考えよう。
あるセルの4つの頂点におけるfの値が全て1.0よりも大きいか、
あるいは1.0未満であれば、f=1.0の等高線はこのセルを通らないのは明らかである。

セルを周囲の4つの辺、それぞれの両端の頂点でのfの値が1.0を「挟めば」その辺を等高線が通る。
辺上のどの位置を等高線が横切るかは、線形補間をすればよい。
下の図はちょうど中点を通る例である。

#ref(contour_line02.png)

このアルゴリズムはmarching squaresと呼ばれる。
等高線による可視化とは2次平面上に分布するスカラー場f(x,y)を
等高線という曲線を見ることで理解する方法である。


** 3次元等値面 [#ua43bab9]

等値面の「3次元版」も考えられる。
この場合は3次元空間中の曲面の形状を見ることで3次元空間に分布する関数f(x,y,z)の分布を理解する。
たとえば、f(x,y,z)がある値（例えば1.0）をとる点の集合は、
曲面の方程式 f(x,y,z)=1.0 で定義される。

下の図の白い矢印で示している物体は等値面の例である。
これは核融合実験装置（LHD）内部のプラズマの圧力分布の等値面である。

#ref(isosurface_sample.png)

このような等値面を描くためのアルゴリズムとしてMarching Cubesがある。

#ref(marching_cubes.png)

このMarching Cubesアルゴリズムは、
1985年に米国で特許となったが、20年を経過したので、現在では自由に利用できる。


* 熱伝導問題コードのリファクタリング [#debf70a8]

可視化アルゴリズムの紹介はこれくらいにとどめて、
これから前回の講義で学んだ2次元正方形領域での熱伝導問題を例にとり、
簡単な可視化を実践的に演習してみよう。
目標は、heat1.f90コードと、heat2.f90に

- 1次元可視化機能の追加（中心点の温度が収束する様子をグラフで示す）
- 2次元可視化機能の追加（温度場全体が収束していく様子をアニメーションで示す）

という可視化機能を追加することである。
ただし、グラフや等高線などの可視化にはここではgnuplotというフリーウェアを利用する。

また、これまで学んだMPIによる並列化は、
1方向のみの分割による並列化（1次元領域分割による並列化）だけであったが、
次回以降の演習で、もう少し実際的な並列化問題を扱う。
具体的には、

- 2次元熱伝導問題の2次元領域分割による並列化
- 熱伝導問題の3次元化

を行う予定である。

したがって、今後扱うコードは少しづつ複雑化していくので、
ここでまずはコードのリファクタリングをしておこう。

* thermal_diffusion.f90コード [#ib0a4848]
 !
 ! thermal_diffusion.f90
 !
 module constants
   implicit none
   integer, parameter :: SP = kind(1.0)
   integer, parameter :: DP = selected_real_kind(2*precision(1.0_SP))
   integer, parameter :: MESH_SIZE = 61
   integer, parameter :: LOOP_MAX = 10000
 end module constants
 
 module parallel
   use constants
   use mpi
   implicit none
   private
   public :: parallel__initialize,       &
             parallel__finalize,         &
             parallel__sendrecv,         &
             parallel__set_prof_1d,      &
             parallel__set_prof_2d,      &
             parallel__tellme
   
   integer :: nprocs
   integer :: myrank, left, right
   integer :: jstart, jend
 
 contains
 
   subroutine parallel__finalize
     integer :: ierr
     call mpi_finalize(ierr)
   end subroutine parallel__finalize
   
   subroutine parallel__initialize
     integer :: ierr
     call mpi_init(ierr)
     call mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
     call mpi_comm_rank(MPI_COMM_WORLD, myrank, ierr)
     left  = myrank - 1
     right = myrank + 1
     if ( myrank==0 )        left  = nprocs-1
     if ( myrank==nprocs-1 ) right = 0
     jstart = MESH_SIZE * myrank / nprocs + 1
     jend   = MESH_SIZE * (myrank+1) / nprocs
   end subroutine parallel__initialize
 
   subroutine parallel__sendrecv(send_vector,rank_target,        &
                                 recv_vector,rank_source)
     real(DP), dimension(:), intent(in)  :: send_vector
     integer,                intent(in)  :: rank_target
     real(DP), dimension(:), intent(out) :: recv_vector
     integer,                intent(in)  :: rank_source
 
     integer, dimension(MPI_STATUS_SIZE) :: istat
     integer :: tag_dummy = 100
     integer :: n, ierr
     n = size(send_vector,dim=1)
     call mpi_sendrecv(send_vector,              &
                       n,                        &
                       MPI_DOUBLE_PRECISION,     & 
                       rank_target,              &
                       tag_dummy,                &
                       recv_vector,              &
                       n,                        &
                       MPI_DOUBLE_PRECISION,     &
                       rank_source,              &
                       tag_dummy,                &
                       MPI_COMM_WORLD,           &
                       istat,                    &
                       ierr)
   end subroutine parallel__sendrecv
 
   function parallel__set_prof_1d(jstt_,jend_,myprof) result(prof_1d)
     integer,  intent(in)                         :: jstt_
     integer,  intent(in)                         :: jend_
     real(DP), dimension(jstt_:jend_), intent(in) :: myprof
     real(DP), dimension(0:MESH_SIZE+1)           :: prof_1d
 
     real(DP), dimension(0:MESH_SIZE+1) :: work
     integer                            :: ierr
 
     work(:) = 0.0_DP
     work(jstt_:jend_) = myprof(jstt_:jend_)
     call mpi_allreduce(work,                    &  ! source
                        prof_1d,                 &  ! target
                        MESH_SIZE+2,             &
                        MPI_DOUBLE_PRECISION,    &
                        MPI_SUM,                 &
                        MPI_COMM_WORLD,          &
                        ierr)
   end function parallel__set_prof_1d
 
   function parallel__set_prof_2d(jstt_,jend_,myprof) result(prof_2d)
     integer,  intent(in)                             :: jstt_
     integer,  intent(in)                             :: jend_
     real(DP), dimension(0:MESH_SIZE+1,jstt_:jend_),                    &
                                           intent(in) :: myprof
     real(DP), dimension(0:MESH_SIZE+1,0:MESH_SIZE+1) :: prof_2d
 
     real(DP), dimension(0:MESH_SIZE+1,0:MESH_SIZE+1) :: work
     integer :: ierr
     integer :: meshsize_p1_sq = (MESH_SIZE+1)**2
 
     work(:,:) = 0.0_DP
     work(:,jstt_:jend_) = myprof(:,jstt_:jend_)
     call mpi_allreduce(work(1,1),               &  ! source
                        prof_2d(1,1),            &  ! target
                        meshsize_p1_sq,          &
                        MPI_DOUBLE_PRECISION,    &
                        MPI_SUM,                 &
                        MPI_COMM_WORLD,          &
                        ierr)
   end function parallel__set_prof_2d
   
   function parallel__tellme(which) result(val)
     character(len=*), intent(in) :: which
     integer                      :: val
 
     select case (which)
       case  ('jend')
         val = jend
       case  ('jstart')
         val = jstart
       case  ('left')
         val = left
       case  ('right')
         val = right
       case  ('myrank')
         val = myrank
       case  ('nprocs')
         val = nprocs
     end select
   end function parallel__tellme
 
 end module parallel
 
 
 module temperature
   use constants
   use parallel
   implicit none
   private 
   public :: temperature__initialize,            &
             temperature__finalize,              &
             temperature__output_1d_profile,     &
             temperature__output_2d_profile,     &
             temperature__update
 
   real(DP), allocatable, dimension(:,:) :: temp
   real(DP), allocatable, dimension(:,:) :: work
   real(DP), parameter :: SIDE = 1.0_DP
   real(DP) :: h = SIDE / MESH_SIZE
   real(DP) :: heat
   integer  :: jstart, jend
   integer  :: myrank, right, left, nprocs
   
   !   
   !               |<----------- SIDE ---------->|
   !               |                             |
   !               |<-h->|                       |
   !               0     1     2    ...       MESH_SIZE+1
   !        jend+1 +-----+-----+-----+-----+-----+
   !               |     |     |     |     |     |
   !               |     |     |     |     |     |
   !          jend +-----+-----+-----+-----+-----+  
   !               |     |     |     |     |     |  
   !            .                                   
   !            .      temp(0:MESH_SIZE,0:jend+1)   
   !            .                                   
   !               |     |     |     |     |     |  
   !        jstart +-----+-----+-----+-----+-----+  
   !               |     |     |     |     |     |    
   !               |     |     |     |     |     |
   !      jstart-1 +-----+-----+-----+-----+-----+
   ! 
 
 contains
 
 !----------------
 !--- private  ---
 !----------------
 
   subroutine set_jstart_and_jend(jstart_, jend_)
     integer, intent(out) :: jstart_, jend_
     if ( myrank==0 ) then
        jstart_ = 0
        jend_   = jend
     else if ( myrank==nprocs-1 ) then
        jstart_ = jstart
        jend_   = MESH_SIZE+1
     else
        jstart_ = jstart
        jend_   = jend
     end if
   end subroutine set_jstart_and_jend
 
 !--------------
 !--- public ---
 !--------------
 
   subroutine temperature__initialize
     real(DP) :: heat_source = 4.0
     jstart = parallel__tellme('jstart')
     jend   = parallel__tellme('jend')
     myrank = parallel__tellme('myrank')
     right  = parallel__tellme('right')
     left   = parallel__tellme('left')
     nprocs = parallel__tellme('nprocs')
     allocate(temp(0:MESH_SIZE+1,jstart-1:jend+1))
     allocate(work(    MESH_SIZE,  jstart:jend)  )
     heat = (heat_source/4) * h * h
     temp(:,:) = 0.0_DP       ! initial condition
   end subroutine temperature__initialize
 
   subroutine temperature__finalize
     deallocate(work,temp)
   end subroutine temperature__finalize
 
   subroutine temperature__output_1d_profile
     real(DP), dimension(0:MESH_SIZE+1) :: prof
     integer                     :: counter = 0   ! saved
     integer                     :: ierr          ! use for MPI
     integer                     :: jstart_, jend_
     character(len=4)            :: serial_num    ! put on file name
     character(len=*), parameter :: base = "../data/temp.i=middle."
     integer, parameter          :: icut = MESH_SIZE/2+1 ! MESH_SIZE is odd
     !   nprocs = 2  
     !   MESH_SIZE = 9
     ! 
     !    0     1     2     3     4     5     6     7     8     9    10
     !    +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
     !    |<=====rank0=====>|     |<==rank1==>|     |<=====rank1=====>|
     !  jstart_ |         jend_   |           |     |           |     |
     !    |   jstart      jend  jstart_     jend_   |           |     |
     !    |     |           |   jstart      jend  jstart_       |   jend_  
     !    |     |           |     |           |   jstart      jend    |
     !    |     |           |     |           |     |           |     |
     !    +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
     !
     integer :: j
     call set_jstart_and_jend(jstart_,jend_)
     write(serial_num,'(i4.4)') counter
     prof(:) = parallel__set_prof_1d(jstart_,jend_,temp(icut,jstart_:jend_))
     if ( myrank==0 ) then
        open(10,file=base//serial_num)
        do j = 0 , MESH_SIZE+1
           write(10,*) j, prof(j)
        end do
        close(10)
     end if
     counter = counter + 1
   end subroutine temperature__output_1d_profile
 
   subroutine temperature__output_2d_profile
     real(DP), dimension(0:MESH_SIZE+1,    &
                         0:MESH_SIZE+1) :: prof
     integer                            :: counter = 0   ! saved
     integer                            :: ierr          ! use for MPI
     integer                            :: jstart_, jend_
     character(len=4)                   :: serial_num    ! put on file name
     character(len=*), parameter        :: base = "../data/temp.2d."
     !   nprocs = 2  
     !   MESH_SIZE = 9
     !    0     1     2     3     4     5     6     7     8     9    10
     !    +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
     !    |<=====rank0=====>|     |<==rank1==>|     |<=====rank1=====>|
     !  jstart_ |         jend_   |           |     |           |     |
     !    |   jstart      jend  jstart_     jend_   |           |     |
     !    |     |           |   jstart      jend  jstart_       |   jend_  
     !    |     |           |     |           |   jstart      jend    |
     !    |     |           |     |           |     |           |     |
     !    +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
     !
     integer :: i, j
     call set_jstart_and_jend(jstart_,jend_)
     write(serial_num,'(i4.4)') counter
     prof(:,:) = parallel__set_prof_2d(jstart_,jend_,temp(:,jstart_:jend_))
     if ( myrank==0 ) then
        open(10,file=base//serial_num)
        do j = 0 , MESH_SIZE+1
           do i = 0 , MESH_SIZE+1
              write(10,*) i, j, prof(i,j)
           end do
           write(10,*)' ' ! gnuplot requires a blank line here.
        end do
        close(10)
     end if
     counter = counter + 1
   end subroutine temperature__output_2d_profile
 
   subroutine temperature__update
     integer :: i, j
     call parallel__sendrecv(temp(1:MESH_SIZE,jend),     right,   &
                             temp(1:MESH_SIZE,jstart-1), left  )
     call parallel__sendrecv(temp(1:MESH_SIZE,jstart),   left,    &
                             temp(1:MESH_SIZE,jend+1),   right )
     if ( myrank==0 )        temp(1:MESH_SIZE,0          ) = 0.0_DP
     if ( myrank==nprocs-1 ) temp(1:MESH_SIZE,MESH_SIZE+1) = 0.0_DP
     !
     ! Time development form of the thermal diffusion equation is
     !    \partial T(x,y,t) / \partial t = \nabla^2 T(x,y,t) + heat_source.
     ! In the stationary state, 
     !    \nabla^2 T(x,y) + heat_source = 0.
     ! The finite difference method with grid spacings dx and dy leads to
     !    (T(i+1,j)-2*T(i,j)+T(i-1,j))/dx^2 
     !  + (T(i,j+1)-2*T(i,j)+T(i,j-1))/dy^2 + heat_source = 0.
     ! When dx=dy=h,
     !    T(i,j) = (T(i+1,j)+T(i-1,j)+T(i,j+1)+T(i,j-1))/4+heat_source*h^2/4.
     ! This suggests a relaxation method by the following 
     ! update procedure called Jacobi method;
     do j = jstart, jend
       do i = 1, MESH_SIZE
 !       work(i,j) = (temp(i-1,j)+temp(i+1,j)+temp(i,j-1)+temp(i,j+1))/4.0_DP   &
 !                 + heat_source*h*h   ! Operation time is saved in below.
         work(i,j) = (temp(i-1,j)+temp(i+1,j)+temp(i,j-1)+temp(i,j+1))*0.25_DP  &
                   + heat
       end do
     end do
     temp(1:MESH_SIZE,jstart:jend) = work(1:MESH_SIZE,jstart:jend)
   end subroutine temperature__update
 
 end module temperature
 
 
 program thermal_diffusion
   use constants
   use parallel
   use temperature
   implicit none
 
   integer :: loop
 
   call check
   call parallel__initialize
   call temperature__initialize
   call temperature__output_1d_profile
   call temperature__output_2d_profile
   do loop = 1 , LOOP_MAX
      call temperature__update
      if ( mod(loop,100)==0 ) call temperature__output_1d_profile
      if ( mod(loop,100)==0 ) call temperature__output_2d_profile
   end do
   call temperature__finalize
   call parallel__finalize
 
 contains
 
   subroutine check
     if ( mod(MESH_SIZE,2)==0 ) then
        print *, 'MESH_SIZE must be odd to have a grid on the center.'
        stop
     end if
   end subroutine check
 end program thermal_diffusion


* gnuplot入門 [#j704bdd3]

* 1次元可視化（中心点の温度の収束の様子を示す） [#y4f94ff6]


*  &color(#0000ff){【演習】}; [#n213fe1f]

* 2次元可視化（温度場全体の収束の様子のアニメーション） [#ha0ffd14]


*  &color(#0000ff){【演習】}; [#ze4352d6]

 

// * 授業アンケート [#p2e645d4]
* 授業アンケート [#p2e645d4]
// 今回の演習内容はどうでしたか？
//#vote(簡単すぎた[0], 難しすぎた[0], ちょうどよかった[4])

// コメント欄
// #pcomment


------------------------------

as of &_now; (&counter;)
6.データ可視化のバックアップ差分(No.3)

演習について

計算科学演習A1

計算科学演習A2

πコンピュータ利用ガイド

このサイトについて

お問い合わせ

サイト内検索

6.データ可視化 のバックアップ差分(No.3)

演習について

計算科学演習A1

計算科学演習A2

πコンピュータ利用ガイド

このサイトについて

お問い合わせ

サイト内検索

6.データ可視化のバックアップ差分(No.3)