[Q-e-commits] espresso/Modules fft_base.f90, 1.46, 1.47 fft_parallel.f90, 1.8, 1.9 fft_types.f90, 1.18, 1.19 mp_global.f90, 1.32, 1.33 task_groups.f90, 1.22, 1.23

ccavazzoni at qe-forge.org ccavazzoni at qe-forge.org
Fri May 29 17:48:42 CEST 2009


Update of /cvsroot/q-e/espresso/Modules
In directory qeforge.qe-forge.org:/tmp/cvs-serv6951

Modified Files:
	fft_base.f90 fft_parallel.f90 fft_types.f90 mp_global.f90 
	task_groups.f90 
Log Message:
- Some update in task groups stuff


Index: fft_base.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/fft_base.f90,v
retrieving revision 1.46
retrieving revision 1.47
diff -u -d -r1.46 -r1.47
--- fft_base.f90	23 May 2009 16:23:17 -0000	1.46
+++ fft_base.f90	29 May 2009 15:48:39 -0000	1.47
@@ -88,7 +88,7 @@
   USE parallel_include
 #endif
   use mp_global,   ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
-                          my_image_id, nogrp, pgrp_comm, nplist
+                          my_image_id, nogrp, pgrp_comm, nplist, me_pgrp, npgrp
   USE kinds,       ONLY : DP
 
   implicit none
@@ -99,7 +99,7 @@
 
 #ifdef __PARA
 
-  INTEGER :: dest, from, k, ip, proc, ierr, me, me_pgrp, nprocp, gproc, gcomm, i, kdest, kfrom
+  INTEGER :: dest, from, k, ip, proc, ierr, me, ipoffset, nprocp, gproc, gcomm, i, kdest, kfrom
   INTEGER :: sendcount(nproc_pool), sdispls(nproc_pool), recvcount(nproc_pool), rdispls(nproc_pool)
   INTEGER :: offset(nproc_pool)
   INTEGER :: sh(nproc_pool), rh(nproc_pool)
@@ -123,13 +123,13 @@
   !
   IF( use_tg_ ) THEN
     !  This is the number of procs. in the plane-wave group
-     nprocp = nproc_pool / nogrp 
-     CALL mpi_comm_rank( pgrp_comm, me_pgrp, ierr )
-     gcomm = pgrp_comm
+     nprocp   = npgrp 
+     ipoffset = me_pgrp
+     gcomm    = pgrp_comm
   ELSE
-     nprocp = nproc_pool
-     me_pgrp = me_pool
-     gcomm = intra_pool_comm
+     nprocp   = nproc_pool
+     ipoffset = me_pool
+     gcomm    = intra_pool_comm
   END IF
   !
   if ( nprocp == 1 ) return
@@ -178,7 +178,7 @@
      !
      ! "forward" scatter from columns to planes
      !
-     ! step one: store contiguously the slices
+     ! step one: store contiguously the slices and send
      !
      do ip = 1, nprocp
 
@@ -186,7 +186,7 @@
         ! proc in order to avoid that all procs send a msg at the same proc
         ! at the same time.
         !
-        proc = me_pgrp + 1 + ip
+        proc = ipoffset + 1 + ip
         IF( proc > nprocp ) proc = proc - nprocp
 
         gproc  = proc
@@ -208,12 +208,14 @@
               f_aux ( dest + (k - 1) * 2 - 1 + 2 ) =  f_in ( from + (k - 1) * nrx3 - 1 + 2 )
            enddo
         CASE ( 3 )
+!$omp parallel do
            do k = 1, ncp_ (me)
               f_aux ( dest + (k - 1) * 3 - 1 + 1 ) =  f_in ( from + (k - 1) * nrx3 - 1 + 1 )
               f_aux ( dest + (k - 1) * 3 - 1 + 2 ) =  f_in ( from + (k - 1) * nrx3 - 1 + 2 )
               f_aux ( dest + (k - 1) * 3 - 1 + 3 ) =  f_in ( from + (k - 1) * nrx3 - 1 + 3 )
            enddo
         CASE ( 4 )
+!$omp parallel do
            do k = 1, ncp_ (me)
               f_aux ( dest + (k - 1) * 4 - 1 + 1 ) =  f_in ( from + (k - 1) * nrx3 - 1 + 1 )
               f_aux ( dest + (k - 1) * 4 - 1 + 2 ) =  f_in ( from + (k - 1) * nrx3 - 1 + 2 )
@@ -241,11 +243,11 @@
         !
      end do
      !
-     ! step two: communication
+     ! step two: receive
      !
      do ip = 1, nprocp
         !
-        proc = me_pgrp + 1 - ip
+        proc = ipoffset + 1 - ip
         IF( proc < 1 ) proc = proc + nprocp
         !
         ! now post the receive 
@@ -297,7 +299,7 @@
 
         !  post the non blocking send
 
-        proc = me_pgrp + 1 + ip
+        proc = ipoffset + 1 + ip
         IF( proc > nprocp ) proc = proc - nprocp
 
         call mpi_isend( f_in( rdispls( proc ) + 1 ), recvcount( proc ), MPI_DOUBLE_COMPLEX, &
@@ -306,7 +308,7 @@
 
         !  post the non blocking receive
 
-        proc = me_pgrp + 1 - ip
+        proc = ipoffset + 1 - ip
         IF( proc < 1 ) proc = proc + nprocp
 
         CALL mpi_irecv( f_aux( sdispls( proc ) + 1 ), sendcount( proc ), MPI_DOUBLE_COMPLEX, &
@@ -372,12 +374,14 @@
                        f_in ( dest + (k - 1) * nrx3 - 1 + 2 ) = f_aux( from + (k - 1) * 2 - 1 + 2 )
                     enddo
                  CASE ( 3 )
+!$omp parallel do
                     do k = 1, ncp_ ( me )
                        f_in ( dest + (k - 1) * nrx3 - 1 + 1 ) = f_aux( from + (k - 1) * 3 - 1 + 1 )
                        f_in ( dest + (k - 1) * nrx3 - 1 + 2 ) = f_aux( from + (k - 1) * 3 - 1 + 2 )
                        f_in ( dest + (k - 1) * nrx3 - 1 + 3 ) = f_aux( from + (k - 1) * 3 - 1 + 3 )
                     enddo
                  CASE ( 4 )
+!$omp parallel do
                     do k = 1, ncp_ ( me )
                        f_in ( dest + (k - 1) * nrx3 - 1 + 1 ) = f_aux( from + (k - 1) * 4 - 1 + 1 )
                        f_in ( dest + (k - 1) * nrx3 - 1 + 2 ) = f_aux( from + (k - 1) * 4 - 1 + 2 )

Index: fft_parallel.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/fft_parallel.f90,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- fft_parallel.f90	24 May 2009 17:28:34 -0000	1.8
+++ fft_parallel.f90	29 May 2009 15:48:39 -0000	1.9
@@ -208,29 +208,14 @@
 
   SUBROUTINE pack_group_sticks()
 
-     INTEGER                     :: idx, ierr
-     INTEGER, DIMENSION(nogrp+1) :: send_cnt, send_displ, recv_cnt, recv_displ
+     INTEGER                     :: ierr
      !
      if( .NOT. use_tg ) return
      !
-     send_cnt(1)   = nx3 * dfft%nsw( me_p )
-     IF( nx3 * dfft%nsw( me_p ) > dfft%nnrx ) THEN
-        CALL errore( ' tg_cfft ', ' inconsistent dfft%nnrx ', 1 )
-     END IF
-     send_displ(1) = 0
-     recv_cnt(1)   = nx3 * dfft%nsw( nolist(1) + 1 )
-     recv_displ(1) = 0
-     DO idx = 2, nogrp
-        send_cnt(idx)   = nx3 * dfft%nsw( me_p )
-        send_displ(idx) = send_displ(idx-1) + dfft%nnrx
-        recv_cnt(idx)   = nx3 * dfft%nsw( nolist(idx) + 1 )
-        recv_displ(idx) = recv_displ(idx-1) + recv_cnt(idx-1)
-     ENDDO
-
-     IF( recv_displ(nogrp) + recv_cnt(nogrp) > SIZE( yf ) ) THEN
+     IF( dfft%tg_rdsp(nogrp) + dfft%tg_rcv(nogrp) > SIZE( yf ) ) THEN
         CALL errore( ' tg_cfft ', ' inconsistent size ', 1 )
      END IF
-     IF( send_displ(nogrp) + send_cnt(nogrp) > SIZE( f ) ) THEN
+     IF( dfft%tg_psdsp(nogrp) + dfft%tg_snd(nogrp) > SIZE( f ) ) THEN
         CALL errore( ' tg_cfft ', ' inconsistent size ', 2 )
      END IF
 
@@ -241,8 +226,8 @@
 
 #if defined __MPI
 
-     CALL MPI_ALLTOALLV( f(1), send_cnt, send_displ, MPI_DOUBLE_COMPLEX, yf(1), recv_cnt, &
-      &                     recv_displ, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
+     CALL MPI_ALLTOALLV( f(1), dfft%tg_snd, dfft%tg_psdsp, MPI_DOUBLE_COMPLEX, yf(1), dfft%tg_rcv, &
+      &                     dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
      IF( ierr /= 0 ) THEN
         CALL errore( ' tg_cfft ', ' alltoall error 1 ', ABS(ierr) )
      END IF
@@ -262,26 +247,14 @@
      !
      !  Bring pencils back to their original distribution
      !
-     INTEGER                     :: idx, ierr
-     INTEGER, DIMENSION(nogrp+1) :: send_cnt, send_displ, recv_cnt, recv_displ
+     INTEGER                     :: ierr
      !
      if( .NOT. use_tg ) return
      !
-     send_cnt  (1) = nx3 * dfft%nsw( nolist(1) + 1 )
-     send_displ(1) = 0
-     recv_cnt  (1) = nx3 * dfft%nsw( me_p )
-     recv_displ(1) = 0
-     DO idx = 2, NOGRP
-        send_cnt  (idx) = nx3 * dfft%nsw( nolist(idx) + 1 )
-        send_displ(idx) = send_displ(idx-1) + send_cnt(idx-1)
-        recv_cnt  (idx) = nx3 * dfft%nsw( me_p )
-        recv_displ(idx) = recv_displ(idx-1) + recv_cnt(idx-1)
-     ENDDO
-
-     IF( recv_displ(nogrp) + recv_cnt(nogrp) > SIZE( f ) ) THEN
+     IF( dfft%tg_usdsp(nogrp) + dfft%tg_snd(nogrp) > SIZE( f ) ) THEN
         CALL errore( ' tg_cfft ', ' inconsistent size ', 3 )
      END IF
-     IF( send_displ(nogrp) + send_cnt(nogrp) > SIZE( yf ) ) THEN
+     IF( dfft%tg_rdsp(nogrp) + dfft%tg_rcv(nogrp) > SIZE( yf ) ) THEN
         CALL errore( ' tg_cfft ', ' inconsistent size ', 4 )
      END IF
 
@@ -289,8 +262,8 @@
 
 #if defined __MPI
      CALL MPI_Alltoallv( yf(1), &
-          send_cnt, send_displ, MPI_DOUBLE_COMPLEX, f(1), &
-          recv_cnt, recv_displ, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
+          dfft%tg_rcv, dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, f(1), &
+          dfft%tg_snd, dfft%tg_usdsp, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
      IF( ierr /= 0 ) THEN
         CALL errore( ' tg_cfft ', ' alltoall error 2 ', ABS(ierr) )
      END IF

Index: fft_types.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/fft_types.f90,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -d -r1.18 -r1.19
--- fft_types.f90	29 Dec 2007 10:09:28 -0000	1.18
+++ fft_types.f90	29 May 2009 15:48:39 -0000	1.19
@@ -59,6 +59,11 @@
     INTEGER :: nnrx               ! maximum among nnr
     INTEGER, POINTER :: tg_nsw(:) ! number of sticks per task group ( wave func )
     INTEGER, POINTER :: tg_npp(:) ! number of "Z" planes per task group
+    INTEGER, POINTER :: tg_snd(:) ! number of element to be sent in group redist
+    INTEGER, POINTER :: tg_rcv(:) ! number of element to be received in group redist
+    INTEGER, POINTER :: tg_psdsp(:)! send displacement for all to all (pack)
+    INTEGER, POINTER :: tg_usdsp(:)! send displacement for all to all (unpack)
+    INTEGER, POINTER :: tg_rdsp(:)! receive displacement for all to all
     !
   END TYPE
 
@@ -100,6 +105,11 @@
     desc%have_task_groups = .FALSE.
     NULLIFY( desc%tg_nsw )
     NULLIFY( desc%tg_npp )
+    NULLIFY( desc%tg_snd )
+    NULLIFY( desc%tg_rcv )
+    NULLIFY( desc%tg_psdsp )
+    NULLIFY( desc%tg_usdsp )
+    NULLIFY( desc%tg_rdsp )
 
   END SUBROUTINE fft_dlay_allocate
 
@@ -121,6 +131,11 @@
     IF( desc%have_task_groups ) THEN
        IF ( ASSOCIATED( desc%tg_nsw ) )   DEALLOCATE( desc%tg_nsw )
        IF ( ASSOCIATED( desc%tg_npp ) )   DEALLOCATE( desc%tg_npp )
+       IF ( ASSOCIATED( desc%tg_snd ) )   DEALLOCATE( desc%tg_snd )
+       IF ( ASSOCIATED( desc%tg_rcv ) )   DEALLOCATE( desc%tg_rcv )
+       IF ( ASSOCIATED( desc%tg_psdsp ) )   DEALLOCATE( desc%tg_psdsp )
+       IF ( ASSOCIATED( desc%tg_usdsp ) )   DEALLOCATE( desc%tg_usdsp )
+       IF ( ASSOCIATED( desc%tg_rdsp ) )   DEALLOCATE( desc%tg_rdsp )
     END IF
     desc%have_task_groups = .FALSE.
   END SUBROUTINE fft_dlay_deallocate

Index: mp_global.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/mp_global.f90,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -d -r1.32 -r1.33
--- mp_global.f90	25 Feb 2009 16:50:31 -0000	1.32
+++ mp_global.f90	29 May 2009 15:48:39 -0000	1.33
@@ -43,6 +43,7 @@
   INTEGER :: my_image_id = 0  ! index of my image
   INTEGER :: me_ortho(2) = 0  ! coordinates of the processors
   INTEGER :: me_ortho1   = 0  ! task id for the ortho group
+  INTEGER :: me_pgrp     = 0  ! task id for plane wave task group
   !
   INTEGER :: npool       = 1  ! number of "k-points"-pools
   INTEGER :: nimage      = 1  ! number of "path-images"-pools
@@ -93,6 +94,7 @@
        my_image_id      = 0
        me_pool          = mpime
        me_image         = mpime
+       me_pgrp          = me_pool
        root_pool        = root
        root_image       = root
        inter_pool_comm  = group_i
@@ -325,6 +327,7 @@
         IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' pgrp_comm rank ', itsk )
      END IF
   END DO
+  me_pgrp = itsk
 #endif
 
   

Index: task_groups.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/task_groups.f90,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -d -r1.22 -r1.23
--- task_groups.f90	2 Jan 2008 17:59:28 -0000	1.22
+++ task_groups.f90	29 May 2009 15:48:39 -0000	1.23
@@ -107,6 +107,49 @@
    dffts%tg_npp(1) = num_planes
 #endif
 
+   ALLOCATE( dffts%tg_snd( nogrp ) )
+   ALLOCATE( dffts%tg_rcv( nogrp ) )
+   ALLOCATE( dffts%tg_psdsp( nogrp ) )
+   ALLOCATE( dffts%tg_usdsp( nogrp ) )
+   ALLOCATE( dffts%tg_rdsp( nogrp ) )
+
+   dffts%tg_snd(1)   = dffts%nr3x * dffts%nsw( me_pool + 1 )
+   IF( dffts%nr3x * dffts%nsw( me_pool + 1 ) > dffts%nnrx ) THEN
+      CALL errore( ' task_groups_init ', ' inconsistent dffts%nnrx ', 1 )
+   END IF
+   dffts%tg_psdsp(1) = 0
+   dffts%tg_usdsp(1) = 0
+   dffts%tg_rcv(1)  = dffts%nr3x * dffts%nsw( nolist(1) + 1 )
+   dffts%tg_rdsp(1) = 0
+   DO i = 2, nogrp
+      dffts%tg_snd(i)  = dffts%nr3x * dffts%nsw( me_pool + 1 )
+      dffts%tg_psdsp(i) = dffts%tg_psdsp(i-1) + dffts%nnrx
+      dffts%tg_usdsp(i) = dffts%tg_usdsp(i-1) + dffts%tg_snd(i-1)
+      dffts%tg_rcv(i)  = dffts%nr3x * dffts%nsw( nolist(i) + 1 )
+      dffts%tg_rdsp(i) = dffts%tg_rdsp(i-1) + dffts%tg_rcv(i-1)
+   ENDDO
+  
+   ! ALLOCATE( dffts%tg_sca_snd( nproc_pool / nogrp )  )
+   ! ALLOCATE( dffts%tg_sca_rcv( nproc_pool / nogrp )  )
+   ! ALLOCATE( dffts%tg_sca_sdsp( nproc_pool / nogrp )  )
+   ! ALLOCATE( dffts%tg_sca_rdsp( nproc_pool / nogrp )  )
+   ! ALLOCATE( dffts%tg_sca_off( nproc_pool / nogrp )  )
+
+   ! do i = 1, nproc_pool / nogrp
+   !    dffts%tg_sca_snd (i) = dffts%tg_npp ( nplist( i ) + 1 ) * dffts%tg_nsw ( me_pool + 1 )
+   !    dffts%tg_sca_rcv (i) = dffts%tg_npp ( me_pool + 1 )     * dffts%tg_nsw ( nplist( i ) + 1 )
+   ! end do
+   ! dffts%tg_sca_off(1) = 0
+   ! do i = 2, nproc_pool / nogrp
+   !    dffts%tg_sca_off(i) = dffts%tg_sca_off(i - 1) + dffts%tg_npp ( nplist( i - 1 ) + 1 )
+   ! end do
+   ! dffts%tg_sca_sdsp (1) = 0
+   ! dffts%tg_sca_rdsp (1) = 0
+   ! do i = 2, nproc_pool / nogrp
+   !    dffts%tg_sca_sdsp (i) = dffts%tg_sca_sdsp (i - 1) + dffts%tg_sca_snd (i - 1)
+   !    dffts%tg_sca_rdsp (i) = dffts%tg_sca_rdsp (i - 1) + dffts%tg_sca_rcv (i - 1)
+   ! enddo
+
    dffts%have_task_groups = .TRUE.
 
    RETURN



More information about the Q-e-commits mailing list