[Q-e-commits] espresso/Modules fft_base.f90, 1.46, 1.47 fft_parallel.f90, 1.8, 1.9 fft_types.f90, 1.18, 1.19 mp_global.f90, 1.32, 1.33 task_groups.f90, 1.22, 1.23
ccavazzoni at qe-forge.org
ccavazzoni at qe-forge.org
Fri May 29 17:48:42 CEST 2009
Update of /cvsroot/q-e/espresso/Modules
In directory qeforge.qe-forge.org:/tmp/cvs-serv6951
Modified Files:
fft_base.f90 fft_parallel.f90 fft_types.f90 mp_global.f90
task_groups.f90
Log Message:
- Some update in task groups stuff
Index: fft_base.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/fft_base.f90,v
retrieving revision 1.46
retrieving revision 1.47
diff -u -d -r1.46 -r1.47
--- fft_base.f90 23 May 2009 16:23:17 -0000 1.46
+++ fft_base.f90 29 May 2009 15:48:39 -0000 1.47
@@ -88,7 +88,7 @@
USE parallel_include
#endif
use mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
- my_image_id, nogrp, pgrp_comm, nplist
+ my_image_id, nogrp, pgrp_comm, nplist, me_pgrp, npgrp
USE kinds, ONLY : DP
implicit none
@@ -99,7 +99,7 @@
#ifdef __PARA
- INTEGER :: dest, from, k, ip, proc, ierr, me, me_pgrp, nprocp, gproc, gcomm, i, kdest, kfrom
+ INTEGER :: dest, from, k, ip, proc, ierr, me, ipoffset, nprocp, gproc, gcomm, i, kdest, kfrom
INTEGER :: sendcount(nproc_pool), sdispls(nproc_pool), recvcount(nproc_pool), rdispls(nproc_pool)
INTEGER :: offset(nproc_pool)
INTEGER :: sh(nproc_pool), rh(nproc_pool)
@@ -123,13 +123,13 @@
!
IF( use_tg_ ) THEN
! This is the number of procs. in the plane-wave group
- nprocp = nproc_pool / nogrp
- CALL mpi_comm_rank( pgrp_comm, me_pgrp, ierr )
- gcomm = pgrp_comm
+ nprocp = npgrp
+ ipoffset = me_pgrp
+ gcomm = pgrp_comm
ELSE
- nprocp = nproc_pool
- me_pgrp = me_pool
- gcomm = intra_pool_comm
+ nprocp = nproc_pool
+ ipoffset = me_pool
+ gcomm = intra_pool_comm
END IF
!
if ( nprocp == 1 ) return
@@ -178,7 +178,7 @@
!
! "forward" scatter from columns to planes
!
- ! step one: store contiguously the slices
+ ! step one: store contiguously the slices and send
!
do ip = 1, nprocp
@@ -186,7 +186,7 @@
! proc in order to avoid that all procs send a msg at the same proc
! at the same time.
!
- proc = me_pgrp + 1 + ip
+ proc = ipoffset + 1 + ip
IF( proc > nprocp ) proc = proc - nprocp
gproc = proc
@@ -208,12 +208,14 @@
f_aux ( dest + (k - 1) * 2 - 1 + 2 ) = f_in ( from + (k - 1) * nrx3 - 1 + 2 )
enddo
CASE ( 3 )
+!$omp parallel do
do k = 1, ncp_ (me)
f_aux ( dest + (k - 1) * 3 - 1 + 1 ) = f_in ( from + (k - 1) * nrx3 - 1 + 1 )
f_aux ( dest + (k - 1) * 3 - 1 + 2 ) = f_in ( from + (k - 1) * nrx3 - 1 + 2 )
f_aux ( dest + (k - 1) * 3 - 1 + 3 ) = f_in ( from + (k - 1) * nrx3 - 1 + 3 )
enddo
CASE ( 4 )
+!$omp parallel do
do k = 1, ncp_ (me)
f_aux ( dest + (k - 1) * 4 - 1 + 1 ) = f_in ( from + (k - 1) * nrx3 - 1 + 1 )
f_aux ( dest + (k - 1) * 4 - 1 + 2 ) = f_in ( from + (k - 1) * nrx3 - 1 + 2 )
@@ -241,11 +243,11 @@
!
end do
!
- ! step two: communication
+ ! step two: receive
!
do ip = 1, nprocp
!
- proc = me_pgrp + 1 - ip
+ proc = ipoffset + 1 - ip
IF( proc < 1 ) proc = proc + nprocp
!
! now post the receive
@@ -297,7 +299,7 @@
! post the non blocking send
- proc = me_pgrp + 1 + ip
+ proc = ipoffset + 1 + ip
IF( proc > nprocp ) proc = proc - nprocp
call mpi_isend( f_in( rdispls( proc ) + 1 ), recvcount( proc ), MPI_DOUBLE_COMPLEX, &
@@ -306,7 +308,7 @@
! post the non blocking receive
- proc = me_pgrp + 1 - ip
+ proc = ipoffset + 1 - ip
IF( proc < 1 ) proc = proc + nprocp
CALL mpi_irecv( f_aux( sdispls( proc ) + 1 ), sendcount( proc ), MPI_DOUBLE_COMPLEX, &
@@ -372,12 +374,14 @@
f_in ( dest + (k - 1) * nrx3 - 1 + 2 ) = f_aux( from + (k - 1) * 2 - 1 + 2 )
enddo
CASE ( 3 )
+!$omp parallel do
do k = 1, ncp_ ( me )
f_in ( dest + (k - 1) * nrx3 - 1 + 1 ) = f_aux( from + (k - 1) * 3 - 1 + 1 )
f_in ( dest + (k - 1) * nrx3 - 1 + 2 ) = f_aux( from + (k - 1) * 3 - 1 + 2 )
f_in ( dest + (k - 1) * nrx3 - 1 + 3 ) = f_aux( from + (k - 1) * 3 - 1 + 3 )
enddo
CASE ( 4 )
+!$omp parallel do
do k = 1, ncp_ ( me )
f_in ( dest + (k - 1) * nrx3 - 1 + 1 ) = f_aux( from + (k - 1) * 4 - 1 + 1 )
f_in ( dest + (k - 1) * nrx3 - 1 + 2 ) = f_aux( from + (k - 1) * 4 - 1 + 2 )
Index: fft_parallel.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/fft_parallel.f90,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- fft_parallel.f90 24 May 2009 17:28:34 -0000 1.8
+++ fft_parallel.f90 29 May 2009 15:48:39 -0000 1.9
@@ -208,29 +208,14 @@
SUBROUTINE pack_group_sticks()
- INTEGER :: idx, ierr
- INTEGER, DIMENSION(nogrp+1) :: send_cnt, send_displ, recv_cnt, recv_displ
+ INTEGER :: ierr
!
if( .NOT. use_tg ) return
!
- send_cnt(1) = nx3 * dfft%nsw( me_p )
- IF( nx3 * dfft%nsw( me_p ) > dfft%nnrx ) THEN
- CALL errore( ' tg_cfft ', ' inconsistent dfft%nnrx ', 1 )
- END IF
- send_displ(1) = 0
- recv_cnt(1) = nx3 * dfft%nsw( nolist(1) + 1 )
- recv_displ(1) = 0
- DO idx = 2, nogrp
- send_cnt(idx) = nx3 * dfft%nsw( me_p )
- send_displ(idx) = send_displ(idx-1) + dfft%nnrx
- recv_cnt(idx) = nx3 * dfft%nsw( nolist(idx) + 1 )
- recv_displ(idx) = recv_displ(idx-1) + recv_cnt(idx-1)
- ENDDO
-
- IF( recv_displ(nogrp) + recv_cnt(nogrp) > SIZE( yf ) ) THEN
+ IF( dfft%tg_rdsp(nogrp) + dfft%tg_rcv(nogrp) > SIZE( yf ) ) THEN
CALL errore( ' tg_cfft ', ' inconsistent size ', 1 )
END IF
- IF( send_displ(nogrp) + send_cnt(nogrp) > SIZE( f ) ) THEN
+ IF( dfft%tg_psdsp(nogrp) + dfft%tg_snd(nogrp) > SIZE( f ) ) THEN
CALL errore( ' tg_cfft ', ' inconsistent size ', 2 )
END IF
@@ -241,8 +226,8 @@
#if defined __MPI
- CALL MPI_ALLTOALLV( f(1), send_cnt, send_displ, MPI_DOUBLE_COMPLEX, yf(1), recv_cnt, &
- & recv_displ, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
+ CALL MPI_ALLTOALLV( f(1), dfft%tg_snd, dfft%tg_psdsp, MPI_DOUBLE_COMPLEX, yf(1), dfft%tg_rcv, &
+ & dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
IF( ierr /= 0 ) THEN
CALL errore( ' tg_cfft ', ' alltoall error 1 ', ABS(ierr) )
END IF
@@ -262,26 +247,14 @@
!
! Bring pencils back to their original distribution
!
- INTEGER :: idx, ierr
- INTEGER, DIMENSION(nogrp+1) :: send_cnt, send_displ, recv_cnt, recv_displ
+ INTEGER :: ierr
!
if( .NOT. use_tg ) return
!
- send_cnt (1) = nx3 * dfft%nsw( nolist(1) + 1 )
- send_displ(1) = 0
- recv_cnt (1) = nx3 * dfft%nsw( me_p )
- recv_displ(1) = 0
- DO idx = 2, NOGRP
- send_cnt (idx) = nx3 * dfft%nsw( nolist(idx) + 1 )
- send_displ(idx) = send_displ(idx-1) + send_cnt(idx-1)
- recv_cnt (idx) = nx3 * dfft%nsw( me_p )
- recv_displ(idx) = recv_displ(idx-1) + recv_cnt(idx-1)
- ENDDO
-
- IF( recv_displ(nogrp) + recv_cnt(nogrp) > SIZE( f ) ) THEN
+ IF( dfft%tg_usdsp(nogrp) + dfft%tg_snd(nogrp) > SIZE( f ) ) THEN
CALL errore( ' tg_cfft ', ' inconsistent size ', 3 )
END IF
- IF( send_displ(nogrp) + send_cnt(nogrp) > SIZE( yf ) ) THEN
+ IF( dfft%tg_rdsp(nogrp) + dfft%tg_rcv(nogrp) > SIZE( yf ) ) THEN
CALL errore( ' tg_cfft ', ' inconsistent size ', 4 )
END IF
@@ -289,8 +262,8 @@
#if defined __MPI
CALL MPI_Alltoallv( yf(1), &
- send_cnt, send_displ, MPI_DOUBLE_COMPLEX, f(1), &
- recv_cnt, recv_displ, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
+ dfft%tg_rcv, dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, f(1), &
+ dfft%tg_snd, dfft%tg_usdsp, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
IF( ierr /= 0 ) THEN
CALL errore( ' tg_cfft ', ' alltoall error 2 ', ABS(ierr) )
END IF
Index: fft_types.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/fft_types.f90,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -d -r1.18 -r1.19
--- fft_types.f90 29 Dec 2007 10:09:28 -0000 1.18
+++ fft_types.f90 29 May 2009 15:48:39 -0000 1.19
@@ -59,6 +59,11 @@
INTEGER :: nnrx ! maximum among nnr
INTEGER, POINTER :: tg_nsw(:) ! number of sticks per task group ( wave func )
INTEGER, POINTER :: tg_npp(:) ! number of "Z" planes per task group
+ INTEGER, POINTER :: tg_snd(:) ! number of element to be sent in group redist
+ INTEGER, POINTER :: tg_rcv(:) ! number of element to be received in group redist
+ INTEGER, POINTER :: tg_psdsp(:)! send displacement for all to all (pack)
+ INTEGER, POINTER :: tg_usdsp(:)! send displacement for all to all (unpack)
+ INTEGER, POINTER :: tg_rdsp(:)! receive displacement for all to all
!
END TYPE
@@ -100,6 +105,11 @@
desc%have_task_groups = .FALSE.
NULLIFY( desc%tg_nsw )
NULLIFY( desc%tg_npp )
+ NULLIFY( desc%tg_snd )
+ NULLIFY( desc%tg_rcv )
+ NULLIFY( desc%tg_psdsp )
+ NULLIFY( desc%tg_usdsp )
+ NULLIFY( desc%tg_rdsp )
END SUBROUTINE fft_dlay_allocate
@@ -121,6 +131,11 @@
IF( desc%have_task_groups ) THEN
IF ( ASSOCIATED( desc%tg_nsw ) ) DEALLOCATE( desc%tg_nsw )
IF ( ASSOCIATED( desc%tg_npp ) ) DEALLOCATE( desc%tg_npp )
+ IF ( ASSOCIATED( desc%tg_snd ) ) DEALLOCATE( desc%tg_snd )
+ IF ( ASSOCIATED( desc%tg_rcv ) ) DEALLOCATE( desc%tg_rcv )
+ IF ( ASSOCIATED( desc%tg_psdsp ) ) DEALLOCATE( desc%tg_psdsp )
+ IF ( ASSOCIATED( desc%tg_usdsp ) ) DEALLOCATE( desc%tg_usdsp )
+ IF ( ASSOCIATED( desc%tg_rdsp ) ) DEALLOCATE( desc%tg_rdsp )
END IF
desc%have_task_groups = .FALSE.
END SUBROUTINE fft_dlay_deallocate
Index: mp_global.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/mp_global.f90,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -d -r1.32 -r1.33
--- mp_global.f90 25 Feb 2009 16:50:31 -0000 1.32
+++ mp_global.f90 29 May 2009 15:48:39 -0000 1.33
@@ -43,6 +43,7 @@
INTEGER :: my_image_id = 0 ! index of my image
INTEGER :: me_ortho(2) = 0 ! coordinates of the processors
INTEGER :: me_ortho1 = 0 ! task id for the ortho group
+ INTEGER :: me_pgrp = 0 ! task id for plane wave task group
!
INTEGER :: npool = 1 ! number of "k-points"-pools
INTEGER :: nimage = 1 ! number of "path-images"-pools
@@ -93,6 +94,7 @@
my_image_id = 0
me_pool = mpime
me_image = mpime
+ me_pgrp = me_pool
root_pool = root
root_image = root
inter_pool_comm = group_i
@@ -325,6 +327,7 @@
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' pgrp_comm rank ', itsk )
END IF
END DO
+ me_pgrp = itsk
#endif
Index: task_groups.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/Modules/task_groups.f90,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -d -r1.22 -r1.23
--- task_groups.f90 2 Jan 2008 17:59:28 -0000 1.22
+++ task_groups.f90 29 May 2009 15:48:39 -0000 1.23
@@ -107,6 +107,49 @@
dffts%tg_npp(1) = num_planes
#endif
+ ALLOCATE( dffts%tg_snd( nogrp ) )
+ ALLOCATE( dffts%tg_rcv( nogrp ) )
+ ALLOCATE( dffts%tg_psdsp( nogrp ) )
+ ALLOCATE( dffts%tg_usdsp( nogrp ) )
+ ALLOCATE( dffts%tg_rdsp( nogrp ) )
+
+ dffts%tg_snd(1) = dffts%nr3x * dffts%nsw( me_pool + 1 )
+ IF( dffts%nr3x * dffts%nsw( me_pool + 1 ) > dffts%nnrx ) THEN
+ CALL errore( ' task_groups_init ', ' inconsistent dffts%nnrx ', 1 )
+ END IF
+ dffts%tg_psdsp(1) = 0
+ dffts%tg_usdsp(1) = 0
+ dffts%tg_rcv(1) = dffts%nr3x * dffts%nsw( nolist(1) + 1 )
+ dffts%tg_rdsp(1) = 0
+ DO i = 2, nogrp
+ dffts%tg_snd(i) = dffts%nr3x * dffts%nsw( me_pool + 1 )
+ dffts%tg_psdsp(i) = dffts%tg_psdsp(i-1) + dffts%nnrx
+ dffts%tg_usdsp(i) = dffts%tg_usdsp(i-1) + dffts%tg_snd(i-1)
+ dffts%tg_rcv(i) = dffts%nr3x * dffts%nsw( nolist(i) + 1 )
+ dffts%tg_rdsp(i) = dffts%tg_rdsp(i-1) + dffts%tg_rcv(i-1)
+ ENDDO
+
+ ! ALLOCATE( dffts%tg_sca_snd( nproc_pool / nogrp ) )
+ ! ALLOCATE( dffts%tg_sca_rcv( nproc_pool / nogrp ) )
+ ! ALLOCATE( dffts%tg_sca_sdsp( nproc_pool / nogrp ) )
+ ! ALLOCATE( dffts%tg_sca_rdsp( nproc_pool / nogrp ) )
+ ! ALLOCATE( dffts%tg_sca_off( nproc_pool / nogrp ) )
+
+ ! do i = 1, nproc_pool / nogrp
+ ! dffts%tg_sca_snd (i) = dffts%tg_npp ( nplist( i ) + 1 ) * dffts%tg_nsw ( me_pool + 1 )
+ ! dffts%tg_sca_rcv (i) = dffts%tg_npp ( me_pool + 1 ) * dffts%tg_nsw ( nplist( i ) + 1 )
+ ! end do
+ ! dffts%tg_sca_off(1) = 0
+ ! do i = 2, nproc_pool / nogrp
+ ! dffts%tg_sca_off(i) = dffts%tg_sca_off(i - 1) + dffts%tg_npp ( nplist( i - 1 ) + 1 )
+ ! end do
+ ! dffts%tg_sca_sdsp (1) = 0
+ ! dffts%tg_sca_rdsp (1) = 0
+ ! do i = 2, nproc_pool / nogrp
+ ! dffts%tg_sca_sdsp (i) = dffts%tg_sca_sdsp (i - 1) + dffts%tg_sca_snd (i - 1)
+ ! dffts%tg_sca_rdsp (i) = dffts%tg_sca_rdsp (i - 1) + dffts%tg_sca_rcv (i - 1)
+ ! enddo
+
dffts%have_task_groups = .TRUE.
RETURN
More information about the Q-e-commits
mailing list