[Q-e-commits] espresso/CPV chargedensity.f90, 1.45, 1.46 cplib.f90, 1.182, 1.183 exch_corr.f90, 1.43, 1.44 forces.f90, 1.35, 1.36 nl_base.f90, 1.30, 1.31 ortho_base.f90, 1.53, 1.54 phasefactor.f90, 1.14, 1.15 potentials.f90, 1.57, 1.58 runcp.f90, 1.52, 1.53
ccavazzoni at qe-forge.org
ccavazzoni at qe-forge.org
Sun May 24 19:29:54 CEST 2009
- Previous message: [Q-e-commits] espresso/Modules fft_parallel.f90,1.7,1.8
- Next message: [Q-e-commits] espresso/PW dqvan2.f90, 1.15, 1.16 newd.f90, 1.45, 1.46 qvan2.f90, 1.18, 1.19 sum_band.f90, 1.76, 1.77 v_of_rho.f90, 1.42, 1.43
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/q-e/espresso/CPV
In directory qeforge.qe-forge.org:/tmp/cvs-serv5626
Modified Files:
chargedensity.f90 cplib.f90 exch_corr.f90 forces.f90
nl_base.f90 ortho_base.f90 phasefactor.f90 potentials.f90
runcp.f90
Log Message:
- OpenMP parallelization
Index: chargedensity.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/chargedensity.f90,v
retrieving revision 1.45
retrieving revision 1.46
diff -u -d -r1.45 -r1.46
--- chargedensity.f90 12 Jan 2009 17:25:15 -0000 1.45
+++ chargedensity.f90 24 May 2009 17:29:52 -0000 1.46
@@ -487,7 +487,12 @@
! The size of psis is nnr: which is equal to the total number
! of local fourier coefficients.
!
- psis (:) = (0.d0, 0.d0)
+!$omp parallel default(shared), private(eig_offset, ig, eig_index )
+ !
+!$omp do
+ do ig = 1, SIZE(psis)
+ psis (ig) = (0.d0, 0.d0)
+ end do
!
! Loop for all local g-vectors (ngw)
! c: stores the Fourier expansion coefficients
@@ -515,6 +520,7 @@
! Otherwise we can do this once at the beginning, before the loop.
! we choose to do the latter one.
+!$omp do
do ig=1,ngw
psis(nms(ig)+eig_offset*dffts%nnrx)=conjg(c(ig,i+eig_index-1))+ci*conjg(c(ig,i+eig_index))
psis(nps(ig)+eig_offset*dffts%nnrx)=c(ig,i+eig_index-1)+ci*c(ig,i+eig_index)
@@ -525,6 +531,7 @@
ENDIF
!
end do
+!$omp end parallel
! 2*NOGRP are trasformed at the same time
! psis: holds the fourier coefficients of the current proccesor
@@ -584,6 +591,7 @@
IF( nr1sx * nr2sx * dffts%tg_npp( me_image + 1 ) > SIZE( psis ) ) &
CALL errore( ' rhoofr ', ' psis size too low ', nr1sx * nr2sx * dffts%tg_npp( me_image + 1 ) )
+!$omp parallel do default(shared)
do ir = 1, nr1sx * nr2sx * dffts%tg_npp( me_image + 1 )
tmp_rhos(ir,iss1) = tmp_rhos(ir,iss1) + sa1*( real(psis(ir)))**2
tmp_rhos(ir,iss2) = tmp_rhos(ir,iss2) + sa2*(aimag(psis(ir)))**2
@@ -656,27 +664,41 @@
!
ci = ( 0.0d0, 1.0d0 )
do iss = 1, nspin
+!$omp parallel default(shared), private(ig)
+!$omp do
do ig = 1, nnrx
v( ig ) = ( 0.0d0, 0.0d0 )
end do
+!$omp do
do ig=1,ngm
v(np(ig))= ci*tpiba*gx(1,ig)*rhog(ig,iss)
v(nm(ig))=CONJG(ci*tpiba*gx(1,ig)*rhog(ig,iss))
end do
+!$omp end parallel
+ !
call invfft( 'Dense', v, dfftp )
+ !
+!$omp parallel default(shared), private(ig,ir)
+!$omp do
do ir=1,nnrx
gradr(ir,1,iss)=DBLE(v(ir))
end do
+!$omp do
do ig=1,nnrx
v(ig)=(0.0d0,0.0d0)
end do
+!$omp do
do ig=1,ngm
v(np(ig))= tpiba*( ci*gx(2,ig)*rhog(ig,iss)- &
& gx(3,ig)*rhog(ig,iss) )
v(nm(ig))= tpiba*(CONJG(ci*gx(2,ig)*rhog(ig,iss)+ &
& gx(3,ig)*rhog(ig,iss)))
end do
+!$omp end parallel
+ !
call invfft( 'Dense', v, dfftp )
+ !
+!$omp parallel do default(shared)
do ir=1,nnrx
gradr(ir,2,iss)= DBLE(v(ir))
gradr(ir,3,iss)=AIMAG(v(ir))
Index: cplib.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/cplib.f90,v
retrieving revision 1.182
retrieving revision 1.183
diff -u -d -r1.182 -r1.183
--- cplib.f90 5 Apr 2009 08:47:35 -0000 1.182
+++ cplib.f90 24 May 2009 17:29:52 -0000 1.183
@@ -594,10 +594,13 @@
!
! calculate csc(k)=<cp(i)|cp(k)>, k<i
!
- ALLOCATE( temp( ngw ) )
-
kmax = i - 1
+ !
+!$omp parallel default(shared), private( temp, k, ig )
+ ALLOCATE( temp( ngw ) )
+
+!$omp do
DO k = 1, kmax
csc(k) = 0.0d0
IF ( ispin(i) .EQ. ispin(k) ) THEN
@@ -608,9 +611,15 @@
IF (gstart == 2) csc(k) = csc(k) - temp(1)
ENDIF
END DO
+!$omp end do
+
+ DEALLOCATE( temp )
+
+!$omp end parallel
CALL mp_sum( csc( 1:kmax ), intra_image_comm )
+ ALLOCATE( temp( ngw ) )
!
! calculate bec(i)=<cp(i)|beta>
!
@@ -1892,7 +1901,7 @@
!
INTEGER iss, isup, isdw, ig, ir, i, j, k, ij, is, ia
REAL(DP) vave, ebac, wz, eh, ehpre
- COMPLEX(DP) fp, fm, ci, drhop
+ COMPLEX(DP) fp, fm, ci, drhop, zpseu, zh
COMPLEX(DP), ALLOCATABLE :: rhotmp(:), vtemp(:)
! COMPLEX(DP), ALLOCATABLE :: drhotmp(:,:,:)
COMPLEX(DP), ALLOCATABLE :: drhot(:,:)
@@ -1999,15 +2008,29 @@
!
! calculation local potential energy
!
- vtemp=(0.d0,0.d0)
+ zpseu = 0.0d0
+ !
+!$omp parallel default(shared), private(ig,is)
+!$omp do
+ DO ig = 1, SIZE(vtemp)
+ vtemp(ig)=(0.d0,0.d0)
+ END DO
DO is=1,nsp
+!$omp do
DO ig=1,ngs
vtemp(ig)=vtemp(ig)+CONJG(rhotmp(ig))*sfac(ig,is)*vps(ig,is)
END DO
END DO
+!$omp do reduction(+:zpseu)
+ DO ig=1,ngs
+ zpseu = zpseu + vtemp(ig)
+ END DO
+!$omp end parallel
- epseu = wz * DBLE(SUM(vtemp))
- IF (gstart == 2) epseu=epseu-vtemp(1)
+ epseu = wz * DBLE(zpseu)
+ !
+ IF (gstart == 2) epseu = epseu - DBLE( vtemp(1) )
+ !
CALL mp_sum( epseu, intra_image_comm )
epseu = epseu * omega
@@ -2027,18 +2050,30 @@
!
IF( ttsic ) self_vloc = 0.d0
+ zh = 0.0d0
+
+!$omp parallel default(shared), private(ig,is)
+
DO is=1,nsp
+!$omp do
DO ig=1,ngs
rhotmp(ig)=rhotmp(ig)+sfac(ig,is)*rhops(ig,is)
END DO
END DO
!
- IF (gstart == 2) vtemp(1)=0.0d0
+!$omp do
DO ig = gstart, ng
vtemp(ig) = CONJG( rhotmp( ig ) ) * rhotmp( ig ) / g( ig )
END DO
-!
- eh = DBLE( SUM( vtemp ) ) * wz * 0.5d0 * fpi / tpiba2
+
+!$omp do reduction(+:zh)
+ DO ig = gstart, ng
+ zh = zh + vtemp(ig)
+ END DO
+
+!$omp end parallel
+
+ eh = DBLE( zh ) * wz * 0.5d0 * fpi / tpiba2
!
IF ( ttsic ) THEN
!
@@ -2082,15 +2117,20 @@
!
!
IF (gstart == 2) vtemp(1)=(0.d0,0.d0)
+
+!$omp parallel default(shared), private(ig,is)
+!$omp do
DO ig=gstart,ng
vtemp(ig)=rhotmp(ig)*fpi/(tpiba2*g(ig))
END DO
!
DO is=1,nsp
+!$omp do
DO ig=1,ngs
vtemp(ig)=vtemp(ig)+sfac(ig,is)*vps(ig,is)
END DO
END DO
+!$omp end parallel
!
! vtemp = v_loc(g) + v_h(g)
!
Index: exch_corr.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/exch_corr.f90,v
retrieving revision 1.43
retrieving revision 1.44
diff -u -d -r1.43 -r1.44
--- exch_corr.f90 12 Jan 2009 17:25:15 -0000 1.43
+++ exch_corr.f90 24 May 2009 17:29:52 -0000 1.44
@@ -673,7 +673,7 @@
real(DP) :: rup, rdw, ex, ec, vx(2), vc(2)
real(DP) :: rh, grh2, zeta
real(DP) :: sx, sc, v1x, v2x, v1c, v2c
- real(DP) :: rhox, arhox, e2, sx_dbg, sc_dbg
+ real(DP) :: rhox, arhox, e2
real(DP) :: grho2(2), arho, segno
real(DP) :: v1xup, v1xdw, v2xup, v2xdw
real(DP) :: v1cup, v1cdw
@@ -686,15 +686,13 @@
igcc_is_lyp = (get_igcc() == 3)
!
- sx_dbg = 0.0d0
- sc_dbg = 0.0d0
-
e2 = 1.0d0
etxc = 0.0d0
if( nspin == 1 ) then
!
! spin-unpolarized case
!
+!$omp parallel do private( rhox, arhox, ex, ec, vx, vc ), reduction(+:etxc)
do ir = 1, nnr
rhox = rhor (ir, nspin)
arhox = abs (rhox)
@@ -704,6 +702,7 @@
etxc = etxc + e2 * (ex + ec) * rhox
endif
enddo
+!$omp end parallel do
!
else
!
@@ -730,8 +729,6 @@
enddo
etxc = etxc + e2 * (ex + ec) * rhox
endif
- sx_dbg = sx_dbg + ex
- sc_dbg = sc_dbg + ec
enddo
endif
@@ -744,39 +741,29 @@
debug_xc = .false.
end if
- ! write(6,*) 'debug 1 = ', sx_dbg, sc_dbg
-
! now come the corrections
- sx_dbg = 0.0d0
- sc_dbg = 0.0d0
-
if( dft_is_gradient() ) then
- do k = 1, nnr
- !
- !
- do is = 1, nspin
- grho2 (is) = grhor(k, 1, is)**2 + grhor(k, 2, is)**2 + grhor(k, 3, is)**2
- enddo
+ if (nspin == 1) then
!
+ ! This is the spin-unpolarised case
!
- if (nspin == 1) then
- !
- ! This is the spin-unpolarised case
+!$omp parallel do &
+!$omp private( is, grho2, arho, segno, sx, sc, v1x, v2x, v1c, v2c ), reduction(+:etxc)
+ do k = 1, nnr
!
+ grho2 (1) = grhor(k, 1, 1)**2 + grhor(k, 2, 1)**2 + grhor(k, 3, 1)**2
arho = abs (rhor (k, 1) )
segno = sign (1.d0, rhor (k, 1) )
if (arho > epsr .and. grho2 (1) > epsg) then
call gcxc (arho, grho2(1), sx, sc, v1x, v2x, v1c, v2c)
-
!
! first term of the gradient correction : D(rho*Exc)/D(rho)
v (k, 1) = v (k, 1) + e2 * (v1x + v1c)
-
! HERE h contains D(rho*Exc)/D(|grad rho|) / |grad rho|
!
h (k, 1, 1) = e2 * (v2x + v2c)
@@ -784,15 +771,19 @@
else
h (k, 1, 1) = 0.d0
- sx = 0.0d0
- sc = 0.0d0
endif
!
- !
- else
- !
- ! spin-polarised case
- !
+ end do
+!$omp end parallel do
+ !
+ else
+ !
+ ! spin-polarised case
+ !
+ do k = 1, nnr
+ do is = 1, nspin
+ grho2 (is) = grhor(k, 1, is)**2 + grhor(k, 2, is)**2 + grhor(k, 3, is)**2
+ enddo
rup = rhor (k, 1)
rdw = rhor (k, 2)
call gcx_spin ( rup, rdw, grho2 (1), grho2 (2), sx, v1xup, v1xdw, v2xup, v2xdw)
@@ -844,13 +835,10 @@
etxc = etxc + e2 * (sx + sc)
!
!
- endif
- !
- sx_dbg = sx_dbg + ex
- sc_dbg = sc_dbg + ec
+ enddo
!
- enddo
-
+ endif
+ !
end if
return
@@ -888,29 +876,35 @@
if( dft_is_gradient() ) then
!
+!$omp parallel default(shared), private(ipol,k,grup,grdw)
if( nspin == 1 ) then
!
! h contains D(rho*Exc)/D(|grad rho|) * (grad rho) / |grad rho|
!
do ipol = 1, 3
+!$omp do
do k = 1, nnr
grhor (k, ipol, 1) = h (k, 1, 1) * grhor (k, ipol, 1)
enddo
+!$omp end do
end do
!
!
else
!
do ipol = 1, 3
+!$omp do
do k = 1, nnr
grup = grhor (k, ipol, 1)
grdw = grhor (k, ipol, 2)
grhor (k, ipol, 1) = h (k, 1, 1) * grup + h (k, 1, 2) * grdw
grhor (k, ipol, 2) = h (k, 2, 2) * grdw + h (k, 2, 1) * grup
enddo
+!$omp end do
enddo
!
end if
+!$omp end parallel
!
end if
Index: forces.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/forces.f90,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -d -r1.35 -r1.36
--- forces.f90 2 Jan 2008 17:59:46 -0000 1.35
+++ forces.f90 24 May 2009 17:29:52 -0000 1.36
@@ -124,20 +124,26 @@
!
IF( use_task_groups ) THEN
!
+!$omp parallel do
DO ir = 1, nr1sx * nr2sx * dffts%tg_npp( me_image + 1 )
psi(ir) = CMPLX( v(ir,iss1) * DBLE( psi(ir) ), v(ir,iss2) * AIMAG( psi(ir) ) )
END DO
+!$omp end parallel do
!
ELSE
!
IF( PRESENT( v1 ) ) THEN
+!$omp parallel do
DO ir=1,nnrsx
psi(ir)=CMPLX(v(ir,iss1)* DBLE(psi(ir)), v1(ir,iss2)*AIMAG(psi(ir)) )
END DO
+!$omp end parallel do
ELSE
+!$omp parallel do
DO ir=1,nnrsx
psi(ir)=CMPLX(v(ir,iss1)* DBLE(psi(ir)), v(ir,iss2)*AIMAG(psi(ir)) )
END DO
+!$omp end parallel do
END IF
!
END IF
@@ -165,20 +171,24 @@
fip = -0.5d0*f(i+idx)
endif
IF( use_task_groups ) THEN
+!$omp parallel do private( fp, fm )
DO ig=1,ngw
fp= psi(nps(ig)+eig_offset) + psi(nms(ig)+eig_offset)
fm= psi(nps(ig)+eig_offset) - psi(nms(ig)+eig_offset)
- df(igno)= fi *(tpiba2 * ggp(ig) * c(ig,idx+i-1)+cmplx(real (fp), aimag(fm)))
- da(igno)= fip*(tpiba2 * ggp(ig) * c(ig,idx+i )+cmplx(aimag(fp),-real (fm)))
- igno = igno + 1
+ df(ig+igno-1)= fi *(tpiba2 * ggp(ig) * c(ig,idx+i-1)+cmplx(real (fp), aimag(fm)))
+ da(ig+igno-1)= fip*(tpiba2 * ggp(ig) * c(ig,idx+i )+cmplx(aimag(fp),-real (fm)))
END DO
+!$omp end parallel do
+ igno = igno + ngw
ELSE
+!$omp parallel do private( fp, fm )
DO ig=1,ngw
fp= psi(nps(ig)) + psi(nms(ig))
fm= psi(nps(ig)) - psi(nms(ig))
df(ig)= fi*(tpiba2*ggp(ig)* c(ig,idx+i-1)+CMPLX(DBLE(fp), AIMAG(fm)))
da(ig)=fip*(tpiba2*ggp(ig)* c(ig,idx+i )+CMPLX(AIMAG(fp),-DBLE(fm)))
END DO
+!$omp end parallel do
END IF
END IF
@@ -217,6 +227,8 @@
fip= f(i+idx)
END IF
!
+!$omp parallel default(shared), private(iv,jv,ivoff,jvoff,dd,dv,inl,jnl,is,isa,ism)
+ !
DO is = 1, nsp
DO iv = 1, nh(is)
IF( program_name == 'FPMD' ) THEN
@@ -231,37 +243,41 @@
END DO
END IF
ELSE
- DO jv = 1, nh(is)
- isa = 0
- DO ism = 1, is-1
- isa = isa + na( ism )
- END DO
- dv = dvan(iv,jv,is)
- ivoff = ish(is)+(iv-1)*na(is)
- jvoff = ish(is)+(jv-1)*na(is)
- IF( i + idx - 1 /= n ) THEN
- DO ia=1,na(is)
- inl = ivoff + ia
- jnl = jvoff + ia
- isa = isa + 1
- dd = deeq(iv,jv,isa,iss1) + dv
- af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1)
- dd = deeq(iv,jv,isa,iss2) + dv
- aa(inl,igrp) = aa(inl,igrp) - fip * dd * bec(jnl,i+idx)
- END DO
- ELSE
- DO ia=1,na(is)
- inl = ivoff + ia
- jnl = jvoff + ia
- isa = isa + 1
- dd = deeq(iv,jv,isa,iss1) + dv
- af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1)
+ DO jv = 1, nh(is)
+ isa = 0
+ DO ism = 1, is-1
+ isa = isa + na( ism )
END DO
- END IF
- END DO
+ dv = dvan(iv,jv,is)
+ ivoff = ish(is)+(iv-1)*na(is)
+ jvoff = ish(is)+(jv-1)*na(is)
+ IF( i + idx - 1 /= n ) THEN
+!$omp do
+ DO ia=1,na(is)
+ inl = ivoff + ia
+ jnl = jvoff + ia
+ isa = isa + 1
+ dd = deeq(iv,jv,isa,iss1) + dv
+ af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1)
+ dd = deeq(iv,jv,isa,iss2) + dv
+ aa(inl,igrp) = aa(inl,igrp) - fip * dd * bec(jnl,i+idx)
+ END DO
+ ELSE
+!$omp do
+ DO ia=1,na(is)
+ inl = ivoff + ia
+ jnl = jvoff + ia
+ isa = isa + 1
+ dd = deeq(iv,jv,isa,iss1) + dv
+ af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1)
+ END DO
+ END IF
+ END DO
END IF
END DO
END DO
+
+!$omp end parallel
END IF
Index: nl_base.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/nl_base.f90,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -d -r1.30 -r1.31
--- nl_base.f90 12 Jan 2009 17:25:16 -0000 1.30
+++ nl_base.f90 24 May 2009 17:29:52 -0000 1.31
@@ -68,6 +68,7 @@
!
do iv = 1, nh( is )
!
+!$omp parallel default(shared), private(l,ixr,ixi,signre,signim,ig,arg,ia)
l = nhtol( iv, is )
!
if (l == 0) then
@@ -92,6 +93,7 @@
signim = 1.0d0
endif
!
+!$omp do
do ia=1,na(is)
!
! q = 0 component (with weight 1.0)
@@ -110,7 +112,9 @@
end do
!
end do
+!$omp end do
+!$omp end parallel
!
IF( nproc_image > 1 ) THEN
inl=(iv-1)*na(is)+1
@@ -218,6 +222,7 @@
!
do iv = 1, nh( is )
!
+!$omp parallel default(shared), private(l,ixr,ixi,signre,signim,ig,arg,ia)
l = nhtol( iv, is )
!
if (l == 0) then
@@ -242,6 +247,7 @@
signim = 1.0d0
endif
!
+!$omp do
do ia=1,na(is)
!
! q = 0 component (with weight 1.0)
@@ -260,6 +266,9 @@
end do
!
end do
+!$omp end do
+
+!$omp end parallel
!
IF( nproc_image > 1 ) THEN
@@ -381,6 +390,7 @@
!
! order of states: s_1 p_x1 p_z1 p_y1 s_2 p_x2 p_z2 p_y2
!
+!$omp parallel default(shared), private(l,ixr,ixi,signre,signim,ig,arg,ia)
l=nhtol(iv,is)
if (l.eq.0) then
ixr = 2
@@ -404,6 +414,7 @@
signim = 1.0d0
endif
!
+!$omp do
do ia=1,na(is)
! q = 0 component (with weight 1.0)
if (gstart == 2) then
@@ -417,6 +428,8 @@
wrk2(2,ig,ia) = signim*arg*eigr(ixi,ig,ia+isa)
end do
end do
+!$omp end do
+!$omp end parallel
inl=ish(is)+(iv-1)*na(is)+1
CALL DGEMM( 'T', 'N', na(is), n, 2*ngw, 1.0d0, wrk2, 2*ngw, c, 2*ngw, 0.0d0, becdr_repl( inl, 1 ), nkb )
end do
@@ -499,6 +512,7 @@
!
! order of states: s_1 p_x1 p_z1 p_y1 s_2 p_x2 p_z2 p_y2
!
+!$omp parallel default(shared), private(l,ixr,ixi,signre,signim,ig,arg,ia)
l=nhtol(iv,is)
if (l.eq.0) then
ixr = 2
@@ -522,6 +536,7 @@
signim = 1.0d0
endif
!
+!$omp do
do ia=1,na(is)
! q = 0 component (with weight 1.0)
if (gstart == 2) then
@@ -535,6 +550,8 @@
wrk2(2,ig,ia) = signim*arg*eigr(ixi,ig,ia+isa)
end do
end do
+!$omp end do
+!$omp end parallel
inl=ish(is)+(iv-1)*na(is)+1
CALL DGEMM( 'T', 'N', na(is), n, 2*ngw, 1.0d0, wrk2, 2*ngw, c, 2*ngw, 0.0d0, becdr( inl, 1, k ), nkb )
end do
@@ -582,11 +599,13 @@
!
! local
!
- real(DP) :: sum, sums(2)
+ real(DP) :: sumt, sums(2)
integer :: is, iv, jv, ijv, inl, jnl, isa, ism, ia, iss, i
!
ennl = 0.d0
!
+!$omp parallel default(shared), &
+!$omp private(is,iv,ijv,isa,ism,ia,inl,jnl,sums,i,iss,sumt), reduction(+:ennl)
do is = 1, nsp
do iv = 1, nh(is)
do jv = iv, nh(is)
@@ -595,6 +614,7 @@
do ism = 1, is - 1
isa = isa + na(ism)
end do
+!$omp do
do ia = 1, na(is)
inl = ish(is)+(iv-1)*na(is)+ia
jnl = ish(is)+(jv-1)*na(is)+ia
@@ -604,17 +624,19 @@
iss = ispin(i)
sums(iss) = sums(iss) + f(i) * bec(inl,i) * bec(jnl,i)
end do
- sum = 0.d0
+ sumt = 0.d0
do iss = 1, nspin
rhovan( ijv, isa, iss ) = sums( iss )
- sum = sum + sums( iss )
+ sumt = sumt + sums( iss )
end do
- if( iv .ne. jv ) sum = 2.d0 * sum
- ennl = ennl + sum * dvan( jv, iv, is)
+ if( iv .ne. jv ) sumt = 2.d0 * sumt
+ ennl = ennl + sumt * dvan( jv, iv, is)
end do
+!$omp end do
end do
end do
end do
+!$omp end parallel
!
return
end function ennl
@@ -998,6 +1020,9 @@
!
real(DP), allocatable :: tmpbec(:,:), tmpdr(:,:)
real(DP), allocatable :: fion_loc(:,:)
+#ifdef __OPENMP
+ INTEGER :: mytid, ntids, omp_get_thread_num, omp_get_num_threads
+#endif
!
call start_clock( 'nlfq' )
!
@@ -1010,16 +1035,30 @@
!
fion_loc = 0.0d0
- allocate ( tmpbec( nhm, nlax ), tmpdr( nhm, nlax ) )
!
- DO k=1,3
+ DO k = 1, 3
- isa=0
+!$omp parallel default(shared), &
+!$omp private(tmpbec,tmpdr,isa,is,ia,iss,nss,istart,ir,nr,ioff,iv,jv,inl,temp,i,mytid,ntids)
+#ifdef __OPENMP
+ mytid = omp_get_thread_num() ! take the thread ID
+ ntids = omp_get_num_threads() ! take the number of threads
+#endif
+
+ allocate ( tmpbec( nhm, nlax ), tmpdr( nhm, nlax ) )
+
+ isa = 0
+ !
DO is=1,nsp
DO ia=1,na(is)
isa=isa+1
+#ifdef __OPENMP
+ ! distribute atoms round robin to threads
+ !
+ IF( MOD( isa, ntids ) /= mytid ) CYCLE
+#endif
DO iss = 1, nspin
nss = nupdwn( iss )
@@ -1069,6 +1108,8 @@
END DO
END DO
END DO
+ deallocate ( tmpbec, tmpdr )
+!$omp end parallel
END DO
!
CALL mp_sum( fion_loc, intra_image_comm )
@@ -1079,8 +1120,6 @@
!
deallocate ( fion_loc )
!
- deallocate ( tmpbec, tmpdr )
-
call stop_clock( 'nlfq' )
!
return
Index: ortho_base.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/ortho_base.f90,v
retrieving revision 1.53
retrieving revision 1.54
diff -u -d -r1.53 -r1.54
--- ortho_base.f90 5 Apr 2009 08:37:40 -0000 1.53
+++ ortho_base.f90 24 May 2009 17:29:52 -0000 1.54
@@ -474,6 +474,7 @@
CALL sqr_tr_cannon( nss, tmp1, ldx, tr1, ldx, desc )
CALL sqr_tr_cannon( nss, tmp2, ldx, tr2, ldx, desc )
!
+!$omp parallel do default(shared), private(j)
DO i=1,nr
DO j=1,nc
x1(i,j) = sig(i,j)-tmp1(i,j)-tr1(i,j)-dd(i,j)
@@ -504,6 +505,7 @@
!
! g=ut*x1*u/d (g is stored in tmp1)
!
+!$omp parallel do default(shared), private(j)
DO i=1,nr
DO j=1,nc
tmp1(i,j)=tmp2(i,j)/(diag(i+ir-1)+diag(j+ic-1))
Index: phasefactor.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/phasefactor.f90,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- phasefactor.f90 26 Apr 2007 09:24:37 -0000 1.14
+++ phasefactor.f90 24 May 2009 17:29:52 -0000 1.15
@@ -167,6 +167,7 @@
call start_clock( 'strucf' )
+!$omp parallel do default(shared), private(ig1,ig2,ig3,isa,is,ia)
DO ig = 1, ngm
ig1 = mill( 1, ig )
ig2 = mill( 2, ig )
Index: potentials.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/potentials.f90,v
retrieving revision 1.57
retrieving revision 1.58
diff -u -d -r1.57 -r1.58
--- potentials.f90 8 May 2009 15:13:03 -0000 1.57
+++ potentials.f90 24 May 2009 17:29:52 -0000 1.58
@@ -869,6 +869,7 @@
ehte = 0.0d0
ehti = 0.0d0
+!$omp parallel do default(shared), private(rp,is,rhet,rhog,fpibg), reduction(+:eh,ehte,ehti)
DO ig = gstart, ngm
rp = (0.D0,0.D0)
Index: runcp.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/runcp.f90,v
retrieving revision 1.52
retrieving revision 1.53
diff -u -d -r1.52 -r1.53
--- runcp.f90 2 Jan 2008 17:59:46 -0000 1.52
+++ runcp.f90 24 May 2009 17:29:52 -0000 1.53
@@ -206,7 +206,9 @@
end do
- DEALLOCATE( c2, c3, tg_rhos )
+ DEALLOCATE( c2 )
+ DEALLOCATE( c3 )
+ DEALLOCATE( tg_rhos )
END IF
- Previous message: [Q-e-commits] espresso/Modules fft_parallel.f90,1.7,1.8
- Next message: [Q-e-commits] espresso/PW dqvan2.f90, 1.15, 1.16 newd.f90, 1.45, 1.46 qvan2.f90, 1.18, 1.19 sum_band.f90, 1.76, 1.77 v_of_rho.f90, 1.42, 1.43
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Q-e-commits
mailing list