[Q-e-commits] espresso/CPV chargedensity.f90, 1.45, 1.46 cplib.f90, 1.182, 1.183 exch_corr.f90, 1.43, 1.44 forces.f90, 1.35, 1.36 nl_base.f90, 1.30, 1.31 ortho_base.f90, 1.53, 1.54 phasefactor.f90, 1.14, 1.15 potentials.f90, 1.57, 1.58 runcp.f90, 1.52, 1.53

ccavazzoni at qe-forge.org ccavazzoni at qe-forge.org
Sun May 24 19:29:54 CEST 2009


Update of /cvsroot/q-e/espresso/CPV
In directory qeforge.qe-forge.org:/tmp/cvs-serv5626

Modified Files:
	chargedensity.f90 cplib.f90 exch_corr.f90 forces.f90 
	nl_base.f90 ortho_base.f90 phasefactor.f90 potentials.f90 
	runcp.f90 
Log Message:
- OpenMP parallelization


Index: chargedensity.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/chargedensity.f90,v
retrieving revision 1.45
retrieving revision 1.46
diff -u -d -r1.45 -r1.46
--- chargedensity.f90	12 Jan 2009 17:25:15 -0000	1.45
+++ chargedensity.f90	24 May 2009 17:29:52 -0000	1.46
@@ -487,7 +487,12 @@
             !  The size of psis is nnr: which is equal to the total number
             !  of local fourier coefficients.
             !
-            psis (:) = (0.d0, 0.d0)
+!$omp parallel default(shared), private(eig_offset, ig, eig_index )
+            !
+!$omp do
+            do ig = 1, SIZE(psis)
+               psis (ig) = (0.d0, 0.d0)
+            end do
             !
             !  Loop for all local g-vectors (ngw)
             !  c: stores the Fourier expansion coefficients
@@ -515,6 +520,7 @@
                   !  Otherwise we can do this once at the beginning, before the loop.
                   !  we choose to do the latter one.
 
+!$omp do
                   do ig=1,ngw
                      psis(nms(ig)+eig_offset*dffts%nnrx)=conjg(c(ig,i+eig_index-1))+ci*conjg(c(ig,i+eig_index))
                      psis(nps(ig)+eig_offset*dffts%nnrx)=c(ig,i+eig_index-1)+ci*c(ig,i+eig_index)
@@ -525,6 +531,7 @@
                ENDIF
                !
             end do
+!$omp end parallel
 
             !  2*NOGRP are trasformed at the same time
             !  psis: holds the fourier coefficients of the current proccesor
@@ -584,6 +591,7 @@
             IF( nr1sx * nr2sx * dffts%tg_npp( me_image + 1 ) > SIZE( psis ) ) &
                CALL errore( ' rhoofr ', ' psis size too low ', nr1sx * nr2sx * dffts%tg_npp( me_image + 1 ) )
 
+!$omp parallel do default(shared)
             do ir = 1, nr1sx * nr2sx * dffts%tg_npp( me_image + 1 )
                tmp_rhos(ir,iss1) = tmp_rhos(ir,iss1) + sa1*( real(psis(ir)))**2
                tmp_rhos(ir,iss2) = tmp_rhos(ir,iss2) + sa2*(aimag(psis(ir)))**2
@@ -656,27 +664,41 @@
       !
       ci = ( 0.0d0, 1.0d0 )
       do iss = 1, nspin
+!$omp parallel default(shared), private(ig)
+!$omp do
          do ig = 1, nnrx
             v( ig ) = ( 0.0d0, 0.0d0 )
          end do
+!$omp do
          do ig=1,ngm
             v(np(ig))=      ci*tpiba*gx(1,ig)*rhog(ig,iss)
             v(nm(ig))=CONJG(ci*tpiba*gx(1,ig)*rhog(ig,iss))
          end do
+!$omp end parallel
+         !
          call invfft( 'Dense', v, dfftp )
+         !
+!$omp parallel default(shared), private(ig,ir)
+!$omp do
          do ir=1,nnrx
             gradr(ir,1,iss)=DBLE(v(ir))
          end do
+!$omp do
          do ig=1,nnrx
             v(ig)=(0.0d0,0.0d0)
          end do
+!$omp do
          do ig=1,ngm
             v(np(ig))= tpiba*(      ci*gx(2,ig)*rhog(ig,iss)-           &
      &                                 gx(3,ig)*rhog(ig,iss) )
             v(nm(ig))= tpiba*(CONJG(ci*gx(2,ig)*rhog(ig,iss)+           &
      &                                 gx(3,ig)*rhog(ig,iss)))
          end do
+!$omp end parallel
+         !
          call invfft( 'Dense', v, dfftp )
+         !
+!$omp parallel do default(shared)
          do ir=1,nnrx
             gradr(ir,2,iss)= DBLE(v(ir))
             gradr(ir,3,iss)=AIMAG(v(ir))

Index: cplib.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/cplib.f90,v
retrieving revision 1.182
retrieving revision 1.183
diff -u -d -r1.182 -r1.183
--- cplib.f90	5 Apr 2009 08:47:35 -0000	1.182
+++ cplib.f90	24 May 2009 17:29:52 -0000	1.183
@@ -594,10 +594,13 @@
       !
       !     calculate csc(k)=<cp(i)|cp(k)>,  k<i
       !
-      ALLOCATE( temp( ngw ) )
-
       kmax = i - 1
+      !
+!$omp parallel default(shared), private( temp, k, ig )
 
+      ALLOCATE( temp( ngw ) )
+
+!$omp do
       DO k = 1, kmax
          csc(k) = 0.0d0
          IF ( ispin(i) .EQ. ispin(k) ) THEN
@@ -608,9 +611,15 @@
             IF (gstart == 2) csc(k) = csc(k) - temp(1)
          ENDIF
       END DO
+!$omp end do
+
+      DEALLOCATE( temp )
+
+!$omp end parallel
 
       CALL mp_sum( csc( 1:kmax ), intra_image_comm )
 
+      ALLOCATE( temp( ngw ) )
       !
       !     calculate bec(i)=<cp(i)|beta>
       !
@@ -1892,7 +1901,7 @@
       !
       INTEGER iss, isup, isdw, ig, ir, i, j, k, ij, is, ia
       REAL(DP) vave, ebac, wz, eh, ehpre
-      COMPLEX(DP)  fp, fm, ci, drhop
+      COMPLEX(DP)  fp, fm, ci, drhop, zpseu, zh
       COMPLEX(DP), ALLOCATABLE :: rhotmp(:), vtemp(:)
       ! COMPLEX(DP), ALLOCATABLE :: drhotmp(:,:,:)
       COMPLEX(DP), ALLOCATABLE :: drhot(:,:)
@@ -1999,15 +2008,29 @@
       !
       !     calculation local potential energy
       !
-      vtemp=(0.d0,0.d0)
+      zpseu = 0.0d0
+      !
+!$omp parallel default(shared), private(ig,is)
+!$omp do
+      DO ig = 1, SIZE(vtemp)
+         vtemp(ig)=(0.d0,0.d0)
+      END DO
       DO is=1,nsp
+!$omp do
          DO ig=1,ngs
             vtemp(ig)=vtemp(ig)+CONJG(rhotmp(ig))*sfac(ig,is)*vps(ig,is)
          END DO
       END DO
+!$omp do reduction(+:zpseu)
+      DO ig=1,ngs
+         zpseu = zpseu + vtemp(ig)
+      END DO
+!$omp end parallel
 
-      epseu = wz * DBLE(SUM(vtemp))
-      IF (gstart == 2) epseu=epseu-vtemp(1)
+      epseu = wz * DBLE(zpseu)
+      !
+      IF (gstart == 2) epseu = epseu - DBLE( vtemp(1) )
+      !
       CALL mp_sum( epseu, intra_image_comm )
 
       epseu = epseu * omega
@@ -2027,18 +2050,30 @@
       !
       IF( ttsic ) self_vloc = 0.d0 
 
+      zh = 0.0d0
+
+!$omp parallel default(shared), private(ig,is)
+
       DO is=1,nsp
+!$omp do
          DO ig=1,ngs
             rhotmp(ig)=rhotmp(ig)+sfac(ig,is)*rhops(ig,is)
          END DO
       END DO
       !
-      IF (gstart == 2) vtemp(1)=0.0d0
+!$omp do
       DO ig = gstart, ng
          vtemp(ig) = CONJG( rhotmp( ig ) ) * rhotmp( ig ) / g( ig )
       END DO
-!
-      eh = DBLE( SUM( vtemp ) ) * wz * 0.5d0 * fpi / tpiba2
+
+!$omp do reduction(+:zh)
+      DO ig = gstart, ng
+         zh = zh + vtemp(ig)
+      END DO
+
+!$omp end parallel
+
+      eh = DBLE( zh ) * wz * 0.5d0 * fpi / tpiba2
 !
       IF ( ttsic ) THEN
          !
@@ -2082,15 +2117,20 @@
       !
       !
       IF (gstart == 2) vtemp(1)=(0.d0,0.d0)
+
+!$omp parallel default(shared), private(ig,is)
+!$omp do
       DO ig=gstart,ng
          vtemp(ig)=rhotmp(ig)*fpi/(tpiba2*g(ig))
       END DO
 !
       DO is=1,nsp
+!$omp do
          DO ig=1,ngs
             vtemp(ig)=vtemp(ig)+sfac(ig,is)*vps(ig,is)
          END DO
       END DO
+!$omp end parallel
 !
 !     vtemp = v_loc(g) + v_h(g)
 !

Index: exch_corr.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/exch_corr.f90,v
retrieving revision 1.43
retrieving revision 1.44
diff -u -d -r1.43 -r1.44
--- exch_corr.f90	12 Jan 2009 17:25:15 -0000	1.43
+++ exch_corr.f90	24 May 2009 17:29:52 -0000	1.44
@@ -673,7 +673,7 @@
   real(DP) :: rup, rdw, ex, ec, vx(2), vc(2)
   real(DP) :: rh, grh2, zeta 
   real(DP) :: sx, sc, v1x, v2x, v1c, v2c
-  real(DP) :: rhox, arhox, e2, sx_dbg, sc_dbg
+  real(DP) :: rhox, arhox, e2
   real(DP) :: grho2(2), arho, segno
   real(DP) :: v1xup, v1xdw, v2xup, v2xdw
   real(DP) :: v1cup, v1cdw
@@ -686,15 +686,13 @@
 
   igcc_is_lyp = (get_igcc() == 3)
   !
-  sx_dbg = 0.0d0
-  sc_dbg = 0.0d0
-
   e2  = 1.0d0
   etxc = 0.0d0
   if( nspin == 1 ) then
      !
      ! spin-unpolarized case
      !
+!$omp parallel do private( rhox, arhox, ex, ec, vx, vc ), reduction(+:etxc)
      do ir = 1, nnr
         rhox = rhor (ir, nspin)
         arhox = abs (rhox)
@@ -704,6 +702,7 @@
            etxc = etxc + e2 * (ex + ec) * rhox
         endif
      enddo
+!$omp end parallel do
      !
   else
      !
@@ -730,8 +729,6 @@
            enddo
            etxc = etxc + e2 * (ex + ec) * rhox
         endif
-        sx_dbg = sx_dbg + ex
-        sc_dbg = sc_dbg + ec
      enddo
   endif
 
@@ -744,39 +741,29 @@
     debug_xc = .false.
   end if
 
-  ! write(6,*) 'debug 1 = ', sx_dbg, sc_dbg
-
   ! now come the corrections
 
-  sx_dbg = 0.0d0
-  sc_dbg = 0.0d0
-
   if( dft_is_gradient() ) then
 
-    do k = 1, nnr
-       !
-       !
-       do is = 1, nspin
-          grho2 (is) = grhor(k, 1, is)**2 + grhor(k, 2, is)**2 + grhor(k, 3, is)**2
-       enddo
+    if (nspin == 1) then
        !
+       !    This is the spin-unpolarised case
        !
-       if (nspin == 1) then
-          !
-          !    This is the spin-unpolarised case
+!$omp parallel do &
+!$omp private( is, grho2, arho, segno, sx, sc, v1x, v2x, v1c, v2c  ), reduction(+:etxc)
+       do k = 1, nnr
           !
+          grho2 (1) = grhor(k, 1, 1)**2 + grhor(k, 2, 1)**2 + grhor(k, 3, 1)**2
           arho = abs (rhor (k, 1) )
           segno = sign (1.d0, rhor (k, 1) )
           if (arho > epsr .and. grho2 (1) > epsg) then
 
              call gcxc (arho, grho2(1), sx, sc, v1x, v2x, v1c, v2c)
-
              !
              ! first term of the gradient correction : D(rho*Exc)/D(rho)
 
              v (k, 1) = v (k, 1) + e2 * (v1x + v1c)
 
-
              ! HERE h contains D(rho*Exc)/D(|grad rho|) / |grad rho|
              !
              h (k, 1, 1) = e2 * (v2x + v2c) 
@@ -784,15 +771,19 @@
 
           else
              h (k, 1, 1) = 0.d0
-             sx = 0.0d0
-             sc = 0.0d0
           endif
           !
-          !
-       else
-          !
-          !    spin-polarised case
-          !
+       end do
+!$omp end parallel do
+       !
+    else
+       !
+       !    spin-polarised case
+       !
+       do k = 1, nnr
+          do is = 1, nspin
+             grho2 (is) = grhor(k, 1, is)**2 + grhor(k, 2, is)**2 + grhor(k, 3, is)**2
+          enddo
           rup = rhor (k, 1)
           rdw = rhor (k, 2)
           call gcx_spin ( rup, rdw, grho2 (1), grho2 (2), sx, v1xup, v1xdw, v2xup, v2xdw)
@@ -844,13 +835,10 @@
           etxc = etxc + e2 * (sx + sc)
           !
           !
-       endif
-       !
-       sx_dbg = sx_dbg + ex
-       sc_dbg = sc_dbg + ec
+       enddo
        !
-    enddo
-
+    endif
+    !
   end if
 
   return
@@ -888,29 +876,35 @@
 
   if( dft_is_gradient() ) then
      !
+!$omp parallel default(shared), private(ipol,k,grup,grdw)
      if( nspin == 1 ) then
         !
         ! h contains D(rho*Exc)/D(|grad rho|) * (grad rho) / |grad rho|
         !
         do ipol = 1, 3
+!$omp do
            do k = 1, nnr
               grhor (k, ipol, 1) = h (k, 1, 1) * grhor (k, ipol, 1)
            enddo
+!$omp end do
         end do
         !
         !
      else
         !
         do ipol = 1, 3
+!$omp do
            do k = 1, nnr
              grup = grhor (k, ipol, 1)
              grdw = grhor (k, ipol, 2)
              grhor (k, ipol, 1) = h (k, 1, 1) * grup + h (k, 1, 2) * grdw
              grhor (k, ipol, 2) = h (k, 2, 2) * grdw + h (k, 2, 1) * grup
            enddo
+!$omp end do
         enddo
         !
      end if
+!$omp end parallel
      !
   end if
 

Index: forces.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/forces.f90,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -d -r1.35 -r1.36
--- forces.f90	2 Jan 2008 17:59:46 -0000	1.35
+++ forces.f90	24 May 2009 17:29:52 -0000	1.36
@@ -124,20 +124,26 @@
       !
       IF( use_task_groups ) THEN
          !
+!$omp parallel do 
          DO ir = 1, nr1sx * nr2sx * dffts%tg_npp( me_image + 1 )
             psi(ir) = CMPLX( v(ir,iss1) * DBLE( psi(ir) ), v(ir,iss2) * AIMAG( psi(ir) ) )
          END DO
+!$omp end parallel do 
          !
       ELSE
          !
          IF( PRESENT( v1 ) ) THEN
+!$omp parallel do 
             DO ir=1,nnrsx
                psi(ir)=CMPLX(v(ir,iss1)* DBLE(psi(ir)), v1(ir,iss2)*AIMAG(psi(ir)) )
             END DO
+!$omp end parallel do 
          ELSE
+!$omp parallel do 
             DO ir=1,nnrsx
                psi(ir)=CMPLX(v(ir,iss1)* DBLE(psi(ir)), v(ir,iss2)*AIMAG(psi(ir)) )
             END DO
+!$omp end parallel do 
          END IF
          !
       END IF
@@ -165,20 +171,24 @@
                fip = -0.5d0*f(i+idx)
             endif
             IF( use_task_groups ) THEN
+!$omp parallel do private( fp, fm )
                DO ig=1,ngw
                   fp= psi(nps(ig)+eig_offset) +  psi(nms(ig)+eig_offset)
                   fm= psi(nps(ig)+eig_offset) -  psi(nms(ig)+eig_offset)
-                  df(igno)= fi *(tpiba2 * ggp(ig) * c(ig,idx+i-1)+cmplx(real (fp), aimag(fm)))
-                  da(igno)= fip*(tpiba2 * ggp(ig) * c(ig,idx+i  )+cmplx(aimag(fp),-real (fm)))
-                  igno = igno + 1
+                  df(ig+igno-1)= fi *(tpiba2 * ggp(ig) * c(ig,idx+i-1)+cmplx(real (fp), aimag(fm)))
+                  da(ig+igno-1)= fip*(tpiba2 * ggp(ig) * c(ig,idx+i  )+cmplx(aimag(fp),-real (fm)))
                END DO
+!$omp end parallel do
+               igno = igno + ngw
             ELSE
+!$omp parallel do private( fp, fm )
                DO ig=1,ngw
                   fp= psi(nps(ig)) + psi(nms(ig))
                   fm= psi(nps(ig)) - psi(nms(ig))
                   df(ig)= fi*(tpiba2*ggp(ig)* c(ig,idx+i-1)+CMPLX(DBLE(fp), AIMAG(fm)))
                   da(ig)=fip*(tpiba2*ggp(ig)* c(ig,idx+i  )+CMPLX(AIMAG(fp),-DBLE(fm)))
                END DO
+!$omp end parallel do
             END IF
          END IF
 
@@ -217,6 +227,8 @@
                   fip= f(i+idx)
                END IF
                !
+!$omp parallel default(shared), private(iv,jv,ivoff,jvoff,dd,dv,inl,jnl,is,isa,ism)
+               !
                DO is = 1, nsp
                   DO iv = 1, nh(is)
                      IF( program_name == 'FPMD' ) THEN
@@ -231,37 +243,41 @@
                            END DO
                         END IF
                      ELSE
-                     DO jv = 1, nh(is)
-                        isa = 0
-                        DO ism = 1, is-1
-                           isa = isa + na( ism )
-                        END DO
-                        dv = dvan(iv,jv,is)
-                        ivoff = ish(is)+(iv-1)*na(is)
-                        jvoff = ish(is)+(jv-1)*na(is)
-                        IF( i + idx - 1 /= n ) THEN
-                           DO ia=1,na(is)
-                              inl = ivoff + ia
-                              jnl = jvoff + ia
-                              isa = isa + 1
-                              dd = deeq(iv,jv,isa,iss1) + dv
-                              af(inl,igrp) = af(inl,igrp) - fi  * dd * bec(jnl,i+idx-1)
-                              dd = deeq(iv,jv,isa,iss2) + dv
-                              aa(inl,igrp) = aa(inl,igrp) - fip * dd * bec(jnl,i+idx)
-                           END DO
-                        ELSE
-                           DO ia=1,na(is)
-                              inl = ivoff + ia
-                              jnl = jvoff + ia
-                              isa = isa + 1
-                              dd = deeq(iv,jv,isa,iss1) + dv
-                              af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1)
+                        DO jv = 1, nh(is)
+                           isa = 0
+                           DO ism = 1, is-1
+                              isa = isa + na( ism )
                            END DO
-                        END IF
-                     END DO
+                           dv = dvan(iv,jv,is)
+                           ivoff = ish(is)+(iv-1)*na(is)
+                           jvoff = ish(is)+(jv-1)*na(is)
+                           IF( i + idx - 1 /= n ) THEN
+!$omp do
+                              DO ia=1,na(is)
+                                 inl = ivoff + ia
+                                 jnl = jvoff + ia
+                                 isa = isa + 1
+                                 dd = deeq(iv,jv,isa,iss1) + dv
+                                 af(inl,igrp) = af(inl,igrp) - fi  * dd * bec(jnl,i+idx-1)
+                                 dd = deeq(iv,jv,isa,iss2) + dv
+                                 aa(inl,igrp) = aa(inl,igrp) - fip * dd * bec(jnl,i+idx)
+                              END DO
+                           ELSE
+!$omp do
+                              DO ia=1,na(is)
+                                 inl = ivoff + ia
+                                 jnl = jvoff + ia
+                                 isa = isa + 1
+                                 dd = deeq(iv,jv,isa,iss1) + dv
+                                 af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1)
+                              END DO
+                           END IF
+                        END DO
                      END IF
                   END DO
                END DO
+
+!$omp end parallel
       
             END IF
 

Index: nl_base.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/nl_base.f90,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -d -r1.30 -r1.31
--- nl_base.f90	12 Jan 2009 17:25:16 -0000	1.30
+++ nl_base.f90	24 May 2009 17:29:52 -0000	1.31
@@ -68,6 +68,7 @@
          !
          do iv = 1, nh( is )
             !
+!$omp parallel default(shared), private(l,ixr,ixi,signre,signim,ig,arg,ia)
             l = nhtol( iv, is )
             !
             if (l == 0) then
@@ -92,6 +93,7 @@
                signim =  1.0d0
             endif
 !
+!$omp do
             do ia=1,na(is)
                !
                !  q = 0   component (with weight 1.0)
@@ -110,7 +112,9 @@
                end do
                !
             end do
+!$omp end do
             
+!$omp end parallel
             !
             IF( nproc_image > 1 ) THEN
                inl=(iv-1)*na(is)+1
@@ -218,6 +222,7 @@
          !
          do iv = 1, nh( is )
             !
+!$omp parallel default(shared), private(l,ixr,ixi,signre,signim,ig,arg,ia)
             l = nhtol( iv, is )
             !
             if (l == 0) then
@@ -242,6 +247,7 @@
                signim =  1.0d0
             endif
 !
+!$omp do
             do ia=1,na(is)
                !
                !  q = 0   component (with weight 1.0)
@@ -260,6 +266,9 @@
                end do
                !
             end do
+!$omp end do
+            
+!$omp end parallel
             
             !
             IF( nproc_image > 1 ) THEN
@@ -381,6 +390,7 @@
                !
                !     order of states:  s_1  p_x1  p_z1  p_y1  s_2  p_x2  p_z2  p_y2
                !
+!$omp parallel default(shared), private(l,ixr,ixi,signre,signim,ig,arg,ia)
                l=nhtol(iv,is)
                if (l.eq.0) then
                   ixr = 2
@@ -404,6 +414,7 @@
                   signim =  1.0d0
                endif
 !    
+!$omp do
                do ia=1,na(is)
                   !    q = 0   component (with weight 1.0)
                   if (gstart == 2) then
@@ -417,6 +428,8 @@
                      wrk2(2,ig,ia) = signim*arg*eigr(ixi,ig,ia+isa)
                   end do
                end do
+!$omp end do
+!$omp end parallel 
                inl=ish(is)+(iv-1)*na(is)+1
                CALL DGEMM( 'T', 'N', na(is), n, 2*ngw, 1.0d0, wrk2, 2*ngw, c, 2*ngw, 0.0d0, becdr_repl( inl, 1 ), nkb )
             end do
@@ -499,6 +512,7 @@
                !
                !     order of states:  s_1  p_x1  p_z1  p_y1  s_2  p_x2  p_z2  p_y2
                !
+!$omp parallel default(shared), private(l,ixr,ixi,signre,signim,ig,arg,ia)
                l=nhtol(iv,is)
                if (l.eq.0) then
                   ixr = 2
@@ -522,6 +536,7 @@
                   signim =  1.0d0
                endif
 !    
+!$omp do
                do ia=1,na(is)
                   !    q = 0   component (with weight 1.0)
                   if (gstart == 2) then
@@ -535,6 +550,8 @@
                      wrk2(2,ig,ia) = signim*arg*eigr(ixi,ig,ia+isa)
                   end do
                end do
+!$omp end do
+!$omp end parallel 
                inl=ish(is)+(iv-1)*na(is)+1
                CALL DGEMM( 'T', 'N', na(is), n, 2*ngw, 1.0d0, wrk2, 2*ngw, c, 2*ngw, 0.0d0, becdr( inl, 1, k ), nkb )
             end do
@@ -582,11 +599,13 @@
       !
       ! local
       !
-      real(DP) :: sum, sums(2)
+      real(DP) :: sumt, sums(2)
       integer   :: is, iv, jv, ijv, inl, jnl, isa, ism, ia, iss, i
       !
       ennl = 0.d0
       !
+!$omp parallel default(shared), &
+!$omp private(is,iv,ijv,isa,ism,ia,inl,jnl,sums,i,iss,sumt), reduction(+:ennl)
       do is = 1, nsp
          do iv = 1, nh(is)
             do jv = iv, nh(is)
@@ -595,6 +614,7 @@
                do ism = 1, is - 1
                   isa = isa + na(ism)
                end do
+!$omp do
                do ia = 1, na(is)
                   inl = ish(is)+(iv-1)*na(is)+ia
                   jnl = ish(is)+(jv-1)*na(is)+ia
@@ -604,17 +624,19 @@
                      iss = ispin(i)
                      sums(iss) = sums(iss) + f(i) * bec(inl,i) * bec(jnl,i)
                   end do
-                  sum = 0.d0
+                  sumt = 0.d0
                   do iss = 1, nspin
                      rhovan( ijv, isa, iss ) = sums( iss )
-                     sum = sum + sums( iss )
+                     sumt = sumt + sums( iss )
                   end do
-                  if( iv .ne. jv ) sum = 2.d0 * sum
-                  ennl = ennl + sum * dvan( jv, iv, is)
+                  if( iv .ne. jv ) sumt = 2.d0 * sumt
+                  ennl = ennl + sumt * dvan( jv, iv, is)
                end do
+!$omp end do
             end do
          end do
       end do
+!$omp end parallel
       !
       return
    end function ennl
@@ -998,6 +1020,9 @@
   !
   real(DP), allocatable :: tmpbec(:,:), tmpdr(:,:) 
   real(DP), allocatable :: fion_loc(:,:)
+#ifdef __OPENMP 
+  INTEGER :: mytid, ntids, omp_get_thread_num, omp_get_num_threads
+#endif  
   !
   call start_clock( 'nlfq' )
   !
@@ -1010,16 +1035,30 @@
   !
   fion_loc = 0.0d0
 
-  allocate ( tmpbec( nhm, nlax ), tmpdr( nhm, nlax ) )
   !
-  DO k=1,3
+  DO k = 1, 3
 
-     isa=0
+!$omp parallel default(shared), &
+!$omp private(tmpbec,tmpdr,isa,is,ia,iss,nss,istart,ir,nr,ioff,iv,jv,inl,temp,i,mytid,ntids)
+#ifdef __OPENMP
+     mytid = omp_get_thread_num()  ! take the thread ID
+     ntids = omp_get_num_threads() ! take the number of threads
+#endif
+
+     allocate ( tmpbec( nhm, nlax ), tmpdr( nhm, nlax ) )
+
+     isa = 0
+     !
      DO is=1,nsp
         DO ia=1,na(is)
 
            isa=isa+1
 
+#ifdef __OPENMP
+           ! distribute atoms round robin to threads
+           !
+           IF( MOD( isa, ntids ) /= mytid ) CYCLE
+#endif  
            DO iss = 1, nspin
 
               nss = nupdwn( iss )
@@ -1069,6 +1108,8 @@
            END DO
         END DO
      END DO
+     deallocate ( tmpbec, tmpdr )
+!$omp end parallel
   END DO
   !
   CALL mp_sum( fion_loc, intra_image_comm )
@@ -1079,8 +1120,6 @@
   !
   deallocate ( fion_loc )
   !
-  deallocate ( tmpbec, tmpdr )
-
   call stop_clock( 'nlfq' )
   !
   return

Index: ortho_base.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/ortho_base.f90,v
retrieving revision 1.53
retrieving revision 1.54
diff -u -d -r1.53 -r1.54
--- ortho_base.f90	5 Apr 2009 08:37:40 -0000	1.53
+++ ortho_base.f90	24 May 2009 17:29:52 -0000	1.54
@@ -474,6 +474,7 @@
          CALL sqr_tr_cannon( nss, tmp1, ldx, tr1, ldx, desc )
          CALL sqr_tr_cannon( nss, tmp2, ldx, tr2, ldx, desc )
          !
+!$omp parallel do default(shared), private(j)
          DO i=1,nr
             DO j=1,nc
                x1(i,j) = sig(i,j)-tmp1(i,j)-tr1(i,j)-dd(i,j)
@@ -504,6 +505,7 @@
          !
          !       g=ut*x1*u/d  (g is stored in tmp1)
          !
+!$omp parallel do default(shared), private(j)
          DO i=1,nr
             DO j=1,nc
                tmp1(i,j)=tmp2(i,j)/(diag(i+ir-1)+diag(j+ic-1))

Index: phasefactor.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/phasefactor.f90,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- phasefactor.f90	26 Apr 2007 09:24:37 -0000	1.14
+++ phasefactor.f90	24 May 2009 17:29:52 -0000	1.15
@@ -167,6 +167,7 @@
 
       call start_clock( 'strucf' )
 
+!$omp parallel do default(shared), private(ig1,ig2,ig3,isa,is,ia)
       DO ig = 1, ngm
         ig1 = mill( 1, ig ) 
         ig2 = mill( 2, ig ) 

Index: potentials.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/potentials.f90,v
retrieving revision 1.57
retrieving revision 1.58
diff -u -d -r1.57 -r1.58
--- potentials.f90	8 May 2009 15:13:03 -0000	1.57
+++ potentials.f90	24 May 2009 17:29:52 -0000	1.58
@@ -869,6 +869,7 @@
       ehte  = 0.0d0
       ehti  = 0.0d0
 
+!$omp parallel do default(shared), private(rp,is,rhet,rhog,fpibg), reduction(+:eh,ehte,ehti)
       DO ig = gstart, ngm 
 
         rp   = (0.D0,0.D0)

Index: runcp.f90
===================================================================
RCS file: /cvsroot/q-e/espresso/CPV/runcp.f90,v
retrieving revision 1.52
retrieving revision 1.53
diff -u -d -r1.52 -r1.53
--- runcp.f90	2 Jan 2008 17:59:46 -0000	1.52
+++ runcp.f90	24 May 2009 17:29:52 -0000	1.53
@@ -206,7 +206,9 @@
 
         end do
 
-        DEALLOCATE( c2, c3, tg_rhos )
+        DEALLOCATE( c2 )
+        DEALLOCATE( c3 )
+        DEALLOCATE( tg_rhos )
 
      END IF
 



More information about the Q-e-commits mailing list