'Gauss Siedel iterative solver using OpenMP does not converge

I want to write a Gauss Siedel iterative solver using OpenMP. My solver does not converge to the correct result, and I could not figure it out why.

The governing equation is a steady-state heat equation: del^2(T)=S, where S is the heat source function. S=-35*pi/2*cos(pi*x/2)*cos(3*pi*y/2)*cos(5*pi*z/2)

I implement the OpenMP inside the dowhile loop since it does not let me start parallel form a do while loop. Is there any way to change the do while loop to a do loop?

The result converges without parallel computing, but after adding the openmp, then it does not converge anymore.

Here is my code:

    PROGRAM GAUSS_MP
        USE omp_lib
        IMPLICIT NONE

        REAL*8, DIMENSION(:,:,:), ALLOCATABLE :: S, T
        REAL*8 :: X, Y, Z
        REAL*8, PARAMETER :: PI=2*ASIN(1.0)
        REAL*8 :: DX                     ! STEP SIZE DX=DY=DZ
        REAL*8, PARAMETER :: TOL=1.0E-5  ! TOLERANCE 
        REAL*8 :: DUMAX
        REAL*8 :: T_OLD
        REAL*8 :: T1,T2

        INTEGER, PARAMETER :: N=100        ! GRID NUMBER, START FROM 1
        INTEGER, PARAMETER :: ITERMAX=1E5   ! MAXIMUM ITERATION
        INTEGER :: I, J, ITER, K
        INTEGER :: POINT_NUM
        INTEGER, PARAMETER :: NUM_THREADS=32
    !    INTEGER :: A

        ! INITIALIZE OPENMP THREADS
        CALL OMP_SET_NUM_THREADS(NUM_THREADS)


        ! COMPUTE STEP SIZE
        DX=2.0/REAL(N-1, KIND=8)  
    !    PRINT *, "DX=", DX
        
        ! INITIALIZE THE T ARRAYS AND S(I)
        ALLOCATE(S(N,N,N), T(N,N,N))
``````````````````````````````````````````````````````````````````````````````            
        ! INITIAL GUESS
        T=1.0
``````````````````````````````````````````````````````````````````````````````            
        ! BOUNDARY CONDITION
        T(1,:,:)=0.0; T(N,:,:)=0.0; T(:,:,1)=0.0; T(:,:,N)=0.0;
        T(:,1,:)=0.0; T(:,N,:)=0.0;
        
        X=0.0D0 ! COORDINATES
        Y=0.0D0

        S=0.0D0 ! SOURCE
``````````````````````````````````````````````````````````````````````````````            
        ! SOURCE MATRIX
        !$OMP PARALLEL DO PRIVATE(I,J,K)
         DO K=2,N-1
            Z=-1.0+(K-1)*DX
            DO I=2,N-1
                Y=-1.0+(I-1)*DX
                DO J=2,N-1
                    X=-1.0+(J-1)*DX
                    S(I,J,K)=-35.0*PI/2.0*COS(PI*X/2.0)*COS(3.0*PI*Y/2.0)&
                            *COS(5.0*PI*Z/2.0)
                END DO
            END DO
        END DO
        !$OMP END PARALLEL DO
``````````````````````````````````````````````````````````````````````````````            
        ! GAUSS-SEIDEL ITERATION
        PRINT *, 'PARALLEL START'
        T1=OMP_GET_WTIME()
      
        ITER=0
        DUMAX=1.0D0 ! UPDATE DIFFERENCE
        DO WHILE(ITER <= ITERMAX .AND. DUMAX >= TOL)
            ITER=ITER+1
            DUMAX=0.0D0
            !$OMP PARALLEL PRIVATE(T_OLD, K, I, J, DUMAX)
            !$OMP DO REDUCTION(MAX:DUMAX)
            DO K=2,N-1
                DO I=2, N-1
                    DO J=2, N-1
                        T_OLD=T(I,J,K)
                        T(I,J,K)=1.0/6.0*(T(I+1,J,K)+T(I-1,J,K)+T(I,J-1,K)+T(I,J+1,K) &
                                    +T(I,J,K+1)+T(I,J,K-1) &
                                    -DX**2*S(I,J,K))
                        DUMAX=MAX(DUMAX, ABS(T_OLD-T(I,J,K)))
                    END DO
                END DO
            END DO
            !$OMP END DO
           !$OMP END PARALLEL
        END DO
        
        T2=OMP_GET_WTIME()

    END PROGRAM GAUSS_MP


Solution 1:[1]

Just to clarify how the approach using ordered and depend mentioned by @Michael Klemm works, consider the following example of a parallel Gauss-Seidel solver which solves a Poisson problem in 3D.

SUBROUTINE gauss_seidel_par(u,f,N,itermax)
   REAL(dp), DIMENSION(:,:,:), INTENT(INOUT) :: u
   REAL(dp), DIMENSION(:,:,:), INTENT(IN) :: f
   INTEGER, INTENT(IN) :: N, itermax
   INTEGER :: i, j, k, l
   REAL(dp) :: factor1, factor2
   factor1 = 1.0_dp/6.0_dp              ! Coefficient of u(i,j,k)
   factor2 = (2.0_dp/REAL(N+1.0_dp))**2 ! Delta^2
   !$omp parallel
   DO l = 1,itermax                     ! run until maximum number of iterations is reached
   !$omp do schedule(static,1) ordered(2)
   DO k = 2,N-1
     DO j = 2,N-1
       !$omp ordered depend(sink: k-1,j) depend(sink: k,j-1)
       DO i = 2,N-1
         u(i,j,k) = factor1*(u(i+1,j,k)+u(i,j+1,k)+u(i,j,k+1)+u(i-1,j,k)+u(i,j-1,k)+u(i,j,k-1) + factor2*f(i,j,k))
       ENDDO
       !$omp ordered depend(source)
     ENDDO
   ENDDO
   ENDDO
   !$omp end parallel
   END SUBROUTINE gauss_seidel_par

The ordered clause states that the loops should be passed in order, i.e. 1,2,3.... instead of a random order. The depend clause then specifies which updated values the scheme depends on, in this case, the points on the stencil that have already been passed.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1