c - MPI_Allreduce无法正常工作-红黑SOR

我正在实现红黑SOR的并行版本。
MPIYALALREST部分，我想得到每个进程的最大误差，是不起作用的。它从不改变，它给出的值高于2.0，即使只有一个进程。发生什么事？?
下面是代码，有趣的部分在sor例程的末尾。
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <mpi.h>

#define M 700

int numtasks, rank, inext, iprev, ista, iend;

double unew[M + 2][M + 2] = {{ 0 }};
double solution[M + 2][M + 2] = {{ 0 }};

double uold[M + 2][M + 2] = {{ 0 }};

double bufs1[M + 2];
double bufs2[M + 2];

double bufr1[M + 2];
double bufr2[M + 2];

MPI_Request ireqs1;
MPI_Request ireqs2;

MPI_Request ireqr1;
MPI_Request ireqr2;

MPI_Status istatus;

double compute_error(double solution[][M + 2], double u[][M + 2], const int m);
int sor(double unew[][M + 2], double uold[][M + 2], double solution[][M + 2], const double omega, const double tol, const int m);
void para_range(const int n1, const int n2, const int nprocs, const int irank, int * restrict ista, int * restrict iend);
void shift(const int iflg);
inline int min(const int a, const int b);

int main(int argc, char * argv[])
{
    const int m = M;

    int ierr = MPI_Init(&argc, &argv);

    if(ierr != MPI_SUCCESS)
    {
        perror("MPI init failed. Terminating T800.\n");
        exit(1);
    }

    int i, j;

    const double begin = MPI_Wtime();

    const double pi = 4.0 * atan(1.0);

    const double h = pi / (m + 1);

    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    for(i = 0; i < m + 2; ++i)
    {
       uold[i][M + 1] = sin(i * h);
    }

    for(i = 0; i < m + 2; ++i)
    {
    for(j = 0; j < m + 1; ++j)
        {
            uold[i][j] = j * h * uold[i][M + 1];
        }
    }

    for(i = 0; i < m + 2; ++i)
    {
        for(j = 0; j < m + 2; ++j)
        {
            solution[i][j] = sinh(j * h) * sin(i * h) / sinh(pi);
        }
    }

    para_range(0, m, numtasks, rank, &ista, &iend);

    //printf("rank %d: from %d to %d\n", rank, ista, iend);

    inext = rank + 1;
    iprev = rank - 1;

    if(inext == numtasks)
    {
        inext = MPI_PROC_NULL;
    }

    if(iprev == -1)
    {
        iprev = MPI_PROC_NULL;
    }

    const double omega = 2.0 / ( 1.0 + sin(pi / (m + 1)) );
    const double tol = 0.001;

    const int iters = sor(unew, uold, solution, omega, tol, m);

    const double end = MPI_Wtime();

    MPI_Finalize();

    if(rank == 0)
    {
        printf(" \n");
        printf(" Omega = %f\n", omega);
        printf(" It took %d iterations.\n", iters);

        printf("Total time = %f\n\n\n", end - begin);
    }

    return 0;
}

double compute_error(double solution[][M + 2], double u[][M + 2], const int m)
{
    double error = 0.0;
    int i, j;

    for(i = 1; i < m + 1; ++i)
    {
        for(j = 1; j < m + 1; ++j)
        {
            const double abs_diff = fabs(solution[i][j] - u[i][j]);

            if(error < abs_diff)
            {
                error = abs_diff;
            }
        }
    }

    return error;
}

int sor(double unew[][M + 2], double uold[][M + 2], double solution[][M + 2], const double omega, const double tol, const int m)
{
    int i, j;

    int iters = 0;
    double error = compute_error(solution, uold, m);
    double error2 = error;
    double temp;

    while(error2 > tol)
    {
        shift(0);

        for(i = 1; i < m + 1; ++i)
        {
            for(j = (i % 2) + 1; j < m + 1; j += 2)
            {
                temp = 0.25 * (uold[i][j - 1] + uold[i - 1][j]
                   + uold[i + 1][j] + uold[i][j + 1]) - uold[i][j];
                uold[i][j] += omega * temp;
            }
        }

        shift(1);

        for(i = 1; i < m + 1; ++i)
        {
            for(j = ((i + 1) % 2) + 1; j < m + 1; j += 2)
            {
                temp = 0.25 * (uold[i][j - 1] + uold[i - 1][j]
                   + uold[i + 1][j] + uold[i][j + 1]) - uold[i][j];
                uold[i][j] += omega * temp;
            }
        }

        ++iters;

        if(iters % 20 == 0)
        {
            // THIS IS NOT COOL
            error = compute_error(solution, uold, m);
            MPI_Allreduce(&error, &error2, 1, MPI_REAL, MPI_MAX, MPI_COMM_WORLD);

            //printf("%d=%f\n", rank, error2);
        }
    }

    return iters;
}

void para_range(const int n1, const int n2, const int nprocs, const int irank, int * restrict ista, int * restrict iend)
{
    const int iwork = ((n2 - n1) / nprocs) + 1;

    *ista = min(irank * iwork + n1, n2 + 1);
    *iend = min(*ista + iwork - 1, n2);
}

void shift(const int iflg)
{
    const int is1 = ((ista + iflg) % 2) + 1;
    const int is2 = ((iend + iflg) % 2) + 1;

    const int ir1 = 3 - is1;
    const int ir2 = 3 - is2;

    int i, icnt1=0, icnt2=0;

    if(rank != 0)
    {
        icnt1 = 0;

        for(i = is1; i < M; i += 2)
        {
            ++icnt1;
            bufs1[icnt1] = unew[i][ista];
        }
    }

    if (rank != numtasks - 1)
    {
        icnt2 = 0;

        for(i = is2; i < M; i += 2)
        {
            ++icnt2;
            bufs2[icnt2] = unew[i][iend];
        }
    }

    MPI_Isend(bufs1, icnt1, MPI_REAL, iprev, 1, MPI_COMM_WORLD, &ireqs1);
    MPI_Isend(bufs2, icnt2, MPI_REAL, inext, 1, MPI_COMM_WORLD, &ireqs2);

    MPI_Irecv(bufr1, M + 2, MPI_REAL, iprev, 1, MPI_COMM_WORLD, &ireqr1);
    MPI_Irecv(bufr2, M + 2, MPI_REAL, inext, 1, MPI_COMM_WORLD, &ireqr2);

    MPI_Wait(&ireqs1, &istatus);
    MPI_Wait(&ireqs2, &istatus);

    MPI_Wait(&ireqr1, &istatus);
    MPI_Wait(&ireqr2, &istatus);

    int icnt;

    if(rank != 0)
    {
        icnt = 0;

        for(i = ir1; i < M; i += 2)
        {
            ++icnt;
            unew[i][ista - 1] = bufr1[icnt];
        }
    }

    if(rank != numtasks - 1)
        {
        icnt = 0;

        for(i = ir2; i < M; i += 2)
        {
            ++icnt;
            unew[i][iend + 1] = bufr2[icnt];
        }
    }
}

inline int min(const int a, const int b)
{
    if(a > b)
    {
        return b;
    }

    return a;
}
最佳答案
MPI_REAL是Fortran的REAL类型，通常与C中的float相对应。您要减少的数据是double类型，因此应该改用MPI_DOUBLE。这同样适用于程序其他部分中的MPI调用。