我正在尝试并行化矩阵乘法的最内层循环。但是,只要有1个以上的线程,矩阵乘法就不会在输出数组中存储正确的值,因此我试图找出原因。

void matrix() {
int i,j,k,sum;
for (i = 0; i < N; i++) {
    for (j = 0; j < N; j++){
        sum = 0;
        #pragma omp parallel for shared(sum,i,j) private(k)
            for (k = 0; k < N; k++) {
                #pragma omp critical
                    sum = sum + A[i][k] * B[k][j];
            }
        C[i][j] = sum;
    }
}
}


我也尝试使用:

void matrix() {
int i,j,k,sum;
for (i = 0; i < N; i++) {
    for (j = 0; j < N; j++){
        sum = 0;
        #pragma omp parallel for shared(sum,i,j) private(k)
            for (k = 0; k < N; k++) {
                #pragma omp atomic
                    sum += A[i][k] * B[k][j];
            }
        C[i][j] = sum;
    }
}
}


但这也不起作用。我还尝试了第二个#pragma,但没有:

void matrixC() {
int i,j,k,sum,np;
for (i = 0; i < N; i++) {
    for (j = 0; j < N; j++){
        sum = 0;
        #pragma omp parallel for reduction(+:sum)
            for (k = 0; k < N; k++) {
                    sum = sum + A[i][k] * B[k][j];
            }
        C[i][j] = sum;
    }
}
}


我是OpenMP的新手,但从我在网上阅读的所有内容中,这些解决方案至少应能起作用。我知道加总和时可能是种族状况的问题,但我不知道为什么它仍然得到错误的总和。

编辑:这是代码的一个更完整的版本:

double A[N][N];
double B[N][N];
double C[N][N];
int CHOOSE = CH;

void matrixSequential() {
int i,j,k,sum;
for (i = 0; i < N; i++) {
    for (j = 0; j < N; j++) {
        sum = 0;
        for (k = 0; k < N; k++) {
            sum += A[i][k] * B[k][j];
        }
        C[i][j] = sum;
    }
}
}

void matrixParallel() {
int i,j,k,sum;
for (i = 0; i < N; i++) {
    for (j = 0; j < N; j++){
        sum = 0;
        #pragma omp parallel for shared (i,j) private(k) reduction(+:sum)
            for (k = 0; k < N; k++) {
                sum = sum + A[i][k] * B[k][j];
            }
        C[i][j] = sum;
    }
}
}

int main(int argc, const char * argv[]) {
//populating arrays
int i,j;
for(i=0; i < N; i++){
    for(j=0; j < N; j++){
        A[i][j] = i+j;
        B[i][j] = i+j;
    }
}

for(i=0; i < N; i++){
    for(j=0; j < N; j++){
        C[i][j] = 0;
    }
}

if (CHOOSE == 0) {
    matrixSequential();
}
else if(CHOOSE == 1) {
    matrixParallel();
}

//checking for correctness
double sum;
for(i=0; i < N; i++){
    sum += C[i][i];
}
printf("Sum of diagonal elements of array C: %f \n", sum);
return 0;
}

最佳答案

使sum为减少变量是正确的方法,并且应该起作用(请参见https://computing.llnl.gov/tutorials/openMP/#REDUCTION)。请注意,您仍然必须声明共享变量和私有变量,例如k

10-05 17:47