我使用的是gcc 4.8.1,编译选项为“-O2 -Wall -fopenmp”。

我读过类似的主题(“OpenMP with 1 thread slower than sequential version”和“OpenMP overhead”)——大家的意见从“没有开销”到“开销很大”不等。我想知道在我的具体情况下(外层是普通for循环,内部使用parallel for),哪种使用OpenMP的方式更好。

for (size_t k = 0 k < maxk; ++k) { // k is ~5000
    // init reduction variables
    const bool is_time_for_reduction = ;// init from k
    double mmin = INFINITY, mmax = -INFINITY;
    double sum = 0.0;

    #pragma omp parallel shared(m1, m2)
       // w, h are both  between 1000 and 2000
        #pragma omp for
        for (size_t i = 0; i < h; ++i) { // w,h - consts
            for (size_t j = 0; j < w; ++j) {
                // computations with matrices m1 and m2, using only m1,m2 and constants w,h

        if (is_time_for_reduction) {
            #pragma omp for reduction (max/min/sum: mmax,mmin,sum)
            for (size_t i = 0; i < h; ++i) {
                for (size_t j = 0; j < w; ++j) {
                    // reductions

    if (is_time_for_reduction) {
        // use "reduced" variables



for (size_t k = 0 k < maxk; ++k) {
    // init reduction variables
    const bool is_time_for_reduction = ;// init from k
    double mmin = INFINITY, mmax = -INFINITY;
    double sum = 0.0;

    #pragma omp parallel for
    for (size_t i = 0; i < h; ++i) { // w,h - consts
        for (size_t j = 0; j < w; ++j) {
            // computations with matrices m1 and m2, using only m1,m2 and constants w,h

    if (is_time_for_reduction) {
       #pragma omp parallel for reduction (max/min/sum: mmax,mmin,sum)
       for (size_t i = 0; i < h; ++i) {
           for (size_t j = 0; j < w; ++j) {
               // reductions
       // use "reduced" variables

关于c - 单线程OpenMP与顺序的开销,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/26892343/

10-09 06:00