无法填充在gpu上分配的数组

无法填充在gpu上分配的数组

本文介绍了无法填充在gpu上分配的数组的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

请帮帮我.我有以下代码

Please, help me. I have the following code

...
#include <accelmath.h>
#include <openacc.h>
// Upper limit for the particle count; K below is set from it.
const long int G=100000;
// Capacity of the fixed-size arrays particles, sort and ind01.
const unsigned int GL=100000;
// Bound compared against LIFE before a new particle is appended.
const long int K=G;
// Threshold compared against Particle::x; the float literal 1.0f is converted to the int value 1.
const int LE=1.0f;
// One particle record: x is the value tested against LE, rs receives MAX_ELEMENT when the particle is created.
struct Particle
{
  float x;
  float rs;
};
// Particle storage; the first LIFE entries are the ones in use.
Particle particles[GL];
// Per-particle class flag: 1 when x>=LE, otherwise 0.
int sort[GL];
// Index table filled from the front for class 1 and from the back for class 0.
int ind01[GL];
// Value stored into rs of the next created particle, then incremented.
long int MAX_ELEMENT=1;
int POSITION1;
int POSITION0;
// Number of particles currently stored (also the index of the next free slot).
int LIFE=0;
// Forces the first pass through the main loop.
bool start=true;
int mini;
int count0;
int count1;
int GL1;
int js;

// device_resident: per the OpenACC specification these variables are allocated in device memory only.
// NOTE(review): host-side initializers such as MAX_ELEMENT=1 may not reach a device_resident variable — confirm.
#pragma acc declare device_resident(ind01,POSITION0,POSITION1,mini,GL1,js,MAX_ELEMENT,count0,count1,K)
// declare create: gives LIFE, particles and sort a device copy in addition to the host copy.
#pragma acc declare create(LIFE,particles,sort)

 // One simulation step as posted in the question: append a particle, classify
 // all stored particles against LE, and build the index table ind01.
 // Part of the body is elided in the listing (the "..." line near the end).
 void function_device()
 {
   // Bring LIFE and particles to the host and print LIFE.
   // NOTE(review): the update is async and no wait is visible before LIFE is read — confirm the host value is ready.
   #pragma acc update host(LIFE,particles) async
    std::cout<<"LIFE before="<<LIFE<<std::endl;
   // Push the host copies of LIFE and particles back to the device.
   #pragma acc update device(LIFE,particles) async
   // One gang with vector length 1: reset the counters and append one particle while LIFE<K.
   #pragma acc parallel num_gangs(1) vector_length(1) present(particles) async
   {
     count0=0;
     count1=0;
     if(LIFE<K)
     {
       particles[LIFE].x=5.0;
       particles[LIFE].rs=MAX_ELEMENT;
       ++MAX_ELEMENT;
       ++LIFE;
      }
    }
     // Classify the first LIFE particles: sort[i]=1 when x>=LE, else 0, counting each class.
     // NOTE(review): this loop directive follows the closing brace of the parallel
     // region above, so it is not enclosed in any compute construct.
     #pragma acc loop vector reduction(+:count0,count1)
     for(int i=0; i<LIFE; ++i)
     {
       if(particles[i].x>=LE)
       {
         sort[i]=1;
         count1=count1+1;
       }
       else
       {
         sort[i]=0;
         count0=count0+1;
       }
     }
   // Re-initialise the counters as write cursors: count1 starts at the front (0),
   // count0 at the last used index (LIFE-1). This overwrites the tallies from the loop above.
   #pragma acc parallel num_gangs(1) vector_length(1)
     {
       GL1=LIFE-1;
       count0=GL1;
       count1=0;
     }
     // Fill ind01: indices with sort==1 go forward from 0, the others backward from GL1.
     // NOTE(review): this loop directive is also outside any compute construct.
     #pragma acc loop seq
     for(int i=0; i<LIFE; ++i)
     {
       if(sort[i]==1)
       {
         ind01[count1++]=i;
       }
       else
       {
         ind01[count0--]=i;
       }
     }
    // mini = min(GL1-count0, count1), i.e. the smaller of the two class sizes; js is reset to 0.
    #pragma acc parallel num_gangs(1) vector_length(1)
    {
      mini=GL1-count0;
      if(count1<mini) mini=count1;
      js=0;
    }
    ...
    // Copy the updated LIFE back to the host for the loop condition in main.
    #pragma acc update host(LIFE) async
  }
  // Driver as posted in the question: initialises the NVIDIA device and calls
  // function_device() repeatedly, printing LIFE and the step number each pass.
  int main(int argc, char **argv)
  {
    acc_init(acc_device_nvidia);
    int step=1;
// Runs once unconditionally (start), then for as long as the host copy of LIFE is positive.
// NOTE(review): in the visible code LIFE is only ever incremented, so this
// condition has no visible way to become false — part of function_device is elided.
while(start==true || LIFE>0)
{
      std::cout<<" LIFE="<<LIFE<<std::endl;
      start=false;
      function_device();
      std::cout<<"MAIN LOOP # "<<step<<std::endl;
      ++step;
    }
  }

在我填写了在gpu上分配的数组之后:

After I have filled in the array allocated on the GPU:

     particles[LIFE].x=5.0;

但打印输出:

     #pragma acc update host(LIFE,particles) async
     std::cout<<"LIFE after injector="<<LIFE<<std::endl;
     for(int i=0; i<LIFE; ++i) std::cout<<" particles: "<<particles[i]<<std::endl;

和:

    #pragma acc update host(LIFE,sort) async
     std::cout<<"LIFE after 1="<<LIFE<<" c0="<<count0<<" c1="<<count1<<std::endl;
     for(int i=0; i<LIFE; ++i) std::cout<<"sort: "<<sort[i]<<std::endl;

输出结果是 LIFE=1,而数组中只有 0,就好像我没有用 5.0f 填充 particles 数组一样。应该如何填充"particles"数组?我在 GPU 上启动串行代码的方式是否正确:

the output is LIFE=1 and the arrays contain only 0, as if I did not fill the array particles with 5.0f. How do I fill in the array "particles"? Do I correctly launch serial code on the GPU:

    #pragma acc parallel num_gangs(1) vector_length(1)
    {
      mini=GL1-count0;
      if(count1<mini) mini=count1;
      js=0;
    }

如果我在使用 OpenACC 指令时有错误,请指出来。为什么

If there are mistakes in the usage of openacc directives, please, show them. Why the

     #pragma acc update host(LIFE) async

这条指令在不带 async 子句时就无法正常工作?

directive does not work without async?

推荐答案

我更新了您的代码以使其正常工作。您的代码中有几个孤立的(orphaned)循环,需要在其上加上"parallel"指令;另外,由于有几个变量是全局变量,您需要使用 atomic。在 atomic 中使用由"declare create"定义的变量时,我确实遇到了一个编译器问题,因此需要把其中几个变量移到数据区域(data region)中。我会把这个问题报告给我们的工程师。

I updated your code to get it to work. You had a few orphaned loops which needed "parallel" on them plus since several of the variables are globals, you need to use atomic. I did encounter a compiler issue when using variables defined in the "declare create" in an atomic so needed to move a few of the variables to a data region. I'll report this to our engineers.

请注意,我还加入了一个终止条件:当 LIFE 达到 G 时结束循环,否则程序似乎会进入无限循环。

Note that I also put in a terminating condition when LIFE reached G, otherwise it seemed to enter an infinite loop.

#include <iostream>
#include <accelmath.h>
#include <openacc.h>
// Upper limit for the particle count; main stops looping once LIFE reaches it.
const long int G=100000;
// Capacity of the fixed-size arrays particles, sort and ind01.
const unsigned int GL=100000;
// Bound compared against LIFE before a new particle is appended.
const long int K=G;
// Threshold compared against Particle::x; the float literal 1.0f is converted to the int value 1.
const int LE=1.0f;
// One particle record: x is the value tested against LE, rs receives MAX_ELEMENT when the particle is created.
struct Particle
{
  float x;
  float rs;
};
// Particle storage; the first LIFE entries are the ones in use.
Particle particles[GL];
// Per-particle class flag: 1 when x>=LE, otherwise 0.
int sort[GL];
// Index table filled from the front for class 1 and from the back for class 0.
int ind01[GL];
// Value stored into rs of the next created particle, then incremented; set to 1 in main.
int MAX_ELEMENT;
int POSITION1;
int POSITION0;
// Number of particles currently stored (also the index of the next free slot); set to 0 in main.
int LIFE;
// Forces the first pass through the main loop.
bool start=true;
int mini;
int GL1;
int js;
// Class counters / write cursors used with atomics in function_device; copied to
// the device by the data region in main rather than by a declare directive.
int count0;
int count1;

// device_resident: per the OpenACC specification these variables are allocated in device memory only.
#pragma acc declare device_resident(ind01,POSITION0,POSITION1,mini,GL1,js)
// declare create: gives particles and sort a device copy in addition to the host copy.
#pragma acc declare create(particles,sort)

 // Performs one step entirely inside OpenACC compute regions:
 //   1. resets the counters and, while LIFE < K, appends one particle;
 //   2. flags each stored particle in sort[] (1 when x >= LE, otherwise 0);
 //   3. turns count1/count0 into front/back write cursors for ind01;
 //   4. scatters the particle indices into ind01 by class;
 //   5. stores the smaller class size in mini and clears js.
 // Updates to the shared global counters go through atomics.
 void function_device()
 {
   // Step 1: one gang with vector length 1.
   #pragma acc parallel num_gangs(1) vector_length(1) present(particles)
   {
     #pragma acc atomic write
     count0 = 0;
     #pragma acc atomic write
     count1 = 0;
     if (LIFE < K)
     {
       particles[LIFE].x = 5.0;
       particles[LIFE].rs = MAX_ELEMENT;
       #pragma acc atomic update
       MAX_ELEMENT++;
       #pragma acc atomic update
       LIFE++;
     }
   }

   // Step 2: classify every stored particle and tally both classes.
   #pragma acc parallel loop
   for (int idx = 0; idx < LIFE; ++idx)
   {
     // Comparison kept in the same direction as before so NaN still lands in class 0.
     const bool above = particles[idx].x >= LE;
     sort[idx] = above ? 1 : 0;
     if (above)
     {
       #pragma acc atomic update
       count1 += 1;
     }
     else
     {
       #pragma acc atomic update
       count0 += 1;
     }
   }

   // Step 3: class 1 is written from index 0 upward, class 0 from LIFE-1 downward.
   #pragma acc parallel num_gangs(1) vector_length(1)
   {
     GL1 = LIFE - 1;
     count1 = 0;
     count0 = GL1;
   }

   // Step 4: each iteration atomically claims a slot, then records its index there.
   #pragma acc parallel loop
   for (int idx = 0; idx < LIFE; ++idx)
   {
     int slot;
     if (sort[idx] == 1)
     {
       #pragma acc atomic capture
       {
         slot = count1++;
       }
     }
     else
     {
       #pragma acc atomic capture
       {
         slot = count0--;
       }
     }
     ind01[slot] = idx;
   }

   // Step 5: GL1 - count0 is the number of class-0 entries written; keep the minimum.
   #pragma acc parallel num_gangs(1) vector_length(1)
   {
     const int zeros = GL1 - count0;
     mini = (count1 < zeros) ? count1 : zeros;
     js = 0;
   }
 }
  // Initialises the NVIDIA device, seeds LIFE and MAX_ELEMENT on the host, and
  // runs function_device() inside a data region until LIFE leaves the open
  // interval (0, G). LIFE is refreshed on the host after every step so the loop
  // condition sees the device-side value.
  int main(int argc, char **argv)
  {
    acc_init(acc_device_nvidia);
    MAX_ELEMENT = 1;
    LIFE = 0;
    // Copy the scalars to the device once for the duration of the loop.
    #pragma acc data copyin(LIFE, MAX_ELEMENT, count0, count1)
    {
      // `start` guarantees the first pass, when LIFE is still 0.
      for (int step = 1; start || (LIFE > 0 && LIFE < G); ++step)
      {
        std::cout << " LIFE=" << LIFE << std::endl;
        start = false;
        function_device();
        #pragma acc update self(LIFE)
        std::cout << "MAIN LOOP # " << step << std::endl;
      }
    }
  }

这篇关于无法填充在gpu上分配的数组的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!

07-30 02:13