问题描述
我可以在 CUDA 下改进以下函数吗?
Can I improve the following function under CUDA?
该函数的作用是:给定 min 和 max(即 ELM1 和 ELM),检查在数组 D1、D2、D3、D4、D5、D6 中从 min 到 max 的任何一行里,是否能找到数组 ans[6] 中的任意三个数;如果找到则返回 1。
Given a min and a max (ELM1 and ELM), check whether any three numbers of the array ans[6] are found in any row, from min to max, of the arrays D1, D2, D3, D4, D5, D6; if found, return 1.
我尝试过使用循环、OR 运算、AND 运算、用标志替换 goto 等方法,但目前这种写法似乎是最快的。
I tried using loops, OR-ing, AND-ing, replacing the gotos with a flag, and so on, but this still seems to be the fastest way.
/*
 * THREEA - scan a range of rows for one that contains three of the values
 * in ans.
 *
 * Row r consists of the six values D1[r], D2[r], D3[r], D4[r], D5[r], D6[r].
 * The rows examined are r = n, n + 1, ..., ST1[n0] + n - 1 (nothing is
 * examined when ST1[n0] is 0).
 *
 * For a given row let c[a] be the number of the six columns equal to ans[a]
 * (a = 0..5). The row is a hit when there are indices i < j < k such that
 * c[i] == 1, c[j] == 1 and c[k] >= 1.
 *
 * NOTE(review): the first two matched values must occur exactly once in the
 * row while the third may occur several times. This asymmetry is carried
 * over unchanged from the previous hand-unrolled goto version; confirm it is
 * intended. When neither the row nor ans holds duplicate values the rule is
 * simply "at least three of the six ans values occur in the row".
 *
 * Parameters:
 *   n0   index into ST1 selecting the number of rows to scan
 *   n    first row to scan
 *   ST1  table of row counts
 *   D1..D6  the six columns, indexed by row
 *   ans  the six values searched for; ans[0]..ans[5] are read
 *
 * Returns true as soon as a hit row is found, false if no row in the range
 * is a hit. None of the arrays is written.
 */
__device__ bool THREEA(unsigned int n0, unsigned int n,unsigned int* ST1,unsigned int* D1, unsigned int* D2,unsigned int* D3,unsigned int* D4,unsigned int* D5,unsigned int* D6,unsigned int* ans)
{
    const unsigned int end = ST1[n0] + n;

    for (unsigned int row = n; row < end; row++)
    {
        /* Read every column of this row exactly once. */
        const unsigned int v1 = D1[row];
        const unsigned int v2 = D2[row];
        const unsigned int v3 = D3[row];
        const unsigned int v4 = D4[row];
        const unsigned int v5 = D5[row];
        const unsigned int v6 = D6[row];

        /* Number of earlier ans entries that matched exactly one column. */
        unsigned int singles = 0;

        for (unsigned int a = 0; a < 6; a++)
        {
            const unsigned int target = ans[a];

            unsigned int count = 0;
            count += (v1 == target);
            count += (v2 == target);
            count += (v3 == target);
            count += (v4 == target);
            count += (v5 == target);
            count += (v6 == target);

            /*
             * ans[a] plays the role of k: two earlier entries (i < j < a)
             * each matched exactly once and this one matches at least once.
             * Must be tested before ans[a] itself is counted as a single.
             */
            if (singles >= 2 && count != 0)
            {
                return true;
            }

            /* ans[a] may serve as i or j for a later entry. */
            if (count == 1)
            {
                singles++;
            }
        }
    }

    return false;
}
推荐答案
通过将if语句转换为布尔表达式来删除它们.
remove if statements by converting them to Boolean expressions.
flag += (DN[ELM1] == ans[0])
确保数组位于寄存器或共享内存中,而不是全局
make sure your arrays are in registers or shared memory rather than global
此外,对于这样简单的算法,您的逻辑过于复杂。请将 D 数组的布局改为 D[N][6],这样可以简化很多事情。
Also, for such a simple algorithm your logic is far too complicated. Change the layout of the D arrays to D[N][6]; it will simplify many things.
顺便说一句,您可能希望对您的帖子进行一些裁剪,以至于无法阅读
by the way, you may want to crop your post a little bit, way too much to read
3 x3示例
      A
  |0 0 0|           |x x 0|
D |0 0 0| -> ... -> |x x 0| -> reduce down -> |x x 0| -> reduce across -> 2x
  |0 0 0|           |x x x|
基本上,如果 A 中的值在数组 D 中匹配,就把对应的矩阵单元设置为 true。在每次迭代中,如果整列都为 true,则把该列归约为 true。最后统计 true 的个数。
Basically, you set a matrix cell to true if the value from A matches in array D. On each iteration you reduce a column to true if the entire column is true. Then you count the number of trues.
这篇关于如何在CUDA下改善此功能?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!