问题描述
这是标准的Hello World CUDA文件:
#include <stdio.h>
#include "hello.h"
const int N = 7;
const int blocksize = 7;
__global__ void hello_kernel(char *a, int *b) {
    a[threadIdx.x] += b[threadIdx.x];
}
#define cudaCheckError() { \
    cudaError_t e = cudaGetLastError(); \
    if (e != cudaSuccess) { \
        printf("Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e)); \
        exit(0); \
    } \
}
void hello() {
    char a[N] = "Hello ";
    int b[N] = {15, 10, 6, 0, -11, 1, 0};
    char *ad;
    int *bd;
    const int csize = N * sizeof(char);
    const int isize = N * sizeof(int);
    printf("%s", a);
    cudaMalloc((void**)&ad, csize);
    cudaMemcpy(ad, a, csize, cudaMemcpyHostToDevice);
    cudaCheckError();
    cudaMalloc((void**)&bd, isize);
    cudaMemcpy(bd, b, isize, cudaMemcpyHostToDevice);
    cudaCheckError();
    dim3 dimBlock(blocksize, 1);
    dim3 dimGrid(1, 1);
    hello_kernel<<<dimGrid, dimBlock>>>(ad, bd);
    cudaMemcpy(a, ad, csize, cudaMemcpyDeviceToHost);
    cudaCheckError();
    cudaFree(ad);
    cudaCheckError();
    printf("%s\n", a);
}
及其标题:
- hello.h
extern "C"
void hello();
这是一个调用此函数的Haskell文件:
- test.hs
{-# LANGUAGE ForeignFunctionInterface #-}
import Foreign.C
import Foreign.Ptr (Ptr, nullPtr)
foreign import ccall "hello" hello :: IO ()
main = hello
我正在编译它:
nvcc hello.c -c -o hello.o
ghc test.hs -o test hello.o -L/usr/local/cuda/lib -optl-lcudart
使用 ./test
运行该程序会产生以下结果:
Hello Cuda失败hello.cu:32:'没有检测到支持CUDA的设备'
使用调用 hello
的C main()
运行相同的程序会产生 Hello World
,如预期。
如何让Haskell正确检测设备?
也许不相关,但是我可以在Mac上使用单独的板载和独立显卡重现您的错误。在系统偏好设置中启用自动图形切换(并且没有运行3D图形应用程序)时,我会得到相同的未检测到具有CUDA功能的设备错误。
当我关闭自动图形切换时,它强制Mac使用独立显卡,然后程序按预期运行。
纯粹基于C / CUDA版本的代码似乎不受此首选项的影响,并且始终可以使用自动切换是否启用。
This is the standard Hello World CUDA file:
#include <stdio.h>
#include "hello.h"
const int N = 7;
const int blocksize = 7;
// Kernel: each thread folds one int offset from b into the corresponding
// character of a, in place. The host launches exactly one thread per element
// (a single block of `blocksize` == N threads — see hello()), so no bounds
// guard is required here.
__global__ void hello_kernel(char *a, int *b) {
    const int idx = threadIdx.x;       // 1-D block, one lane per character
    a[idx] = a[idx] + b[idx];
}
// Check for a pending CUDA error and abort the process if one is found.
// Wrapped in do { } while (0) so the macro expands to a single statement and
// is safe inside un-braced if/else bodies (the original bare-brace form was
// not). The message goes to stderr, and the exit status is nonzero so that
// callers and shell scripts can detect the failure (the original exit(0)
// reported success even on error).
#define cudaCheckError() do { \
    cudaError_t e = cudaGetLastError(); \
    if (e != cudaSuccess) { \
        fprintf(stderr, "Cuda failure %s:%d: '%s'\n", \
                __FILE__, __LINE__, cudaGetErrorString(e)); \
        exit(1); \
    } \
} while (0)
// Host wrapper: copies the string "Hello " and a table of character offsets
// to the device, launches hello_kernel (one thread per element) to rewrite
// the string in place, copies the result back, and prints it.
// On any CUDA error, cudaCheckError() prints a diagnostic and exits.
void hello() {
    char a[N] = "Hello ";                 // N == 7: six characters + NUL
    int b[N] = {15, 10, 6, 0, -11, 1, 0}; // per-character offsets
    char *ad;                             // device copy of a
    int *bd;                              // device copy of b
    const int csize = N * sizeof(char);
    const int isize = N * sizeof(int);

    printf("%s", a);

    cudaMalloc((void**)&ad, csize);
    cudaMemcpy(ad, a, csize, cudaMemcpyHostToDevice);
    cudaCheckError();
    cudaMalloc((void**)&bd, isize);
    cudaMemcpy(bd, b, isize, cudaMemcpyHostToDevice);
    cudaCheckError();

    dim3 dimBlock(blocksize, 1);          // exactly one thread per element
    dim3 dimGrid(1, 1);
    hello_kernel<<<dimGrid, dimBlock>>>(ad, bd);
    // Check right after the launch so configuration/launch failures (e.g.
    // "no CUDA-capable device is detected") are reported at this line rather
    // than surfacing later at the device-to-host copy.
    cudaCheckError();

    cudaMemcpy(a, ad, csize, cudaMemcpyDeviceToHost);
    cudaCheckError();

    cudaFree(ad);
    cudaFree(bd);                         // was leaked in the original
    cudaCheckError();

    printf("%s\n", a);
}
And its header:
-- hello.h
extern "C"
void hello();
That's a Haskell file that calls such function:
-- test.hs
{-# LANGUAGE ForeignFunctionInterface #-}
import Foreign.C
import Foreign.Ptr (Ptr,nullPtr)
foreign import ccall "hello" hello :: IO ()
main = hello
I'm compiling it with:
nvcc hello.c -c -o hello.o
ghc test.hs -o test hello.o -L/usr/local/cuda/lib -optl-lcudart
Running that program with ./test
results in:
Hello Cuda failure hello.cu:32: 'no CUDA-capable device is detected'
Running the same program with a C main()
that just calls hello
produces Hello World
, as expected.
How do I make Haskell detect the device correctly?
Maybe unrelated, but I was able to reproduce your error on a Mac with separate on-board and discrete graphics cards. When "Automatic graphics switching" is enabled in System Preferences (and no 3D graphics applications are running), I get the same "no CUDA-capable device is detected" error.
When I turn off automatic graphics switching, it forces the Mac to use the discrete graphics card, and then the program runs as expected.
The purely C/CUDA-based version of the code doesn't seem to be affected by this preference and always works whether automatic switching is enabled or not.
这篇关于调用CUDA“Hello World”来自Haskell使用FFI给出了错误的结果的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!