所以我正在编写一个在 CPU + GPU 上运行的 openCL 程序,并且在使用 clCreateProgramWithSource() 创建我的程序后,我目前正在尝试保存/缓存二进制文件。我使用 CL_DEVICE_TYPE_ALL 创建我的 clContext 和 clProgram 并使用这些规范构建源代码。
然后我将二进制文件存储到磁盘(每个设备一个二进制文件),以便在后续启动时我的程序自动调用 clCreateProgramWithBinary 来加载它们。
问题是,如果我把使用 CL_DEVICE_TYPE_ALL 设置创建的二进制文件保存到磁盘,则 CPU 的二进制文件会损坏,并且 clCreateProgramWithBinary 会报错。
为了将所有二进制文件正确保存到磁盘,我必须先把代码改成使用 CL_DEVICE_TYPE_CPU 运行,单独保存 CPU 二进制文件;然后再把代码改成使用 CL_DEVICE_TYPE_GPU 运行,保存 GPU 二进制文件;最后再切换回 CL_DEVICE_TYPE_ALL。这样做之后,clCreateProgramWithBinary 就能正确地为每种设备类型加载二进制文件并执行我的程序。
那么这只是 openCL 的一个怪癖,我不能一起为 GPU 和 CPU 构建二进制文件吗?还是我只是做错了?
我的代码基于此处找到的二进制保存的实现:https://code.google.com/p/opencl-book-samples/source/browse/trunk/src/Chapter_6/HelloBinaryWorld/HelloBinaryWorld.cpp?r=42 并进行了适当的修改以处理多个设备。
/*----Initial setup of platform, context and devices---*/
cl_int err, deviceCount;
cl_device_id *devices;
cl_platform_id platform;
cl_context context;
cl_program program;
err = clGetPlatformIDs(1, &platform, NULL);
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
devices = new cl_device_id[deviceCount];
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
context = clCreateContext(NULL, deviceCount, devices, NULL, NULL, &err);
/*---Build Program---*/
// Loads numFiles source files through ReadFile, creates one program object from
// all of the resulting strings, and builds it for every device in `devices`.
// NOTE(review): the initializer list of sourceFiles is missing from this
// excerpt, so the names of the files being loaded are not visible here.
int numFiles = 2;
const char *sourceFiles[] =
char *sourceStrings[numFiles];
// One ReadFile result per entry of sourceFiles.
for(int i = 0; i < numFiles; i++)
sourceStrings[i] = ReadFile(sourceFiles[i]);
/*---Create the compute program from the source buffer---*/
// The lengths argument is NULL; presumably ReadFile returns NUL-terminated
// text — TODO confirm against ReadFile.
program = clCreateProgramWithSource(context, numFiles, (const char **)sourceStrings, NULL, &err);
/*---Build the program executable---*/
// err is overwritten here; the value written by clCreateProgramWithSource is
// not inspected anywhere in this excerpt.
err = clBuildProgram(program, deviceCount, devices, NULL, NULL, NULL);
/*----Save binary to disk---*/
// Queries the compiled binary of `program` for every device and writes each one
// to its own file named "<binaryFile><device index>.txt". Returns false from the
// enclosing function when a query or a file write fails.

//Determine the size of each program binary
size_t *programBinarySizes = new size_t[deviceCount];
err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * deviceCount, programBinarySizes, NULL);
if(err != CL_SUCCESS)
{
    // NOTE(review): devices is released on this error path but not in the normal
    // cleanup below — confirm who owns the array.
    delete [] devices;
    delete [] programBinarySizes;
    return false;
}

// One buffer per device, sized from the query above.
unsigned char **programBinaries = new unsigned char*[deviceCount];
for(cl_uint i = 0; i < deviceCount; i++)
{
    programBinaries[i] = new unsigned char[programBinarySizes[i]];
}

//Get all of the program binaries
err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char *) * deviceCount, programBinaries, NULL);
if (err != CL_SUCCESS)
{
    delete [] devices;
    delete [] programBinarySizes;
    for (cl_uint i = 0; i < deviceCount; i++)
    {
        delete [] programBinaries[i];
    }
    delete [] programBinaries;
    // Stop here: everything below would otherwise use the buffers just freed.
    return false;
}

//Store the binaries
bool allSaved = true;
for(cl_uint i = 0; i < deviceCount; i++)
{
    // Store the binary for all devices
    std::string currFile = binaryFile + to_string(i) + ".txt";
    FILE *fp = fopen(currFile.c_str(), "wb");
    if (fp == NULL)
    {
        allSaved = false;
        continue;
    }
    if (fwrite(programBinaries[i], 1, programBinarySizes[i], fp) != programBinarySizes[i])
    {
        allSaved = false;
    }
    // Close every file so the data is flushed and the handle is not leaked.
    if (fclose(fp) != 0)
    {
        allSaved = false;
    }
}

// Cleanup
delete [] programBinarySizes;
for (cl_uint i = 0; i < deviceCount; i++)
{
    delete [] programBinaries[i];
}
delete [] programBinaries;

// Report an incomplete cache only after the buffers have been released.
if (!allSaved)
{
    return false;
}
// Reads one cached binary per device from "<binaryFile><i>.txt", passes the
// buffers to clCreateProgramWithBinary and then builds the resulting program.
// NOTE(review): the braces that delimit the loop body appear to have been lost
// in this excerpt; the statements through the fread call read as the per-device
// body.
unsigned char **programBinaries = new unsigned char *[deviceCount];
// sizes[i] receives the byte length measured for device i's file.
size_t sizes[deviceCount];
for(int i = 0; i < deviceCount; i++)
string currFile = binaryFile + to_string(i) + ".txt";
FILE *fp = fopen(currFile.c_str(), "rb");
// NOTE(review): this early return does not release programBinaries or any
// buffer allocated for an earlier device.
if(!fp) return NULL;
size_t binarySize;
// Seek to the end so that ftell reports the file length.
fseek(fp, 0, SEEK_END);
binarySize = ftell(fp);
sizes[i] = binarySize;
programBinaries[i] = new unsigned char[binarySize];
// NOTE(review): no seek back to the start of the file is visible before this
// read, and no fclose after it — confirm against the full source.
fread(programBinaries[i], 1, binarySize, fp);
cl_int errNum = 0;
cl_program program;
// NOTE(review): a single cl_int. The binary_status parameter of
// clCreateProgramWithBinary takes one entry per device; how this variable is
// passed cannot be checked here because the call below is cut off.
cl_int binaryStatus;
// NOTE(review): the argument list of this call is truncated in the excerpt.
program = clCreateProgramWithBinary(context,
(const unsigned char **)programBinaries,
// Releases only the array of pointers; the per-device buffers
// programBinaries[i] are not freed anywhere in the visible code.
delete [] programBinaries;
// Device count 0 with a NULL device list: per the OpenCL API this builds for
// all devices associated with the program.
errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
我有一个 rmbp,它在唯一的一个苹果平台上有三个设备。我在上面运行你的代码并遇到了同样的问题。其实我不知道解决方案,但我可以给你一些调试提示。
#include <sys/stat.h>
// Debugging variant of the binary loader: opens each cached file through a raw
// file descriptor, validates it, prints the measured size per device, and
// finally prints the per-device status written by clCreateProgramWithBinary.
// NOTE(review): closing braces for the loop and for the three `if` blocks are
// not visible in this excerpt.
unsigned char **programBinaries = new unsigned char *[deviceCount];
size_t sizes[deviceCount];
int fd;
struct stat st;
for(cl_uint i = 0; i < deviceCount; i++)
string currFile = binaryFile + to_string(i) + ".txt";
fd = open(currFile.c_str(), O_RDONLY);
// -1: the file could not be opened.
if (fd == -1) {
return -1;
// -2: fstat failed or the path is not a regular file.
if ((fstat(fd, &st) != 0) || (!S_ISREG(st.st_mode))) {
return -2;
size_t binarySize;
// Wrap the descriptor in a FILE* so the stdio seek/tell/read calls can be used.
FILE *fp = fdopen(fd, "rb");
// -3: seeking to the end of the file failed.
if (fseeko(fp, 0 , SEEK_END) != 0) {
return -3;
// The offset at end-of-file is taken as the binary's size.
binarySize = ftello(fp);
cout << "device " << i << ": " << binarySize << endl;
sizes[i] = binarySize;
programBinaries[i] = new unsigned char[binarySize];
// NOTE(review): no seek back to the start of the file is visible before this
// read, and neither fp nor fd is closed in the visible code — confirm against
// the full source.
fread(programBinaries[i], 1, binarySize, fp);
// One status slot per device, unlike a single cl_int.
cl_int binaryStatus[deviceCount];
// NOTE(review): the argument list of this call is truncated in the excerpt.
program = clCreateProgramWithBinary(context,
(const unsigned char **)programBinaries,
// Print the load status recorded for each device.
for (cl_uint i = 0; i < deviceCount; ++i)
cout << "device: " << i << ": " << binaryStatus[i] << endl;
device: 0: 0
device: 1: -42
第一行表示第一个二进制程序(用于 CPU)已成功加载。第二行的 -42 对应的是 CL_INVALID_BINARY,表示该设备的二进制程序加载失败。
//set device_id to 0,1,3...
cl_uint device_id = 0;
cl_build_status status;
// Determine the reason for the error
char buildOptions[16384];
char buildLog[16384];
clGetProgramBuildInfo(program, devices[device_id], CL_PROGRAM_BUILD_STATUS,
sizeof(cl_build_status), &status, NULL);
std::cout << "status: " << status << endl;
clGetProgramBuildInfo(program, devices[device_id], CL_PROGRAM_BUILD_OPTIONS,
sizeof(buildOptions), buildOptions, NULL);
std::cout << "build options: " << endl;
std::cout << buildOptions;
clGetProgramBuildInfo(program, devices[device_id], CL_PROGRAM_BUILD_LOG,
sizeof(buildLog), buildLog, NULL);
std::cout << "build log: " << endl;
std::cout << buildLog;
关于openCL 麻烦同时保存 CPU 和 GPU 的编译二进制文件,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/28259409/