我试图将我的 Halide 程序编译成jit,以便以后在不同的图像上多次使用它。但是我认为我做错了什么,有人可以纠正我吗?
首先,我创建要运行的 Halide 函数:
// Defines the `gamma` Halide::Func over a 3-D float input, applies a schedule,
// JIT-compiles it and stores the result (cast to `gammafunction`) in m_gammaFunction.
void m_gammaFunctionTMOGenerate()
{
// 3-dimensional float input image.
Halide::ImageParam img(Halide::type_of<float>(), 3);
// Input layout constraints: stride 4 along dimension 0, stride 1 along dimension 2.
img.set_stride(0, 4);
img.set_stride(2, 1);
Halide::Var x, y, c;
// Scalar pipeline parameters.
Halide::Param<float> key, sat, clampMax, clampMin;
Halide::Param<bool> cS;
Halide::Func gamma;
// algorithm
//img.width() , img.height();
// NOTE(review): cS.get() and key.get() read the Params' values right here, while the
// pipeline is being built, not the values supplied when the compiled function is called.
if (cS.get())
{
float k1 = 1.6774;
float k2 = 0.9925;
// Overwrites sat with a value derived from key.
sat.set((1 + k1) * pow(key.get(), k2) / (1 + k1 * pow(key.get(), k2)));
}
// Weighted sum of channels 0, 1 and 2 of the input pixel.
Halide::Expr luminance = img(x, y, 0) * 0.072186f + img(x, y, 1) * 0.715158f + img(x, y, 2) * 0.212656f;
// Rescale luminance relative to the [clampMin, clampMax] interval.
Halide::Expr ldr_lum = (luminance - clampMin) / (clampMax - clampMin);
// NOTE(review): the Expr returned by Halide::clamp is discarded; ldr_lum is unchanged by this statement.
Halide::clamp(ldr_lum, 0.f, 1.f);
ldr_lum = Halide::pow(ldr_lum, key);
// Per-channel value relative to luminance, raised to sat and scaled by ldr_lum.
Halide::Expr imLum = img(x, y, c) / luminance;
imLum = Halide::pow(imLum, sat) * ldr_lum;
// NOTE(review): the Expr returned by Halide::clamp is discarded here as well.
Halide::clamp(imLum, 0.f, 1.f);
gamma(x, y, c) = imLum;
// schedule: vectorize x by 16, run y in parallel
gamma.vectorize(x, 16).parallel(y);
// compilation
// Output layout constraints: same strides as the input, extent 3 along dimension 2.
auto & obuff = gamma.output_buffer();
obuff.set_stride(0, 4);
obuff.set_stride(2, 1);
obuff.set_extent(2, 3);
// NOTE(review): `arguments` is built but never passed to anything in this function.
std::vector<Halide::Argument> arguments = { img, key, sat, clampMax, clampMin, cS };
// The value returned by compile_jit() is cast to the hand-written function-pointer type.
m_gammaFunction = (gammafunction)(gamma.compile_jit());
}
将其存储在指针中:
// Call signature the caller assumes for the JIT-compiled pipeline: input buffer,
// four floats, one bool, output buffer; the int result is checked as an error code by the caller.
typedef int(*gammafunction)(buffer_t*, float, float, float, float, bool, buffer_t*);
// Holds the pointer obtained by casting the result of gamma.compile_jit().
gammafunction m_gammaFunction;
然后我尝试运行它:
// Describe the caller's input data and a newly allocated output array as buffer_t
// descriptors with identical extents/strides, then invoke the compiled pipeline.
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
// Output storage: 4 floats per pixel. No matching delete[] is visible in this fragment.
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 4; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
// NOTE(review): this calls m_photoFunction with a single float parameter, not the
// m_gammaFunction pointer with its four floats and a bool — confirm which was intended.
int error = m_photoFunction(&buf, params[0], &output_buf);
但这行不通...
错误:
Exception thrown at 0x000002974F552DE0 in Viewer.exe: 0xC0000005: Access violation executing location 0x000002974F552DE0.
If there is a handler for this exception, the program may be safely continued.
编辑:
这是我的运行函数代码:
// Describe the caller's input data and a newly allocated output array as buffer_t
// descriptors, call m_gammaFunction through the hand-written pointer type, and
// report a non-zero return value as an error.
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
// Output storage: 4 floats per pixel. No matching delete[] is visible in this fragment.
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 3; // Three channels; stride[0] below is still 4 elements per pixel
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
// params[0..3] are passed as the four floats; params[4] > 0.5 is passed as the bool.
int error = m_gammaFunction(&buf, params[0], params[1], params[2], params[3], params[4] > 0.5 ? true : false, &output_buf);
if (error) {
printf("Halide returned an error: %d\n", error);
return -1;
}
// NOTE(review): this copies `data` into `output`, i.e. over the buffer the pipeline
// was just given as its destination — confirm the intended copy direction.
memcpy(output, data, size * sizeof(float));
有人可以帮我吗?
编辑:
感谢@KhouriGiordano,我发现自己做错了。实际上,我从AOT编译切换到了此代码。所以现在我的代码看起来像这样:
// Wraps the gamma Halide pipeline: the constructor defines the Func, and realize()
// binds the input image, sets the parameters and runs it.
class GammaOperator
{
public:
// Defines m_gamma in terms of m_img and the parameter members.
GammaOperator();
// Runs the pipeline on `input`, writing into `output`.
// params[0..3] are used as key, sat, clampMax and clampMin; params[4] > 0.5f sets the cS flag.
// `width` is used to derive the input's stride along dimension 1.
// The visible implementation always returns 0.
int realize(buffer_t * input, float params[], buffer_t * output, int width);
private:
// HalideFloat / HalideBool are declared elsewhere — presumably aliases for
// Halide::Param<float> / Halide::Param<bool>; TODO confirm.
HalideFloat m_key;
HalideFloat m_sat;
HalideFloat m_clampMax;
HalideFloat m_clampMin;
HalideBool m_cS;
// 3-D float input image bound in realize().
Halide::ImageParam m_img;
// Pure variables used to define and schedule m_gamma.
Halide::Var x, y, c;
// The pipeline's output function.
Halide::Func m_gamma;
};
// Defines m_gamma(x, y, c) from m_img and the parameter members; nothing is
// scheduled or executed here.
GammaOperator::GammaOperator()
: m_img( Halide::type_of<float>(), 3)
{
// NOTE(review): m_key.get() reads the parameter's value at construction time, so `w`
// does not follow the key value set later in realize().
Halide::Expr w = (1.f + 1.6774f) * pow(m_key.get(), 0.9925f) / (1.f + 1.6774f * pow(m_key.get(), 0.9925f));
// select(cond, a, b) yields a when cond is true: m_sat when m_cS is set, otherwise w.
// NOTE(review): the earlier free-function version computed this formula when cS was true — confirm the polarity.
Halide::Expr sat = Halide::select(m_cS, m_sat, w);
// Weighted sum of channels 0, 1 and 2 of the input pixel.
Halide::Expr luminance = m_img(x, y, 0) * 0.072186f + m_img(x, y, 1) * 0.715158f + m_img(x, y, 2) * 0.212656f;
// Rescale luminance relative to [m_clampMin, m_clampMax], clamp to [0, 1], then raise to m_key.
Halide::Expr ldr_lum = (luminance - m_clampMin) / (m_clampMax - m_clampMin);
ldr_lum = Halide::clamp(ldr_lum, 0.f, 1.f);
ldr_lum = Halide::pow(ldr_lum, m_key);
// Per-channel value relative to luminance, raised to sat, scaled by ldr_lum and clamped to [0, 1].
Halide::Expr imLum = m_img(x, y, c) / luminance;
imLum = Halide::pow(imLum, sat) * ldr_lum;
imLum = Halide::clamp(imLum, 0.f, 1.f);
m_gamma(x, y, c) = imLum;
}
// Binds `input` to m_img, sets the scalar parameters from `params`, and realizes
// m_gamma into `output`. Always returns 0.
int GammaOperator::realize(buffer_t * input, float params[], buffer_t * output, int width)
{
// Wrap the raw buffer_t as a float Halide::Buffer and bind it to the image parameter.
m_img.set(Halide::Buffer(Halide::type_of<float>(), input));
m_img.set_stride(0, 4);
m_img.set_stride(1, width * 4);
// NOTE(review): stride 4 is requested for dimension 2 here, while the calling code
// fills buf.stride[2] with 1 — confirm which is intended.
m_img.set_stride(2, 4);
// algorithm
// NOTE(review): the schedule is applied on every call to realize(), not once at construction — confirm this is intended.
m_gamma.vectorize(x, 16).parallel(y);
//params[0], params[1], params[2], params[3], params[4] > 0.5 ? true : false
//{ img, key, sat, clampMax, clampMin, cS };
m_key.set(params[0]);
m_sat.set(params[1]);
m_clampMax.set(params[2]);
m_clampMin.set(params[3]);
m_cS.set(params[4] > 0.5f ? true : false);
//// compilation
// Evaluate the pipeline into the caller's output buffer.
m_gamma.realize(Halide::Buffer(Halide::type_of<float>(), output));
return 0;
}
我这样使用它:
// Describe the caller's input data and a newly allocated output array as buffer_t
// descriptors with identical extents/strides, then run the pipeline through the
// GammaOperator instance s_gamma.
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
// Output storage: 4 floats per pixel. No matching delete[] is visible in this fragment.
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 4; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
// `params` is passed as a whole array; `width` is forwarded for the input stride setup.
int error = s_gamma->realize(&buf, params, &output_buf, width);
但是它仍然在 m_gamma.realize 函数处崩溃,控制台输出如下信息:
Error: Constraint violated: f0.stride.0 (4) == 1 (1)
最佳答案
通过使用 Halide::Param::get(),您是在调用 get() 的那一刻从 Param 对象中提取值(默认值为 0)。如果要使用调用生成的函数时传入的参数值,则无需调用 get,直接使用 Param 即可,它会被隐式转换为 Expr。
由于Param
不能转换为 bool(boolean) 值,因此执行if
的Halide方法是Halide::select()
。
您没有使用 Halide::clamp() 返回的钳位结果(返回值被丢弃了)。
我看不到Halide代码正在使用cS
,只有C代码正在使用。
现在到您的JIT问题。看来您开始进行AOT编译并切换到JIT。
您可以创建std::vector<Halide::Argument>
,但不要将其传递到任何地方。 Halide如何知道您要使用什么Param
?它查看Func
并找到对ImageParam
和Param
对象的引用。
您如何知道它期望的 Param 顺序?这不受您控制。我通过定义 HL_GENBITCODE=1 转储了位码(bitcode),然后用 llvm-dis 查看,得到了您的函数签名:
// Signature of the JIT-generated function as recovered from the dumped bitcode.
// The scalar parameters appear in the order clampMax, key, clampMin, sat, followed by
// a user_context pointer; there is no bool argument.
int gamma
( buffer_t *img
, float clampMax
, float key
, float clampMin
, float sat
, void *user_context
, buffer_t *result
);
请使用 gamma.realize(Halide::Buffer(Halide::type_of<float>(), &output_buf)),而不是先调用 gamma.compile_jit() 再设法正确地调用生成的函数。
仅使用一次时:
用 Image 代替 ImageParam。
用 Expr 代替 Param。
通过一次 JIT 编译重复使用时:
保留 ImageParam 和 Param 对象,并在对 Func 调用 realize 之前设置它们的值。