我正在使用iPhone着色器GLSL ES 1.1中的3x3内核进行卷积。目前,我正在执行9个纹理查找。有没有更快的方法?一些想法:

  • 将输入图像作为缓冲区(而不是纹理)传入,以避免调用纹理插值。
  • 从顶点着色器传递9个varying vec2坐标(而不是像我目前那样只传递一个),以促使处理器高效地预取纹理。
  • 研究各种适用于此的Apple扩展。
  • (已添加)研究GLSL shaderOffset调用的ES等效项(在ES中不可用,但可能存在等效项)

  • 在硬件方面,我特别关注iPhone 4S。

    最佳答案

    您确定您不是指OpenGL ES 2.0吗?您不能使用OpenGL ES 1.1进行任何类型的着色器。我假设是前者。

    以我的经验,最快的方式是您列出的第二项。我在GPUImage框架中实现了几种3x3卷积(您可以直接使用它们,而不必自己从头实现)。对于这些卷积,我把水平和垂直方向的纹理偏移量传入顶点着色器,并在其中计算所需的九个纹理坐标,然后将它们作为varying变量传递给片段着色器。

    (在大多数情况下)这避免了片段着色器中的依赖纹理读取(dependent texture read),这种读取在iOS的PowerVR GPU上开销非常大。我之所以说“大多数情况”,是因为在iPhone 4等较旧的设备上,这些varying变量中只有八个能以避免依赖纹理读取的方式使用。正如我上周了解到的那样,第九个会在旧设备上触发依赖纹理读取,从而使速度稍微变慢。但是,iPhone 4S不会出现此问题,因为它支持以这种方式使用更多的varying变量。

    我将以下内容用于我的顶点着色器:

     // Vertex shader for a 3x3 neighbourhood filter.
     // Emits the centre texture coordinate plus its eight neighbours as
     // varyings, so the fragment stage can sample with coordinates it did
     // not have to compute itself.

     // Per-vertex inputs.
     attribute vec4 position;
     attribute vec4 inputTextureCoordinate;

     // Horizontal / vertical distance between adjacent samples, in texture
     // coordinate units (supplied by the host application).
     uniform highp float texelWidth;
     uniform highp float texelHeight;

     // Centre row.
     varying vec2 textureCoordinate;
     varying vec2 leftTextureCoordinate;
     varying vec2 rightTextureCoordinate;

     // "Top" row: one texelHeight step towards smaller y.
     varying vec2 topTextureCoordinate;
     varying vec2 topLeftTextureCoordinate;
     varying vec2 topRightTextureCoordinate;

     // "Bottom" row: one texelHeight step towards larger y.
     varying vec2 bottomTextureCoordinate;
     varying vec2 bottomLeftTextureCoordinate;
     varying vec2 bottomRightTextureCoordinate;

     void main()
     {
         // The position is passed through untouched.
         gl_Position = position;

         vec2 center = inputTextureCoordinate.xy;

         // Axis-aligned and diagonal one-texel offsets.
         vec2 stepX = vec2(texelWidth, 0.0);
         vec2 stepY = vec2(0.0, texelHeight);
         vec2 stepDiagonal = vec2(texelWidth, texelHeight);
         vec2 stepAntiDiagonal = vec2(texelWidth, -texelHeight);

         // Horizontal neighbours.
         textureCoordinate = center;
         leftTextureCoordinate = center - stepX;
         rightTextureCoordinate = center + stepX;

         // Vertical neighbours.
         topTextureCoordinate = center - stepY;
         bottomTextureCoordinate = center + stepY;

         // Corners along the (+x, +y) diagonal.
         topLeftTextureCoordinate = center - stepDiagonal;
         bottomRightTextureCoordinate = center + stepDiagonal;

         // Corners along the (+x, -y) diagonal.
         topRightTextureCoordinate = center + stepAntiDiagonal;
         bottomLeftTextureCoordinate = center - stepAntiDiagonal;
     }
    

    和片段着色器:
     // Fragment shader for a 3x3 convolution.
     // Samples the input texture at nine coordinates received as varyings
     // and writes their sum, weighted by the entries of convolutionMatrix.
     precision highp float;

     uniform sampler2D inputImageTexture;

     // Kernel weights. convolutionMatrix[i] supplies the three weights for
     // the i-th sample row below (0 = top, 1 = centre, 2 = bottom).
     // NOTE(review): GLSL indexes matrices column-first, so the host-side
     // layout that makes [i] a kernel row should be confirmed.
     uniform mediump mat3 convolutionMatrix;

     // Centre row.
     varying vec2 textureCoordinate;
     varying vec2 leftTextureCoordinate;
     varying vec2 rightTextureCoordinate;

     // Top row.
     varying vec2 topTextureCoordinate;
     varying vec2 topLeftTextureCoordinate;
     varying vec2 topRightTextureCoordinate;

     // Bottom row.
     varying vec2 bottomTextureCoordinate;
     varying vec2 bottomLeftTextureCoordinate;
     varying vec2 bottomRightTextureCoordinate;

     void main()
     {
         // Top row: fetch the three samples and weight them.
         mediump vec3 topWeights = convolutionMatrix[0];
         mediump vec4 nw = texture2D(inputImageTexture, topLeftTextureCoordinate);
         mediump vec4 n = texture2D(inputImageTexture, topTextureCoordinate);
         mediump vec4 ne = texture2D(inputImageTexture, topRightTextureCoordinate);
         mediump vec4 topRow = nw * topWeights.x + n * topWeights.y + ne * topWeights.z;

         // Centre row.
         mediump vec3 middleWeights = convolutionMatrix[1];
         mediump vec4 w = texture2D(inputImageTexture, leftTextureCoordinate);
         mediump vec4 c = texture2D(inputImageTexture, textureCoordinate);
         mediump vec4 e = texture2D(inputImageTexture, rightTextureCoordinate);
         mediump vec4 middleRow = w * middleWeights.x + c * middleWeights.y + e * middleWeights.z;

         // Bottom row.
         mediump vec3 bottomWeights = convolutionMatrix[2];
         mediump vec4 sw = texture2D(inputImageTexture, bottomLeftTextureCoordinate);
         mediump vec4 s = texture2D(inputImageTexture, bottomTextureCoordinate);
         mediump vec4 se = texture2D(inputImageTexture, bottomRightTextureCoordinate);
         mediump vec4 bottomRow = sw * bottomWeights.x + s * bottomWeights.y + se * bottomWeights.z;

         // Rows are accumulated top, centre, bottom.
         gl_FragColor = (topRow + middleRow) + bottomRow;
     }
    

    即使存在上述限制,此着色器在iPhone 4上处理一帧640x480的视频大约需要2毫秒,而4S使用这样的着色器可以轻松地以30 FPS处理1080p视频。

    10-08 08:18
    查看更多