c++ - 更快的位图重新缩放

我编写了一个小函数，可以将图像重新缩放到特定的大小，而无需对其进行裁剪（通过添加黑色边框来适配目标尺寸）。它工作正常，但在高分辨率下特别慢。请看一下，告诉我怎样才能提高这个函数的效率；或者如果有更好的代码，可以让我实现相同的结果，也请告诉我。

// Scales the source image into the destination buffer using nearest-neighbour
// sampling while preserving the aspect ratio. The scaled image is centred in
// the destination; everything around it is left zero-filled (the border).
//
// Parameters:
//   srcBuffer / dstBuffer   Base pointers; image data begins srcStart / dstStart
//                           bytes after them.
//   srcLen / dstLen         Byte counts measured from the start position
//                           (dstLen bytes are zeroed beginning at
//                           dstBuffer + dstStart, and every pixel offset is
//                           checked against these lengths).
//   srcStride / dstStride   Bytes per row. The width in pixels is derived as
//                           stride / pixel size, so row padding, if any, is
//                           counted as pixels.
//   srcHeight / dstHeight   Number of rows.
//   srcPixelFormat /
//   dstPixelFormat          Only used to look up the pixel size through
//                           VideoInfo::GetPixelFormatSize, which is treated as
//                           bytes per pixel. No colour conversion is done; when
//                           the sizes differ only the smaller number of bytes
//                           is copied per pixel.
//   reverseCopy             When true, source rows are read bottom-to-top
//                           (vertical flip).
//
// Returns 0 in all cases. Unusable arguments (null buffers, non-positive
// lengths, sizes or dimensions) copy nothing; the destination is still zeroed
// whenever it is usable.
static int ReScale(char* srcBuffer, int srcLen, int srcStart, int srcStride, int srcHeight, VideoInfo::ePixelFormat srcPixelFormat,
                   char* dstBuffer, int dstLen, int dstStart, int dstStride, int dstHeight, VideoInfo::ePixelFormat dstPixelFormat, bool reverseCopy)
{
    const int srcPixelDepth = VideoInfo::GetPixelFormatSize(srcPixelFormat);
    const int dstPixelDepth = VideoInfo::GetPixelFormatSize(dstPixelFormat);

    // Without a destination there is nothing to clear or to write.
    if (dstBuffer == nullptr || dstLen <= 0)
    {
        return 0;
    }

    // Clear the destination first so that the area outside the scaled image
    // ends up as a zero-filled border.
    dstBuffer += dstStart;
    ZeroMemory(dstBuffer, dstLen);

    // A missing source or a zero pixel size would lead to a division by zero
    // below; leave the destination blank instead.
    if (srcBuffer == nullptr || srcLen <= 0 || srcPixelDepth <= 0 || dstPixelDepth <= 0)
    {
        return 0;
    }

    const int srcWidth = srcStride / srcPixelDepth;
    int dstWidth = dstStride / dstPixelDepth;
    if (srcWidth <= 0 || srcHeight <= 0 || dstWidth <= 0 || dstHeight <= 0)
    {
        return 0;
    }

    srcBuffer += srcStart;

    // Largest uniform scale factor at which the source still fits the destination.
    const float resizeRatio = min(dstWidth / static_cast<float>(srcWidth), dstHeight / static_cast<float>(srcHeight));
    // Border thickness on the left/right and top/bottom that centres the image.
    const int dstXOffset = static_cast<int>((dstWidth - (resizeRatio * srcWidth)) / 2.f);
    const int dstYOffset = static_cast<int>((dstHeight - (resizeRatio * srcHeight)) / 2.f);

    // From here on dstWidth / dstHeight describe only the scaled image area.
    dstWidth -= 2 * dstXOffset;
    dstHeight -= 2 * dstYOffset;

    // Never copy more bytes than either pixel can hold.
    const int copySize = min(srcPixelDepth, dstPixelDepth);
    // Rounding can map the last destination row/column one past the source
    // image; clamp to the last valid index so no out-of-range (or, with
    // reverseCopy, negative) row is ever addressed.
    const float lastSrcRow = static_cast<float>(srcHeight - 1);
    const float lastSrcColumn = static_cast<float>(srcWidth - 1);

    for (int y = 0; y < dstHeight; y++)
    {
        int dstPixelOffset = dstXOffset * dstPixelDepth + (y + dstYOffset) * dstStride;
        for (int x = 0; x < dstWidth; x++)
        {
            // Nearest-neighbour lookup of the source pixel for (x, y).
            int srcRow = static_cast<int>(min(y / resizeRatio, lastSrcRow));
            if (reverseCopy)
            {
                srcRow = srcHeight - (srcRow + 1);
            }
            const int srcColumn = static_cast<int>(min(x / resizeRatio, lastSrcColumn));
            const int srcPixelOffset = srcColumn * srcPixelDepth + srcRow * srcStride;

            // "<=": a pixel that ends exactly at the end of a buffer is still valid.
            if (srcPixelOffset + copySize <= srcLen && dstPixelOffset + copySize <= dstLen)
            {
                memcpy(dstBuffer + dstPixelOffset, srcBuffer + srcPixelOffset, copySize);
            }
            dstPixelOffset += dstPixelDepth;
        }
    }
    return 0;
}

编译器选项：

/GS /analyze- /W3 /Zc:wchar_t
/I"C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\include"
/I"C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\PlatformSDK\include"
/I"C:\Program Files (x86)\Windows Kits\8.1\include"
/Zi /Gm- /O2 /Fd"Release\vc120.pdb" /fp:precise
/D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_USRDLL" /D "_WINDLL" /D "_UNICODE" /D "UNICODE"
/errorReport:prompt /WX- /Zc:forScope /Gz /Oy- /MD /Fa"Release\" /EHsc /nologo /Fo"Release\" /Fp"Release\*****.pch"

分析结果：

最佳答案

按照评论中的建议重构了函数，并启用了 SSE2 和 /fp:fast 编译选项，性能提高了约 3 倍：

// Scales the source image into the destination buffer using nearest-neighbour
// sampling while preserving the aspect ratio. The scaled image is centred in
// the destination; everything around it is left zero-filled (the border).
//
// The source row offset is computed once per destination row, and the source
// column advances by a constant step (1 / resizeRatio) per destination pixel,
// so the inner loop contains no division.
//
// Parameters:
//   srcBuffer / dstBuffer   Base pointers; image data begins srcStart / dstStart
//                           bytes after them.
//   srcLen / dstLen         Byte counts measured from the start position
//                           (dstLen bytes are zeroed beginning at
//                           dstBuffer + dstStart, and every pixel offset is
//                           checked against these lengths).
//   srcStride / dstStride   Bytes per row. The width in pixels is derived as
//                           stride / pixel size, so row padding, if any, is
//                           counted as pixels.
//   srcHeight / dstHeight   Number of rows.
//   srcPixelFormat /
//   dstPixelFormat          Only used to look up the pixel size through
//                           VideoInfo::GetPixelFormatSize, which is treated as
//                           bytes per pixel. No colour conversion is done; when
//                           the sizes differ only the smaller number of bytes
//                           is copied per pixel.
//   reverseCopy             When true, source rows are read bottom-to-top
//                           (vertical flip).
//
// Returns 0 in all cases. Unusable arguments (null buffers, non-positive
// lengths, sizes or dimensions) copy nothing; the destination is still zeroed
// whenever it is usable.
static int ReScale(char* srcBuffer, int srcLen, int srcStart, int srcStride, int srcHeight, VideoInfo::ePixelFormat srcPixelFormat,
                   char* dstBuffer, int dstLen, int dstStart, int dstStride, int dstHeight, VideoInfo::ePixelFormat dstPixelFormat, bool reverseCopy)
{
    const int srcPixelDepth = VideoInfo::GetPixelFormatSize(srcPixelFormat);
    const int dstPixelDepth = VideoInfo::GetPixelFormatSize(dstPixelFormat);

    // Without a destination there is nothing to clear or to write.
    if (dstBuffer == nullptr || dstLen <= 0)
    {
        return 0;
    }

    // Clear the destination first so that the area outside the scaled image
    // ends up as a zero-filled border.
    dstBuffer += dstStart;
    ZeroMemory(dstBuffer, dstLen);

    // A missing source or a zero pixel size would lead to a division by zero
    // below; leave the destination blank instead.
    if (srcBuffer == nullptr || srcLen <= 0 || srcPixelDepth <= 0 || dstPixelDepth <= 0)
    {
        return 0;
    }

    const int srcWidth = srcStride / srcPixelDepth;
    int dstWidth = dstStride / dstPixelDepth;
    if (srcWidth <= 0 || srcHeight <= 0 || dstWidth <= 0 || dstHeight <= 0)
    {
        return 0;
    }

    srcBuffer += srcStart;

    // Largest uniform scale factor at which the source still fits the destination.
    const float resizeRatio = min(dstWidth / static_cast<float>(srcWidth), dstHeight / static_cast<float>(srcHeight));
    // Border thickness on the left/right and top/bottom that centres the image.
    const int dstXOffset = static_cast<int>((dstWidth - (resizeRatio * srcWidth)) / 2.f);
    const int dstYOffset = static_cast<int>((dstHeight - (resizeRatio * srcHeight)) / 2.f);

    // From here on dstWidth / dstHeight describe only the scaled image area.
    dstWidth -= 2 * dstXOffset;
    dstHeight -= 2 * dstYOffset;

    // Source-column step per destination pixel.
    const float srcPixelXStride = 1 / resizeRatio;
    // Never copy more bytes than either pixel can hold.
    const int pixelDepthSize = min(srcPixelDepth, dstPixelDepth);
    // Rounding can map the last destination row/column one past the source
    // image; clamp to the last valid index so no out-of-range (or, with
    // reverseCopy, negative) row is ever addressed.
    const float lastSrcRow = static_cast<float>(srcHeight - 1);
    const float lastSrcColumn = static_cast<float>(srcWidth - 1);

    for (int y = 0; y < dstHeight; y++)
    {
        int dstPixelOffset = dstXOffset * dstPixelDepth + (y + dstYOffset) * dstStride;

        // The source row depends only on y, so resolve it once per row.
        int srcRow = static_cast<int>(min(y / resizeRatio, lastSrcRow));
        if (reverseCopy)
        {
            srcRow = srcHeight - (srcRow + 1);
        }
        const int srcPixelYOffset = srcRow * srcStride;

        float srcPixelXOffset = 0;
        for (int x = 0; x < dstWidth; x++)
        {
            const int srcPixelOffset = srcPixelYOffset + (static_cast<int>(min(srcPixelXOffset, lastSrcColumn)) * srcPixelDepth);

            // "<=": a pixel that ends exactly at the end of a buffer is still valid.
            if (srcPixelOffset + pixelDepthSize <= srcLen && dstPixelOffset + pixelDepthSize <= dstLen)
            {
                memcpy(dstBuffer + dstPixelOffset, srcBuffer + srcPixelOffset, pixelDepthSize);
            }
            dstPixelOffset += dstPixelDepth;
            srcPixelXOffset += srcPixelXStride;
        }
    }
    return 0;
}

特别感谢@moehm，@tafuri和其他评论者