我正在尝试使用Media Foundation Transform(MFT)将HE-AAC音频流(由RTP库live555接收)转换为PCM(通过Win32的waveOutXxx API播放)。
不过,目前我处理的是预先录制好的数据包。
当我调用IMFTransform::ProcessOutput时,它返回MF_E_TRANSFORM_STREAM_CHANGE。
_MFT_PROCESS_OUTPUT_STATUS的文档指出,此时我应该先确定正确的流类型,然后再次调用IMFTransform::SetOutputType。
但是,我无法确定应该传给SetOutputType的正确参数是什么。
作为参考,该流在RTSP会话中的(SDP)描述为
m=audio 0 RTP/AVP 97
a=rtpmap:97 mpeg4-generic/16000/1
a=fmtp:97 streamtype=5; profile-level-id=15; mode=AAC-hbr; config=1408;
sizeLength=13; indexLength=3; indexDeltaLength=3; profile=1; bitrate=32000;
我的代码如下(抱歉代码较长;为简洁起见,已删除错误处理)
/* Decoder state shared by InitMFT() and ProcessData(). */
static IMFMediaType *s_inputMediaType  = NULL; /* input type set on the transform's stream 0 */
static IMFMediaType *s_outputMediaType = NULL; /* output type set on the transform's stream 0 */
static IMFTransform *s_transform       = NULL; /* decoder MFT instance created by InitMFT() */
static DWORD s_outputSampleSize        = 0;    /* cbSize reported by GetOutputStreamInfo() */
static void InitMFT()
{
HRESULT res;
res = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); // probably should use threaded in production
#if 0
res = MFCreateMediaType(&inputMediaType);
res = MFCreateMediaType(&outputMediaType);
res = inputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
res = inputMediaType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC);
res = inputMediaType->SetUINT32(MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, 15);
res = inputMediaType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, 1); // Audio Data Transport Stream
res = outputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
res = outputMediaType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
res = outputMediaType->SetUINT32(MF_MT_SAMPLE_SIZE, 2);
res = outputMediaType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, 2);
res = outputMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 8 * 2);
res = outputMediaType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, 8000);
res = outputMediaType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 8000 * 2 *2);
#endif
CLSID *clsids = NULL;
UINT32 clsidCount=0;
MFT_REGISTER_TYPE_INFO inputType = {MFMediaType_Audio, MEDIASUBTYPE_MPEG_HEAAC}; // WAVE_FORMAT_MPEG_HEAAC can't be searched for
MFT_REGISTER_TYPE_INFO outputType = {MFMediaType_Audio, WAVE_FORMAT_PCM}; // MEDIASUBTYPE_PCM doesn't exist (but can be search for if created)
res = MFTEnum(MFT_CATEGORY_AUDIO_DECODER, 0,
&inputType,
NULL, /* &outputType, */ // search fails if outputType is not NULL
NULL, &clsids, &clsidCount);
ListTranscoders(clsids, clsidCount);
res = CoCreateInstance(clsids[0], NULL, CLSCTX_ALL, IID_PPV_ARGS(&s_transform));
/* GetStreamCount and GetStreamIDs always return E_NOTIMPL */
DWORD inputCount;
DWORD outputCount;
res = s_transform->GetStreamCount(&inputCount, &outputCount);
DWORD inputIDs[16];
DWORD outputIDs[16];
res = s_transform->GetStreamIDs(inputCount, inputIDs,
outputCount, outputIDs);
res = s_transform->GetInputAvailableType(0, 0, &s_inputMediaType);
res = s_transform->SetInputType(0, s_inputMediaType, 0);
res = s_transform->GetOutputAvailableType(0, 1, &s_outputMediaType); // 1 here is because PCM outputer is the second output offered in ListTranscoders
res = s_transform->SetOutputType(0, s_outputMediaType, 0);
MFT_OUTPUT_STREAM_INFO outputStreamInfo;
res = s_transform->GetOutputStreamInfo(0, &outputStreamInfo);
s_outputSampleSize = outputStreamInfo.cbSize;
GUID inputCodec;
GUID outputCodec;
res = s_inputMediaType ->GetGUID(MF_MT_SUBTYPE, &inputCodec);
res = s_outputMediaType->GetGUID(MF_MT_SUBTYPE, &outputCodec);
CoTaskMemFree(clsids);
}
/*
http://msdn.microsoft.com/en-us/library/bb530106%28v=vs.85%29.aspx
http://msdn.microsoft.com/en-us/library/bb530123%28v=vs.85%29.aspx
*/
void ProcessData(const void* inputData, DWORD inputSize,
void* outputData, DWORD &outputSize)
{
HRESULT res;
IMFSample *pSample;
IMFMediaBuffer *pBuffer;
BYTE *pData = NULL;
/**** Create an input sample buffer, from the supplied data ****/
res = MFCreateSample(&pSample);
res = MFCreateMemoryBuffer(inputSize, &pBuffer);
res = pBuffer->Lock(&pData, NULL, NULL);
memcpy_s(pData, inputSize, inputData, inputSize);
res = pBuffer->SetCurrentLength(inputSize);
res = pBuffer->Unlock();
res = pSample->AddBuffer(pBuffer);
/**** Create output buffer ****/
IMFSample *pOutputSample;
IMFMediaBuffer *pOutputBuffer;
res = MFCreateSample(&pOutputSample);
res = MFCreateMemoryBuffer(s_outputSampleSize, &pOutputBuffer);
res = pOutputSample->AddBuffer(pOutputBuffer);
MFT_OUTPUT_DATA_BUFFER outputDataBuffer; // can be an array
outputDataBuffer.dwStreamID=0;
outputDataBuffer.pSample=pOutputSample;
outputDataBuffer.dwStatus=0;
outputDataBuffer.pEvents = NULL;
DWORD outputStatus=0;
/*** Process the data, and get it back ****/
res = s_transform->ProcessInput(0, pSample, 0);
res = s_transform->ProcessOutput( MFT_PROCESS_OUTPUT_DISCARD_WHEN_NO_BUFFER,
1, &outputDataBuffer, &outputStatus);
if (res==MF_E_TRANSFORM_STREAM_CHANGE)
{
// http://msdn.microsoft.com/en-us/library/windows/desktop/dd797815%28v=vs.85%29.aspx
// indicates that the output always changes
// but not how to handle it
/* GetStreamCount and GetStreamIDs always return E_NOTIMPL */
DWORD inputCount;
DWORD outputCount;
res = s_transform->GetStreamCount(&inputCount, &outputCount);
DWORD inputIDs[16];
DWORD outputIDs[16];
res = s_transform->GetStreamIDs(inputCount, inputIDs,
outputCount, outputIDs);
res = s_transform->GetInputAvailableType(0, 0, &s_inputMediaType);
res = s_transform->SetInputType(0, s_inputMediaType, 0);
res = s_transform->GetOutputAvailableType(0, 1, &s_outputMediaType); // 1 here is because PCM outputer is the second output offered
res = s_transform->SetOutputType(0, s_outputMediaType, 0);
}
/**** Extract converted audio from the sample ****/
DWORD dwNumOutputBuffers, i;
res = outputDataBuffer.pSample->GetBufferCount(&dwNumOutputBuffers);
for(i=0; i<dwNumOutputBuffers; i++)
{
IMFMediaBuffer *outputBuffer;
res = outputDataBuffer.pSample->GetBufferByIndex(i, &outputBuffer);
BYTE *outData;
DWORD outDataLen = 0;
res = outputBuffer->Lock(&outData, NULL, &outDataLen);
memcpy(outputBuffer, outData, outDataLen);
res = outputBuffer->Unlock();
}
/* TODO: Release any neccessery references */
}
最佳答案
我也在做类似的事情。您必须重新设置输出类型(SetOutputType),否则下一次ProcessInput将返回MF_E_TRANSFORM_TYPE_NOT_SET;
您还必须刷新(flush)解码器,否则下一次ProcessInput将返回MF_E_NOTACCEPTING。
以您的代码为基础,我删除了其中设置输入类型(SetInputType)的调用,并在下面这一行之后添加了如下代码,从而使其正常工作:
res = s_transform->GetOutputAvailableType(0, 1, &s_outputMediaType); // 1 here is because PCM outputer is the second output offered
GUID outputCodec;
res = s_outputMediaType->GetGUID(MF_MT_SUBTYPE, &outputCodec);
if (outputCodec == MFAudioFormat_PCM){
printf("\nDecoder Output is expecting pcm format");
res = s_transform->SetOutputType(0, s_outputMediaType, 0);//setting the type again
}
if (outputCodec == MFAudioFormat_Float){
printf("\nDecoder Output is expecting float pcm format");
}
s_transform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH,NULL);
return res;//no output coming get another input to process.
为了进行测试,我使用了通过Source Reader从一个完好的文件中读取的样本。请确保播放足够多的数据包,因为某些只包含静音(“quiet”)的数据包可能很短且全为0。
另外,http://msdn.microsoft.com/en-us/library/windows/desktop/ff485864%28v=vs.85%29.aspx 这个页面很好地说明了如何配置输出,非常有用。
仔细阅读 http://msdn.microsoft.com/en-us/library/windows/desktop/dd742784%28v=vs.85%29.aspx ,您会发现输入媒体类型上的某些属性(例如MF_MT_AUDIO_BITS_PER_SAMPLE)会告诉解码器如何设置输出的格式。如果您能够更完整地指定所需的输入和输出类型,就可以避免流更改。另外,处理第一个样本时可能会返回MF_E_TRANSFORM_NEED_MORE_INPUT。
关于visual-c++ - IMFTransform::ProcessOutput为HE-AAC-> PCM返回MF_E_TRANSFORM_STREAM_CHANGE,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/16565292/