我正在尝试使用 Speex 对音频进行编码/解码。不经过编解码时,音频响亮且清晰;但当我为了测试音质而先编码再解码时,得到的音质非常差,并且带有机器人般的机械声音。
这是我的初始化音频方法:
#define AUDIO_QUALITY 10

/**
 * Initialises the Speex narrowband encoder/decoder and configures, initialises
 * and starts the RemoteIO audio unit for simultaneous recording and playback.
 *
 * Both sides of the unit are set to 8 kHz, mono, 16-bit signed packed PCM.
 * Input is delivered to recordingCallback and output is pulled from
 * playingCallback. Any failure reported through XThrowIfError is caught and
 * logged here; nothing is propagated to the caller.
 */
- (void) initAudio {
    try {
        // SPEEX CONFIG
        speex_bits_init(&bits_in);
        speex_bits_init(&bits_out);
        enc_state = speex_encoder_init(&speex_nb_mode);
        dec_state = speex_decoder_init(&speex_nb_mode);
        int quality = AUDIO_QUALITY;
        speex_encoder_ctl(enc_state, SPEEX_SET_QUALITY, &quality);
        int tmp = 1;
        // Turn on the decoder's perceptual enhancement.
        speex_decoder_ctl(dec_state, SPEEX_SET_ENH, &tmp);

        OSStatus status;
        XThrowIfError(AudioSessionInitialize(NULL, NULL, rioInterruptionListener, self), "couldn't initialize audio session");

        float aBufferLength = 0.02; // In seconds
        status = AudioSessionSetProperty(kAudioSessionProperty_PreferredHardwareIOBufferDuration,
                                         sizeof(aBufferLength), &aBufferLength);
        XThrowIfError(status, "");

        UInt32 audioCategory = kAudioSessionCategory_PlayAndRecord;
        XThrowIfError(AudioSessionSetProperty(kAudioSessionProperty_AudioCategory, sizeof(audioCategory), &audioCategory), "couldn't set audio category");
        XThrowIfError(AudioSessionAddPropertyListener(kAudioSessionProperty_AudioRouteChange, propListener, self), "couldn't set property listener");

        // Describe audio component
        AudioComponentDescription desc;
        desc.componentType = kAudioUnitType_Output;
        desc.componentSubType = kAudioUnitSubType_RemoteIO;
        desc.componentFlags = 0;
        desc.componentFlagsMask = 0;
        desc.componentManufacturer = kAudioUnitManufacturer_Apple;

        // Get component
        AudioComponent inputComponent = AudioComponentFindNext(NULL, &desc);

        // Get audio units
        status = AudioComponentInstanceNew(inputComponent, &rioUnit);
        XThrowIfError(status, "1");

        // Enable IO for recording
        UInt32 flag = 1;
        status = AudioUnitSetProperty(rioUnit,
                                      kAudioOutputUnitProperty_EnableIO,
                                      kAudioUnitScope_Input,
                                      kInputBus,
                                      &flag,
                                      sizeof(flag));
        XThrowIfError(status, "2");

        // Enable IO for playback
        status = AudioUnitSetProperty(rioUnit,
                                      kAudioOutputUnitProperty_EnableIO,
                                      kAudioUnitScope_Output,
                                      kOutputBus,
                                      &flag,
                                      sizeof(flag));
        XThrowIfError(status, "3");

        // Describe format. Zero the whole struct first so that fields not
        // assigned below (notably mReserved) do not contain stack garbage.
        AudioStreamBasicDescription audioFormat = {0};
        audioFormat.mSampleRate = 8000.00;
        audioFormat.mFormatID = kAudioFormatLinearPCM;
        audioFormat.mFormatFlags = kAudioFormatFlagIsSignedInteger |
                                   kAudioFormatFlagsNativeEndian |
                                   kAudioFormatFlagIsPacked;
        audioFormat.mFramesPerPacket = 1;
        audioFormat.mChannelsPerFrame = 1;
        audioFormat.mBitsPerChannel = 16;
        audioFormat.mBytesPerPacket = 2;
        audioFormat.mBytesPerFrame = 2;

        // Apply format to the data coming out of the input bus...
        status = AudioUnitSetProperty(rioUnit,
                                      kAudioUnitProperty_StreamFormat,
                                      kAudioUnitScope_Output,
                                      kInputBus,
                                      &audioFormat,
                                      sizeof(audioFormat));
        XThrowIfError(status, "");
        // ...and to the data going into the output bus.
        status = AudioUnitSetProperty(rioUnit,
                                      kAudioUnitProperty_StreamFormat,
                                      kAudioUnitScope_Input,
                                      kOutputBus,
                                      &audioFormat,
                                      sizeof(audioFormat));
        XThrowIfError(status, "");

        // Set input callback
        AURenderCallbackStruct callbackStruct;
        callbackStruct.inputProc = recordingCallback;
        callbackStruct.inputProcRefCon = self;
        status = AudioUnitSetProperty(rioUnit,
                                      kAudioOutputUnitProperty_SetInputCallback,
                                      kAudioUnitScope_Global,
                                      kInputBus,
                                      &callbackStruct,
                                      sizeof(callbackStruct));
        XThrowIfError(status, "");

        // Set output callback
        callbackStruct.inputProc = playingCallback;
        callbackStruct.inputProcRefCon = self;
        status = AudioUnitSetProperty(rioUnit,
                                      kAudioUnitProperty_SetRenderCallback,
                                      kAudioUnitScope_Global,
                                      kOutputBus,
                                      &callbackStruct,
                                      sizeof(callbackStruct));
        XThrowIfError(status, "");

        // Disable buffer allocation for the recorder (optional - do this if we want to pass in our own).
        flag = 0;
        status = AudioUnitSetProperty(rioUnit,
                                      kAudioUnitProperty_ShouldAllocateBuffer,
                                      kAudioUnitScope_Output,
                                      kInputBus,
                                      &flag,
                                      sizeof(flag));
        // This step is optional, so a failure is reported but not treated as fatal.
        if (status != noErr) {
            fprintf(stderr, "couldn't disable buffer allocation (OSStatus %d)\n", (int)status);
        }

        // Allocate our own buffer (1 channel, 16 bits per sample, thus 2 bytes per frame),
        // sized for exactly one Speex frame of FRAME_SIZE samples.
        // The original author observed 512-frame hardware buffers and deferred handling
        // of other sizes to processAudio.
        // calloc zero-fills the buffer so that silence, not uninitialised heap data,
        // is available before the first decoded frame has been copied in.
        tempBuffer.mNumberChannels = 1;
        tempBuffer.mDataByteSize = FRAME_SIZE * 2;
        tempBuffer.mData = calloc(FRAME_SIZE, 2);
        if (tempBuffer.mData == NULL) {
            tempBuffer.mDataByteSize = 0;
            fprintf(stderr, "couldn't allocate the audio frame buffer\n");
            return;
        }

        XThrowIfError(AudioSessionSetActive(true), "couldn't set audio session active\n");

        // Initialise
        status = AudioUnitInitialize(rioUnit);
        XThrowIfError(status, "");
        status = AudioOutputUnitStart(rioUnit);
        XThrowIfError(status, "");
    }
    catch (CAXException &e) {
        // Report which operation failed and with what status instead of a bare marker.
        NSLog(@"CAXException: %s (OSStatus %d)", e.mOperation, (int)e.mError);
    }
    catch (...) {
        fprintf(stderr, "An unknown error occurred\n");
    }
}
我的 Speex 编码和解码函数:
#define FRAME_SIZE 160
#define COMP_FRAME_SIZE 62

/**
 * Encodes one frame of FRAME_SIZE 16-bit samples with the shared Speex encoder.
 *
 * @param buffer       PCM samples to encode; at least FRAME_SIZE samples are read.
 * @param inSize       Size of `buffer` in bytes (the visible caller passes mDataByteSize).
 * @param encodedSize  Out: number of bytes written to the returned buffer (0 on failure).
 * @return A malloc'd buffer of COMP_FRAME_SIZE bytes that the caller must free(),
 *         or NULL if the arguments are invalid, the input holds less than one
 *         full frame, or allocation fails.
 */
char* encodeSpeex(spx_int16_t *buffer, UInt32 inSize, int *encodedSize) {
    if (encodedSize == NULL) {
        return NULL;
    }
    *encodedSize = 0;

    // speex_encode_int consumes a whole frame; refuse short input rather than
    // letting the encoder read past the end of the caller's buffer.
    if (buffer == NULL || inSize < FRAME_SIZE * sizeof(spx_int16_t)) {
        return NULL;
    }

    char *outputBuffer = (char *)malloc(COMP_FRAME_SIZE);
    if (outputBuffer == NULL) {
        return NULL;
    }

    speex_bits_reset(&bits_in);
    speex_encode_int(enc_state, buffer, &bits_in);
    // Cap the write at the real capacity of outputBuffer (the original allowed
    // FRAME_SIZE * 2 bytes into a COMP_FRAME_SIZE-byte allocation).
    *encodedSize = speex_bits_write(&bits_in, outputBuffer, COMP_FRAME_SIZE);
    return outputBuffer;
}
/**
 * Decodes one Speex frame into FRAME_SIZE 16-bit samples with the shared decoder.
 *
 * @param buffer       Encoded Speex data.
 * @param encodedSize  Number of valid bytes in `buffer`.
 * @param decodedSize  Unused; kept so existing callers continue to compile.
 * @return A zero-initialised, heap-allocated buffer of FRAME_SIZE samples that the
 *         caller must free(). It contains silence if there was nothing to decode
 *         or the decoder reported an error. NULL only if allocation fails.
 */
short* decodeSpeex(char* buffer, int encodedSize, int decodedSize) {
    (void)decodedSize;

    short *outTemp = (short *)calloc(FRAME_SIZE, sizeof(short));
    if (outTemp == NULL) {
        return NULL;
    }
    if (buffer == NULL || encodedSize <= 0) {
        return outTemp; // nothing to decode: hand back silence
    }

    // The length argument is the byte count of the encoded data; passing anything
    // larger makes Speex read beyond the end of `buffer`.
    speex_bits_read_from(&bits_out, buffer, encodedSize);

    // A non-zero result means end-of-stream or a corrupt frame; do not play
    // whatever partial data the decoder may have left behind.
    if (speex_decode_int(dec_state, &bits_out, outTemp) != 0) {
        memset(outTemp, 0, FRAME_SIZE * sizeof(short));
    }
    return outTemp;
}
最后是调用上述 Speex 编码和解码函数的方法,它把解码后的数据复制到将由播放回调播放的缓冲区中:
/**
 * Round-trips one captured frame through the Speex encoder and decoder and
 * stores the decoded samples in tempBuffer for playback.
 *
 * Only the first FRAME_SIZE samples of the first buffer are processed. If the
 * buffer holds less than one full frame, or any step fails, tempBuffer is left
 * untouched so that partially filled data is never played back.
 *
 * @param bufferList Captured audio; mBuffers[0] is expected to hold 16-bit PCM.
 */
- (void) processAudio: (AudioBufferList*) bufferList
{
    if (bufferList == NULL || bufferList->mNumberBuffers == 0) {
        return;
    }

    AudioBuffer sourceBuffer = bufferList->mBuffers[0];
    const UInt32 frameByteCount = FRAME_SIZE * 2;

    // mDataByteSize is a UInt32; cast so the argument matches the %lu specifier.
    NSLog(@"Origin size: %lu", (unsigned long)sourceBuffer.mDataByteSize);

    // Encoding a buffer that is not completely filled feeds stale or empty
    // samples to the codec, which is what produces the robotic sound.
    if (sourceBuffer.mData == NULL ||
        sourceBuffer.mDataByteSize < frameByteCount ||
        tempBuffer.mData == NULL) {
        return;
    }

    int size = 0;
    char *encodedAudio = encodeSpeex((spx_int16_t*) sourceBuffer.mData, sourceBuffer.mDataByteSize, &size);
    NSLog(@"Encoded size: %i", size);
    if (encodedAudio == NULL) {
        return;
    }

    short* decodedAudio = decodeSpeex(encodedAudio, size, sourceBuffer.mDataByteSize);
    free(encodedAudio);
    if (decodedAudio == NULL) {
        return;
    }

    memcpy(tempBuffer.mData, decodedAudio, frameByteCount);
    free(decodedAudio);
}
有人知道为什么我得到的音质这么差吗?根据 Speex 官网上的示例,音质不应该是这样的。
最佳答案
我也遇到过这个问题。我的解决办法是检查缓冲区是否已被正确填满;如果没有填满,缓冲区中就会有空数据,这些空数据被播放出来就会产生机械声音。