c - 使用libswresample从48000重新采样音频至44100

我正在尝试使用libswresample API将解码后的音频帧从48KHz重新采样到44.1KHz。我的代码如下:

// Fragment: resamples one audio frame with libswresample and returns the
// converted data in a newly allocated frame. The resampler below is set up
// for stereo planar-float input at 48000 Hz and output at 44100 Hz.
// 'frame' is the original decoded audio frame
// NOTE(review): the result of av_frame_alloc() is used without a NULL check.
AVFrame *output_frame = av_frame_alloc();

// Without this, there is no sound at all at the output (PTS stuff I guess)
av_frame_copy_props(output_frame, frame);

// Describe the output frame using the values stored in audioStream's codec
// context.
// NOTE(review): these come from the codec context, not from the literal
// 44100 / stereo / FLTP values passed to the resampler below -- confirm that
// the two agree.
output_frame->channel_layout = audioStream->codec->channel_layout;
output_frame->sample_rate = audioStream->codec->sample_rate;
output_frame->format = audioStream->codec->sample_fmt;

SwrContext *swr;
// Configure resampling context
swr = swr_alloc_set_opts(NULL,  // we're allocating a new context
                         AV_CH_LAYOUT_STEREO,  // out_ch_layout
                         AV_SAMPLE_FMT_FLTP,     // out_sample_fmt
                         44100,                // out_sample_rate
                         AV_CH_LAYOUT_STEREO,  // in_ch_layout
                         AV_SAMPLE_FMT_FLTP,   // in_sample_fmt
                         48000,                // in_sample_rate
                         0,                    // log_offset
                         NULL);                // log_ctx
// Initialize resampling context
// NOTE(review): the return value is ignored, so an initialization failure
// goes unnoticed.
swr_init(swr);

// Perform conversion
// NOTE(review): the return value is ignored here as well.
swr_convert_frame(swr, output_frame, frame);

// Close resampling context
// NOTE(review): the context is created and destroyed inside this fragment,
// so nothing is carried over from one call to the next. Presumably any
// samples still held inside the resampler are discarded at this point --
// confirm against the libswresample documentation.
swr_close(swr);
swr_free(&swr);
// Release the data referenced by the original frame and hand back the new
// one. av_frame_unref() drops the frame's buffers; it does not free the
// AVFrame structure itself.
av_frame_unref(frame);
return output_frame;

使用此代码，我可以在输出端听到音频，但它也很吵。根据我的阅读，没有av_frame_copy_props()的这段代码应该足够了，但是由于某些原因它无法正常工作。有任何想法吗？

编辑:输入流使用AAC对音频进行编码，样本数为1024。但是，转换后，样本数为925。

编辑:我尝试反向进行。由于我的应用程序从任何来源接收流，因此某些音频流为48KHz，而另一些为44.1KHz。因此，我尝试从44.1重采样到48，以避免重采样损失。但是现在，这些帧每个都有超过1024个样本，并且编码失败。

编辑:我尝试改用libavfilter，使用的过滤器链如下:

/**
 * Build and configure the audio filter graph
 *
 *     abuffer -> aformat -> asetnsamples -> abuffersink
 *
 * The graph and the four filter contexts are stored in the file-level
 * variables filter_graph, abuffer_ctx, aformat_ctx, asetnsamples_ctx and
 * abuffersink_ctx; strbuf is used as scratch space for the option strings.
 *
 * @param audio_st  input audio stream; its time base and its codec context's
 *                  sample rate, sample format and channel layout describe the
 *                  data fed into abuffer.
 * @return 0 on success; -1 if the graph cannot be allocated or a required
 *         filter is not available; otherwise the negative error code
 *         returned by the failing libavfilter call.
 *
 * NOTE(review): on failure the partially built graph is left in filter_graph;
 * this function does not free it -- confirm that the caller does.
 */
int init_filter_graph(AVStream *audio_st) {
    // create new graph
    filter_graph = avfilter_graph_alloc();
    if (!filter_graph) {
        av_log(NULL, AV_LOG_ERROR, "unable to create filter graph: out of memory\n");
        return -1;
    }

    const AVFilter *abuffer = avfilter_get_by_name("abuffer");
    const AVFilter *aformat = avfilter_get_by_name("aformat");
    const AVFilter *asetnsamples = avfilter_get_by_name("asetnsamples");
    const AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
    // A filter can be missing from the linked libavfilter build; report that
    // directly instead of failing later with a misleading message.
    if (!abuffer || !aformat || !asetnsamples || !abuffersink) {
        av_log(NULL, AV_LOG_ERROR, "required audio filter is not available\n");
        return -1;
    }

    int err;

    // create abuffer filter: the graph's source, described by the input stream
    AVCodecContext *avctx = audio_st->codec;
    AVRational time_base = audio_st->time_base;
    snprintf(strbuf, sizeof(strbuf),
             "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
             time_base.num, time_base.den, avctx->sample_rate,
             av_get_sample_fmt_name(avctx->sample_fmt),
             avctx->channel_layout);
    fprintf(stderr, "abuffer: %s\n", strbuf);
    err = avfilter_graph_create_filter(&abuffer_ctx, abuffer,
                                       NULL, strbuf, NULL, filter_graph);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "error initializing abuffer filter\n");
        return err;
    }

    // create aformat filter: requests planar float, 44100 Hz, stereo.
    // The layout constant is cast because PRIx64 requires a 64-bit argument
    // and the macro is not guaranteed to have that type.
    snprintf(strbuf, sizeof(strbuf),
             "sample_fmts=%s:sample_rates=%d:channel_layouts=0x%" PRIx64,
             av_get_sample_fmt_name(AV_SAMPLE_FMT_FLTP), 44100,
             (uint64_t)AV_CH_LAYOUT_STEREO);
    fprintf(stderr, "aformat: %s\n", strbuf);
    err = avfilter_graph_create_filter(&aformat_ctx, aformat,
                                       NULL, strbuf, NULL, filter_graph);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "unable to create aformat filter\n");
        return err;
    }

    // create asetnsamples filter: n=1024 samples per output frame, p=0
    snprintf(strbuf, sizeof(strbuf),
             "n=1024:p=0");
    fprintf(stderr, "asetnsamples: %s\n", strbuf);
    err = avfilter_graph_create_filter(&asetnsamples_ctx, asetnsamples,
                                       NULL, strbuf, NULL, filter_graph);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "unable to create asetnsamples filter\n");
        return err;
    }

    // create abuffersink filter: the graph's output, created without options
    err = avfilter_graph_create_filter(&abuffersink_ctx, abuffersink,
                                       NULL, NULL, NULL, filter_graph);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "unable to create abuffersink filter\n");
        return err;
    }

    // connect inputs and outputs; stop at the first link that fails
    err = avfilter_link(abuffer_ctx, 0, aformat_ctx, 0);
    if (err >= 0) {
        err = avfilter_link(aformat_ctx, 0, asetnsamples_ctx, 0);
    }
    if (err >= 0) {
        err = avfilter_link(asetnsamples_ctx, 0, abuffersink_ctx, 0);
    }
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "error connecting filters\n");
        return err;
    }

    err = avfilter_graph_config(filter_graph, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "error configuring the filter graph\n");
        return err;
    }
    return 0;
}

现在生成的帧具有1024个样本，但音频仍然断断续续。

最佳答案

不要为每个帧重新创建SwrContext。它需要从一帧到下一帧携带一些数据，以平滑重采样帧之间的“边缘”。

请在开始播放音频时只创建一次SwrContext，然后对每一帧调用swr_convert_frame。