本文介绍了使用libav *库进行FFMPEG音频转码的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧! 问题描述 29岁程序员,3月因学历无情被辞! 我正在使用ffmpeg库编写音频转码应用程序。 这是我的代码 / * *文件:main.cpp *作者:vinod *编译g ++ -std = c ++ 11 -o audiotranscode main.cpp -lavformat -lavcodec -lavutil -lavfilter * * / #if!定义PRId64 || PRI_MACROS_BROKEN #undef PRId64 #define PRId64lld #endif #define __STDC_FORMAT_MACROS #ifdef __cplusplus externC{ #endif #include< stdio.h> #include< stdlib.h> #include< sys / types.h> #include< stdint.h> #include #include< libavutil / samplefmt.h> #include< libavutil / frame.h> #include< libavutil / timestamp.h> #include< libavformat / avformat.h> #include< libavfilter / avfilter.h> #include< libavfilter / buffersrc.h> #include< libavfilter / buffersink.h> #include< libswscale / swscale.h> #include< libavutil / opt.h> #ifdef __cplusplus } #endif #include< iostream> 使用命名空间std; int select_stream,got_frame,got_packet; AVFormatContext * in_fmt_ctx = NULL,* out_fmt_ctx = NULL; AVCodec * dec_codec = NULL,* enc_codec = NULL; AVStream * audio_st = NULL; AVCodecContext * enc_ctx = NULL,* dec_ctx = NULL; AVFrame * pFrame = NULL,* pFrameFiltered = NULL; AVFilterGraph * filter_graph = NULL; AVFilterContext * buffersrc_ctx = NULL; AVFilterContext * buffersink_ctx = NULL; AVPacket包; string inFileName =/home/vinod/vinod/Media/univac.webm; string outFileName =audio_extracted.m4a; int target_bit_rate = 128000, sample_rate = 22050, channels = 1; AVSampleFormat sample_fmt = AV_SAMPLE_FMT_S16; string filter_description =aresample = 22050,aformat = sample_fmts = s16:channel_layouts = mono; int log_averror(int errcode) { char * errbuf =(char *)calloc(AV_ERROR_MAX_STRING_SIZE,sizeof(char)); av_strerror(errcode,errbuf,AV_ERROR_MAX_STRING_SIZE); std :: cout<<< 错误 - << errbuf<<的std :: ENDL; delete [] errbuf; return -1; } / ** *初始化转换过滤器* / int initialize_audio_filter() { char args [512]; int ret; AVFilter * buffersrc = avfilter_get_by_name(abuffer); AVFilter * buffersink = avfilter_get_by_name(abuffersink); AVFilterInOut * outputs 
= avfilter_inout_alloc(); AVFilterInOut * inputs = avfilter_inout_alloc(); filter_graph = avfilter_graph_alloc(); const enum AVSampleFormat out_sample_fmts [] = {sample_fmt,AV_SAMPLE_FMT_NONE}; const int64_t out_channel_layouts [] = {av_get_default_channel_layout(out_fmt_ctx - > streams [0]→>编解码器 - >通道),-1}; const int out_sample_rates [] = {out_fmt_ctx - > stream [0] - >编解码器 - > sample_rate,-1}; if(!dec_ctx-> channel_layout) dec_ctx-> channel_layout = av_get_default_channel_layout(dec_ctx-> channels); snprintf(args,sizeof(args),time_base =%d /%d:sample_rate =%d:sample_fmt =%s:channel_layout = 0x%PRIx64, in_fmt_ctx - > ; stream [select_stream] - > time_base.num,in_fmt_ctx - > streams [select_stream] - > time_base.den, dec_ctx-> sample_rate, av_get_sample_fmt_name(dec_ctx-> sample_fmt) dec_ctx-> channel_layout); ret = avfilter_graph_create_filter(& buffersrc_ctx,buffersrc,in,args,NULL,filter_graph); if(ret av_log(NULL,AV_LOG_ERROR,can not create buffer source\\\); return -1; } ret = avfilter_graph_create_filter(& buffersink_ctx,buffersink,out,NULL,NULL,filter_graph); if(ret< 0){ av_log(NULL,AV_LOG_ERROR,无法创建缓冲区sink\\\); return ret; } ret = av_opt_set_int_list(buffersink_ctx,sample_fmts,out_sample_fmts,-1, AV_OPT_SEARCH_CHILDREN); if(ret av_log(NULL,AV_LOG_ERROR,can not set output sample format\\\); return ret; } ret = av_opt_set_int_list(buffersink_ctx,channel_layouts,out_channel_layouts,-1, AV_OPT_SEARCH_CHILDREN); if(ret< 0){ av_log(NULL,AV_LOG_ERROR,无法设置输出通道布局\); return ret; } ret = av_opt_set_int_list(buffersink_ctx,sample_rates,out_sample_rates,-1, AV_OPT_SEARCH_CHILDREN); if(ret< 0){ av_log(NULL,AV_LOG_ERROR,无法设置输出采样率\); return ret; } / *过滤器图表的端点。 * / 输出 - > name = av_strdup(in); 输出 - > filter_ctx = buffersrc_ctx; 输出 - > pad_idx = 0; 输出 - > next = NULL; / *过滤器图表的端点。 * / 输入 - > name = av_strdup(out); 输入 - > filter_ctx = buffersink_ctx; 输入 - > pad_idx = 0; 输入 - > next = NULL; string filter_desc = filter_description; if((ret = 
avfilter_graph_parse_ptr(filter_graph,filter_desc.c_str(),& inputs,& outputs,NULL))< 0){ log_averror(ret); exit(1); } if((ret = avfilter_graph_config(filter_graph,NULL))< 0){ log_averror(ret); exit(1); } / *打开宿缓冲区的总结 *注意:args缓冲区被重用用于存储通道布局字符串* / AVFilterLink * outlink = buffersink_ctx-> ;输入[0]; av_get_channel_layout_string(args,sizeof(args),-1,outlink-> channel_layout); av_log(NULL,AV_LOG_INFO,Output:srate:%dHz fmt:%s chlayout:%s\\\,(int)outlink-> sample_rate,(char * )av_x_if_null(av_get_sample_fmt_name((AVSampleFormat)outlink->格式),?), args); return 0; } / * * * / int main(int argc,char ** argv) { int ret cout<<< Hello World<< ENDL; printf(abcd); avcodec_register_all(); av_register_all(); avfilter_register_all(); / *打开输入文件,并分配格式上下文* / if(avformat_open_input(& in_fmt_ctx,inFileName.c_str(),NULL,NULL)< 0){ std :: cout<< 打开错误输入文件 - < inFileName<<的std :: ENDL; return -1; } / *检索流信息* / if(avformat_find_stream_info(in_fmt_ctx,NULL)< 0){ std :: cerr } / *转储格式详细信息* / printf(\\\ -------------------- -------------------------------------------------- \\\); av_dump_format(in_fmt_ctx,0,inFileName.c_str(),0); printf(\\\ ---------------------------------------- ------------------------------ \\\); / *选择音频流* / select_stream = av_find_best_stream(in_fmt_ctx,AVMEDIA_TYPE_AUDIO,-1,-1,& dec_codec,0); if(select_stream == AVERROR_STREAM_NOT_FOUND){ std :: cerr<<< 找不到音频流<<的std :: ENDL; return -1; } if(select_stream == AVERROR_DECODER_NOT_FOUND){ std :: cerr<< 找不到合适的解码器<的std :: ENDL; return -1; } dec_ctx = in_fmt_ctx - > stream [select_stream] - >编解码器; av_opt_set_int(dec_ctx,refcounted_frames,1,0); / *初始化音频解码器* / if((ret = avcodec_open2(dec_ctx,dec_codec,NULL))< 0){ av_log(NULL,AV_LOG_ERROR,无法打开音频解码器\); return ret; } / *分配输出上下文* / ret = avformat_alloc_output_context2(& out_fmt_ctx,NULL,NULL, outFileName.c_str()); if(ret< 0){ std :: cerr<< 无法为文件创建输出上下文<< outFileName<<的std :: ENDL; return -1; } / *找到编码器* / 枚举AVCodecID codec_id = out_fmt_ctx - >地毯 - > audio_codec; enc_codec = 
avcodec_find_encoder(codec_id); if(!(enc_codec)){ std :: cerr<< 找不到编码器 - < avcodec_get_name(codec_id)<<的std :: ENDL; return -1; } / *添加新流* / audio_st = avformat_new_stream(out_fmt_ctx,enc_codec); if(!audio_st){ std :: cerr<<< 无法添加音频流 - <<的std :: ENDL; } / *初始化音频编解码器* / audio_st - > id = out_fmt_ctx - > nb_streams - 1; enc_ctx = audio_st - >编解码器; enc_ctx - > codec_id = codec_id; enc_ctx - > codec_type = AVMEDIA_TYPE_AUDIO; enc_ctx - > bit_rate = target_bit_rate; enc_ctx - > sample_rate = sample_rate; enc_ctx - > sample_fmt = sample_fmt; enc_ctx - >频道=频道; enc_ctx - > channel_layout = av_get_default_channel_layout(enc_ctx - > channels); / *某些格式要将流标题分开。 * / if(out_fmt_ctx - > oformat - > flags& AVFMT_GLOBALHEADER){ enc_ctx - > flags | = CODEC_FLAG_GLOBAL_HEADER; } ret = avcodec_open2(out_fmt_ctx - > streams [0] - > codec,enc_codec,NULL); if(ret< 0){ std :: cerr<< 无法为文件创建编解码器上下文<< outFileName<<的std :: ENDL; return -1; } / *初始化过滤器* / initialize_audio_filter(); if(!(out_fmt_ctx - > oformat - > flags& AVFMT_NOFILE)){ int ret = avio_open(& out_fmt_ctx - > pb,outFileName.c_str() AVIO_FLAG_WRITE); if(ret< 0){ log_averror(ret); return -1; } } / *写入头* / if(avformat_write_header(out_fmt_ctx,NULL)< 0){ if(ret< 0){ log_averror(ret); return -1; } } / *分配框架* / pFrame = av_frame_alloc(); if(!pFrame){ std :: cerr<<< 无法分配frame\\\; return -1; } pFrameFiltered = av_frame_alloc(); if(!pFrameFiltered){ std :: cerr<< 无法分配frame\\\; return -1; } av_init_packet(& packet); packet.data = NULL; packet.size = 0; / *从流中读取数据包* / while(av_read_frame(in_fmt_ctx,& packet)> = 0){ if(packet.stream_index == select_stream) { avcodec_get_frame_defaults(pFrame); ret = avcodec_decode_audio4(dec_ctx,pFrame,& got_frame,& packet); if(ret< 0){ log_averror(ret); return ret; } printf(Decoded packet pts:%ld,packet.pts); printf(Frame Best Effor pts:%ld \\\,pFrame-> best_effort_timestamp); / *设置框架pts * / pFrame - > pts = av_frame_get_best_effort_timestamp(pFrame); if(got_frame){ / *将解码的帧推送到filtergraph * / ret = 
av_buffersrc_add_frame_flags(buffersrc_ctx,pFrame,AV_BUFFERSRC_FLAG_KEEP_REF); if(ret< 0){ log_averror(ret); return ret; } / *从filtergraph * / 中抽取过滤的帧,而(1){ ret = av_buffersink_get_frame(buffersink_ctx,pFrameFiltered); if((ret == AVERROR(EAGAIN))||(ret == AVERROR_EOF)){ break; } if(ret< 0){ printf(从filtergraph\\\获取过滤帧时出错); log_averror(ret); return -1; } / *初始化数据包* / AVPacket encodedPacket = {0}; av_init_packet(& encodedPacket); ret = avcodec_encode_audio2(out_fmt_ctx - > streams [0] - > codec,& encodedPacket,pFrameFiltered,& got_packet); 如果(!ret&& got_packet&& encodedPacket.size){ / *设置正确的pts和dts * / if(encodedPacket.pts!= AV_NOPTS_VALUE){ encodedPacket.pts = av_rescale_q(encodedPacket.pts,buffersink_ctx - > inputs [0] - > time_base, out_fmt_ctx - > streams [0] - > time_base); } if(encodedPacket.dts!= AV_NOPTS_VALUE){ encodedPacket.dts = av_rescale_q(encodedPacket.dts,buffersink_ctx - > inputs [0] - > time_base, out_fmt_ctx - > streams [0] - > time_base); } printf(Encoded packet pts%ld\\\,encodedPacket.pts); / *将压缩帧写入媒体文件。 * / ret = av_interleaved_write_frame(out_fmt_ctx,& encodedPacket); if(ret< 0){ log_averror(ret); return -1; } } else if(ret< 0){ log_averror(ret); return -1; } av_frame_unref(pFrameFiltered); } av_frame_unref(pFrame); } } } / *从编码器中刷新延迟帧* / got_packet = 1; while(got_packet){ AVPacket encodedPacket = {0}; av_init_packet(& encodedPacket); ret = avcodec_encode_audio2(out_fmt_ctx - > streams [0] - > codec,& encodedPacket,NULL,& got_packet); 如果(!ret&& got_packet&& encodedPacket.size){ / *设置正确的pts和dts * / if(encodedPacket.pts!= AV_NOPTS_VALUE){ encodedPacket.pts = av_rescale_q(encodedPacket.pts,buffersink_ctx - > inputs [0] - > time_base, out_fmt_ctx - > streams [0] - > time_base); } if(encodedPacket.dts!= AV_NOPTS_VALUE){ encodedPacket.dts = av_rescale_q(encodedPacket.dts,buffersink_ctx - > inputs [0] - > time_base, out_fmt_ctx - > streams [0] - > time_base); } printf(Encoded packet pts%ld\\\,encodedPacket.pts); / *将压缩帧写入媒体文件。 * / ret = 
av_interleaved_write_frame(out_fmt_ctx,& encodedPacket); if(ret< 0){ log_averror(ret); return -1; } } else if(ret< 0){ log_averror(ret); return -1; } } / *写预告片* / av_write_trailer(out_fmt_ctx); avfilter_graph_free(& filter_graph); if(dec_ctx) avcodec_close(dec_ctx); avformat_close_input(& in_fmt_ctx); av_frame_free(& pFrame); av_frame_free(& pFrameFiltered); if(!(out_fmt_ctx - > oformat - > flags& AVFMT_NOFILE)) avio_close(out_fmt_ctx - > pb); avcodec_close(out_fmt_ctx-> streams [0] - >编解码器); avformat_free_context(out_fmt_ctx); return 0; } 转码后的音频文件与输入的持续时间相同。但它完全嘈杂。有人可以告诉我我在这里做错什么!解决方案我已经发现问题在哪里,已经解决了。 当输出文件以大胆打开时,可以看到音频信号中插入了不必要的沉默。问题是提供给编码器的每帧样本数。 不同的编解码器期望不同的帧大小进行编码。而aac编码器的大小为1024.这可以通过在执行 avcodec_open2() enc_ctx-> frame_size c>。 过滤器需要为编码器提供每通道1024个采样数的帧。 所以在我的代码中, pFrameFiltered 需要每个通道具有1024个样本数。如果其小于1024,编码器将零添加到1024个样本,然后对其进行编码。 这可以通过拥有自己的fifo队列或由使用ffmpeg音频滤镜可用的滤镜。我们需要使用 here 。所以需要的更改是 `string filter_description = aresample = 22050,aformat = sample_fmts = s16:channel_layouts = mono,asetnsamples = n = 1024:p = 0;` 过滤器中的 n 更好地了解。检查由avcodec_open2()设置的 enc_ctx-> frame_size 字段,并适当地设置 n 的值。 I am writing an audio transcoding application using ffmpeg libraries.Here is my code /* * File: main.cpp * Author: vinod * Compile with "g++ -std=c++11 -o audiotranscode main.cpp -lavformat -lavcodec -lavutil -lavfilter" * */ #if !defined PRId64 || PRI_MACROS_BROKEN #undef PRId64 #define PRId64 "lld" #endif #define __STDC_FORMAT_MACROS #ifdef __cplusplus extern "C" { #endif #include <stdio.h> #include <stdlib.h> #include <sys/types.h> #include <stdint.h> #include <libavutil/imgutils.h> #include <libavutil/samplefmt.h> #include <libavutil/frame.h> #include <libavutil/timestamp.h> #include <libavformat/avformat.h> #include <libavfilter/avfilter.h> #include <libavfilter/buffersrc.h> #include <libavfilter/buffersink.h> #include <libswscale/swscale.h> #include <libavutil/opt.h> #ifdef __cplusplus } #endif #include <iostream> using namespace std; int select_stream, got_frame, 
got_packet; AVFormatContext *in_fmt_ctx = NULL, *out_fmt_ctx = NULL; AVCodec *dec_codec = NULL, * enc_codec = NULL; AVStream *audio_st = NULL; AVCodecContext *enc_ctx = NULL, *dec_ctx = NULL; AVFrame *pFrame = NULL, * pFrameFiltered = NULL; AVFilterGraph *filter_graph = NULL; AVFilterContext *buffersrc_ctx = NULL; AVFilterContext *buffersink_ctx = NULL; AVPacket packet; string inFileName = "/home/vinod/vinod/Media/univac.webm"; string outFileName = "audio_extracted.m4a"; int target_bit_rate = 128000, sample_rate = 22050, channels = 1; AVSampleFormat sample_fmt = AV_SAMPLE_FMT_S16; string filter_description = "aresample=22050,aformat=sample_fmts=s16:channel_layouts=mono"; int log_averror(int errcode) { char *errbuf = (char *) calloc(AV_ERROR_MAX_STRING_SIZE, sizeof(char)); av_strerror(errcode, errbuf, AV_ERROR_MAX_STRING_SIZE); std::cout << "Error - " << errbuf << std::endl; delete [] errbuf; return -1; } /** * Initialize conversion filter */ int initialize_audio_filter() { char args[512]; int ret; AVFilter *buffersrc = avfilter_get_by_name("abuffer"); AVFilter *buffersink = avfilter_get_by_name("abuffersink"); AVFilterInOut *outputs = avfilter_inout_alloc(); AVFilterInOut *inputs = avfilter_inout_alloc(); filter_graph = avfilter_graph_alloc(); const enum AVSampleFormat out_sample_fmts[] = {sample_fmt, AV_SAMPLE_FMT_NONE}; const int64_t out_channel_layouts[] = {av_get_default_channel_layout(out_fmt_ctx -> streams[0] -> codec -> channels), -1}; const int out_sample_rates[] = {out_fmt_ctx -> streams[0] -> codec -> sample_rate, -1}; if (!dec_ctx->channel_layout) dec_ctx->channel_layout = av_get_default_channel_layout(dec_ctx->channels); snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64, in_fmt_ctx -> streams[select_stream] -> time_base.num, in_fmt_ctx -> streams[select_stream] -> time_base.den, dec_ctx->sample_rate, av_get_sample_fmt_name(dec_ctx->sample_fmt), dec_ctx->channel_layout); ret = 
avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in", args, NULL, filter_graph); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n"); return -1; } ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n"); return ret; } ret = av_opt_set_int_list(buffersink_ctx, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n"); return ret; } ret = av_opt_set_int_list(buffersink_ctx, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n"); return ret; } ret = av_opt_set_int_list(buffersink_ctx, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN); if (ret < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n"); return ret; } /* Endpoints for the filter graph. */ outputs -> name = av_strdup("in"); outputs -> filter_ctx = buffersrc_ctx; outputs -> pad_idx = 0; outputs -> next = NULL; /* Endpoints for the filter graph. 
*/ inputs -> name = av_strdup("out"); inputs -> filter_ctx = buffersink_ctx; inputs -> pad_idx = 0; inputs -> next = NULL; string filter_desc = filter_description; if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_desc.c_str(), &inputs, &outputs, NULL)) < 0) { log_averror(ret); exit(1); } if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) { log_averror(ret); exit(1); } /* Print summary of the sink buffer * Note: args buffer is reused to store channel layout string */ AVFilterLink *outlink = buffersink_ctx->inputs[0]; av_get_channel_layout_string(args, sizeof(args), -1, outlink->channel_layout); av_log(NULL, AV_LOG_INFO, "Output: srate:%dHz fmt:%s chlayout:%s\n", (int) outlink->sample_rate, (char *) av_x_if_null(av_get_sample_fmt_name((AVSampleFormat) outlink->format), "?"), args); return 0; } /* * */ int main(int argc, char **argv) { int ret; cout << "Hello World" << endl; printf("abcd"); avcodec_register_all(); av_register_all(); avfilter_register_all(); /* open input file, and allocate format context */ if (avformat_open_input(&in_fmt_ctx, inFileName.c_str(), NULL, NULL) < 0) { std::cout << "error opening input file - " << inFileName << std::endl; return -1; } /* retrieve stream information */ if (avformat_find_stream_info(in_fmt_ctx, NULL) < 0) { std::cerr << "Could not find stream information in the input file " << inFileName << std::endl; } /* Dump format details */ printf("\n ---------------------------------------------------------------------- \n"); av_dump_format(in_fmt_ctx, 0, inFileName.c_str(), 0); printf("\n ---------------------------------------------------------------------- \n"); /* Choose a audio stream */ select_stream = av_find_best_stream(in_fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &dec_codec, 0); if (select_stream == AVERROR_STREAM_NOT_FOUND) { std::cerr << "No audio stream found" << std::endl; return -1; } if (select_stream == AVERROR_DECODER_NOT_FOUND) { std::cerr << "No suitable decoder found" << std::endl; return -1; } dec_ctx = 
in_fmt_ctx -> streams[ select_stream] -> codec; av_opt_set_int(dec_ctx, "refcounted_frames", 1, 0); /* init the audio decoder */ if ((ret = avcodec_open2(dec_ctx, dec_codec, NULL)) < 0) { av_log(NULL, AV_LOG_ERROR, "Cannot open audio decoder\n"); return ret; } /* allocate output context */ ret = avformat_alloc_output_context2(&out_fmt_ctx, NULL, NULL, outFileName.c_str()); if (ret < 0) { std::cerr << "Could not create output context for the file " << outFileName << std::endl; return -1; } /* find the encoder */ enum AVCodecID codec_id = out_fmt_ctx -> oformat -> audio_codec; enc_codec = avcodec_find_encoder(codec_id); if (!(enc_codec)) { std::cerr << "Could not find encoder for - " << avcodec_get_name(codec_id) << std::endl; return -1; } /* add a new stream */ audio_st = avformat_new_stream(out_fmt_ctx, enc_codec); if (!audio_st) { std::cerr << "Could not add audio stream - " << std::endl; } /* Initialise audio codec */ audio_st -> id = out_fmt_ctx -> nb_streams - 1; enc_ctx = audio_st -> codec; enc_ctx -> codec_id = codec_id; enc_ctx -> codec_type = AVMEDIA_TYPE_AUDIO; enc_ctx -> bit_rate = target_bit_rate; enc_ctx -> sample_rate = sample_rate; enc_ctx -> sample_fmt = sample_fmt; enc_ctx -> channels = channels; enc_ctx -> channel_layout = av_get_default_channel_layout(enc_ctx -> channels); /* Some formats want stream headers to be separate. 
*/ if (out_fmt_ctx -> oformat -> flags & AVFMT_GLOBALHEADER) { enc_ctx -> flags |= CODEC_FLAG_GLOBAL_HEADER; } ret = avcodec_open2(out_fmt_ctx -> streams[0] -> codec, enc_codec, NULL); if (ret < 0) { std::cerr << "Could not create codec context for the file " << outFileName << std::endl; return -1; } /* Initialize filter */ initialize_audio_filter(); if (!(out_fmt_ctx -> oformat -> flags & AVFMT_NOFILE)) { int ret = avio_open(& out_fmt_ctx -> pb, outFileName.c_str(), AVIO_FLAG_WRITE); if (ret < 0) { log_averror(ret); return -1; } } /* Write header */ if (avformat_write_header(out_fmt_ctx, NULL) < 0) { if (ret < 0) { log_averror(ret); return -1; } } /* Allocate frame */ pFrame = av_frame_alloc(); if (!pFrame) { std::cerr << "Could not allocate frame\n"; return -1; } pFrameFiltered = av_frame_alloc(); if (!pFrameFiltered) { std::cerr << "Could not allocate frame\n"; return -1; } av_init_packet(&packet); packet.data = NULL; packet.size = 0; /* Read packet from the stream */ while (av_read_frame(in_fmt_ctx, &packet) >= 0) { if (packet.stream_index == select_stream) { avcodec_get_frame_defaults(pFrame); ret = avcodec_decode_audio4(dec_ctx, pFrame, &got_frame, &packet); if (ret < 0) { log_averror(ret); return ret; } printf("Decoded packet pts : %ld ", packet.pts); printf("Frame Best Effor pts : %ld \n", pFrame->best_effort_timestamp); /* Set frame pts */ pFrame -> pts = av_frame_get_best_effort_timestamp(pFrame); if (got_frame) { /* push the decoded frame into the filtergraph */ ret = av_buffersrc_add_frame_flags(buffersrc_ctx, pFrame, AV_BUFFERSRC_FLAG_KEEP_REF); if (ret < 0) { log_averror(ret); return ret; } /* pull filtered frames from the filtergraph */ while (1) { ret = av_buffersink_get_frame(buffersink_ctx, pFrameFiltered); if ((ret == AVERROR(EAGAIN)) || (ret == AVERROR_EOF)) { break; } if (ret < 0) { printf("Error while getting filtered frames from filtergraph\n"); log_averror(ret); return -1; } /* Initialize the packets */ AVPacket encodedPacket = {0}; 
av_init_packet(&encodedPacket); ret = avcodec_encode_audio2(out_fmt_ctx -> streams[0] -> codec, &encodedPacket, pFrameFiltered, &got_packet); if (!ret && got_packet && encodedPacket.size) { /* Set correct pts and dts */ if (encodedPacket.pts != AV_NOPTS_VALUE) { encodedPacket.pts = av_rescale_q(encodedPacket.pts, buffersink_ctx -> inputs[0] -> time_base, out_fmt_ctx -> streams[0] -> time_base); } if (encodedPacket.dts != AV_NOPTS_VALUE) { encodedPacket.dts = av_rescale_q(encodedPacket.dts, buffersink_ctx -> inputs[0] -> time_base, out_fmt_ctx -> streams[0] -> time_base); } printf("Encoded packet pts %ld\n", encodedPacket.pts); /* Write the compressed frame to the media file. */ ret = av_interleaved_write_frame(out_fmt_ctx, &encodedPacket); if (ret < 0) { log_averror(ret); return -1; } } else if (ret < 0) { log_averror(ret); return -1; } av_frame_unref(pFrameFiltered); } av_frame_unref(pFrame); } } } /* Flush delayed frames from encoder*/ got_packet=1; while (got_packet) { AVPacket encodedPacket = {0}; av_init_packet(&encodedPacket); ret = avcodec_encode_audio2(out_fmt_ctx -> streams[0] -> codec, &encodedPacket, NULL, &got_packet); if (!ret && got_packet && encodedPacket.size) { /* Set correct pts and dts */ if (encodedPacket.pts != AV_NOPTS_VALUE) { encodedPacket.pts = av_rescale_q(encodedPacket.pts, buffersink_ctx -> inputs[0] -> time_base, out_fmt_ctx -> streams[0] -> time_base); } if (encodedPacket.dts != AV_NOPTS_VALUE) { encodedPacket.dts = av_rescale_q(encodedPacket.dts, buffersink_ctx -> inputs[0] -> time_base, out_fmt_ctx -> streams[0] -> time_base); } printf("Encoded packet pts %ld\n", encodedPacket.pts); /* Write the compressed frame to the media file. 
*/ ret = av_interleaved_write_frame(out_fmt_ctx, &encodedPacket); if (ret < 0) { log_averror(ret); return -1; } } else if (ret < 0) { log_averror(ret); return -1; } } /* Write Trailer */ av_write_trailer(out_fmt_ctx); avfilter_graph_free(&filter_graph); if (dec_ctx) avcodec_close(dec_ctx); avformat_close_input(&in_fmt_ctx); av_frame_free(&pFrame); av_frame_free(&pFrameFiltered); if (!(out_fmt_ctx -> oformat -> flags & AVFMT_NOFILE)) avio_close(out_fmt_ctx -> pb); avcodec_close(out_fmt_ctx->streams[0]->codec); avformat_free_context(out_fmt_ctx); return 0; }The audio file after transcoding is same duration as the input. But its completely noisy. Can somebody tell me what I am doing wrong here! 解决方案 I have found out where the problem was and it has been resolved.When the output file was opened in audacity, it was seen that there were unwanted silences inserted in the audio signal. The problem was with the 'number of samples per frame' supplied to the encoder.Different codecs expect different frame sizes for encoding. And aac encoder expects a size of 1024. This can be seen by observing enc_ctx->frame_size after execution of avcodec_open2().The filter needs to supply a frame with 1024 number of samples per channel to the encoder.So in my code, pFrameFiltered needs to have exactly 1024 number of samples per channel. If the its less than 1024 , the encoder appends zeros to make it to 1024 samples and then encodes it.This can be solved by either having our own fifo queue or by using the filter available with the ffmpeg audio filters. We need to use a filter asetnsamples=n=1024:p=0 as explained here. So the alteration required was `string filter_description = "aresample=22050,aformat=sample_fmts=s16:channel_layouts=mono,asetnsamples=n=1024:p=0";`Just play around with the value of n in the filter to understand better. Check the enc_ctx->frame_size field set by avcodec_open2( ) and set the value of n appropriately. 
这篇关于使用libav*库进行FFMPEG音频转码的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!