使用libswresample重采样音频,重采样后会留下少量噪声

时间:2020-07-20 07:16:28

标签: c++ audio ffmpeg resampling libswresample

我正在尝试将音频从44Khz重新采样到48Khz,并且在重新采样后得到很小的光噪声。好像有人轻轻地在麦克风上打勾。这是双向发生的。从48Khz到44Khz,反之亦然。

我已经读到,这可能会发生,因为swrContext仍然还有一些数据,而且我应该在重新采样下一帧之前刷新上下文。而且,尽管这会有所帮助(噪音很小),但仍然存在。

我尝试改用FFmpeg重采样滤波器,但是输出只是很大的非相干噪声。我很确定libswresample在重采样时不会输出任何噪音,这意味着我只是不知道如何很好地使用它,而我却缺少一些选项。

这是重采样器的代码。

int ResampleFrame(VideoState * videoState, AVFrame *decoded_audio_frame,     enum AVSampleFormat out_sample_fmt, uint8_t * out_buf)
{
 int in_sample_rate = videoState->audio->ptrAudioCodecCtx_->sample_rate;
 int out_sample_rate = SAMPLE_RATE;

// get an instance of the AudioResamplingState struct, create if NULL
AudioResamplingState* arState = getAudioResampling(videoState->audio->ptrAudioCodecCtx_->channel_layout);

if (!arState->swr_ctx)
{
    printf("swr_alloc error.\n");
    return -1;
}

// get input audio channels
arState->in_channel_layout = (videoState->audio->ptrAudioCodecCtx_->channels ==
            av_get_channel_layout_nb_channels(videoState->audio->ptrAudioCodecCtx_->channel_layout)) ?
            videoState->audio->ptrAudioCodecCtx_->channel_layout :
            av_get_default_channel_layout(videoState->audio->ptrAudioCodecCtx_->channels);


// check input audio channels correctly retrieved
if (arState->in_channel_layout <= 0)
{
    printf("in_channel_layout error.\n");
    return -1;
}


arState->out_channel_layout = AV_CH_LAYOUT_STEREO;

// retrieve number of audio samples (per channel)
arState->in_nb_samples = decoded_audio_frame->nb_samples;
if (arState->in_nb_samples <= 0)
{
    printf("in_nb_samples error.\n");
    return -1;
}

// Set SwrContext parameters for resampling
av_opt_set_int(arState->swr_ctx, "in_channel_layout", arState->in_channel_layout, 0);
av_opt_set_int(arState->swr_ctx, "in_sample_rate", in_sample_rate, 0);
av_opt_set_sample_fmt(arState->swr_ctx, "in_sample_fmt", videoState->audio->ptrAudioCodecCtx_->sample_fmt, 0);


// Set SwrContext parameters for resampling
av_opt_set_int(arState->swr_ctx, "out_channel_layout", arState->out_channel_layout, 0);
av_opt_set_int(arState->swr_ctx, "out_sample_rate", out_sample_rate, 0);
av_opt_set_sample_fmt(arState->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);


// initialize SWR context after user parameters have been set
int ret = swr_init(arState->swr_ctx);
if (ret < 0)
   {
    printf("Failed to initialize the resampling context.\n");
    return -1;
   }


 // retrieve output samples number taking into account the progressive delay
int64_t delay = swr_get_delay(arState->swr_ctx, videoState->audio->ptrAudioCodecCtx_->sample_rate) + arState->in_nb_samples;
arState->out_nb_samples = av_rescale_rnd(delay, out_sample_rate, in_sample_rate, AV_ROUND_UP );

// check output samples number was correctly rescaled
if (arState->out_nb_samples <= 0)
{
    printf("av_rescale_rnd error\n");
    return -1;
}

// get number of output audio channels
arState->out_nb_channels = av_get_channel_layout_nb_channels(arState->out_channel_layout);

// allocate data pointers array for arState->resampled_data and fill data
// pointers and linesize accordingly
// check memory allocation for the resampled data was successful
ret = av_samples_alloc_array_and_samples(&arState->resampled_data, &arState->out_linesize, arState->out_nb_channels, arState->out_nb_samples, out_sample_fmt, 0);
if (ret < 0)
   {
    printf("av_samples_alloc_array_and_samples() error: Could not allocate destination samples.\n");
    return -1;
   }


if (arState->swr_ctx)
   {
    // do the actual audio data resampling
    // check audio conversion was successful
    int ret_num_samples = swr_convert(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,(const uint8_t**)decoded_audio_frame->data, decoded_audio_frame->nb_samples);
    //int ret_num_samples = swr_convert_frame(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,(const uint8_t**)decoded_audio_frame->data, decoded_audio_frame->nb_samples);

    if (ret_num_samples < 0)
       {
        printf("swr_convert_error.\n");
        return -1;
       }


    // get the required buffer size for the given audio parameters
    // check audio buffer size
    arState->resampled_data_size = av_samples_get_buffer_size(&arState->out_linesize,   arState->out_nb_channels,ret_num_samples,out_sample_fmt,1);

    if (arState->resampled_data_size < 0)
       {
        printf("av_samples_get_buffer_size error.\n");
        return -1;
       }
   } else {
           printf("swr_ctx null error.\n");
           return -1;
          }



// copy the resampled data to the output buffer
memcpy(out_buf, arState->resampled_data[0], arState->resampled_data_size);


// flush the swr context
int delayed = swr_convert(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,NULL,0);



if (arState->resampled_data)
   {
    av_freep(&arState->resampled_data[0]);
   }

av_freep(&arState->resampled_data);
arState->resampled_data = NULL;

int ret_data_size = arState->resampled_data_size;



return ret_data_size;
}

我也尝试使用here所示的过滤器,但我的输出只是噪声。

这是我的过滤器代码

int  ResampleFrame(AVFrame *frame, uint8_t *out_buf)
{
   /* Push the decoded frame into the filtergraph */
    qint32 ret;
    ret = av_buffersrc_add_frame_flags(buffersrc_ctx1, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
    if (ret < 0) 
       {
        printf("ResampleFrame: Error adding frame to buffer\n");
        // Delete input frame and return null
        av_frame_unref(frame);
        return 0;
    }


    //printf("resampling\n");
    AVFrame *resampled_frame = av_frame_alloc();


    /* Pull filtered frames from the filtergraph */
    ret = av_buffersink_get_frame(buffersink_ctx1, resampled_frame);

    /* Set the timestamp on the resampled frame */
    resampled_frame->best_effort_timestamp = resampled_frame->pts;

    if (ret < 0) 
       {
        av_frame_unref(frame);
        av_frame_unref(resampled_frame);
        return 0;
       }


    int buffer_size = av_samples_get_buffer_size(NULL,   2,resampled_frame->nb_samples,AV_SAMPLE_FMT_S16,1);

    memcpy(out_buf,resampled_frame->data,buffer_size);

    //av_frame_unref(frame);
    av_frame_unref(resampled_frame);
    return buffer_size;
}





QString filter_description1 = "aresample=48000,aformat=sample_fmts=s16:channel_layouts=stereo,asetnsamples=n=1024:p=0";

int InitAudioFilter(AVStream *inputStream) 
{

    char args[512];
    int ret;
    const AVFilter *buffersrc = avfilter_get_by_name("abuffer");
    const AVFilter *buffersink = avfilter_get_by_name("abuffersink");
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();
    filter_graph = avfilter_graph_alloc();


    const enum AVSampleFormat out_sample_fmts[] = {AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE};
    const int64_t out_channel_layouts[] = {AV_CH_LAYOUT_STEREO, -1};
    const int out_sample_rates[] = {48000, -1};

    snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
                         inputStream->codec->time_base.num, inputStream->codec->time_base.den,
                         inputStream->codec->sample_rate,
                         av_get_sample_fmt_name(inputStream->codec->sample_fmt),
                         inputStream->codec->channel_layout);


    ret = avfilter_graph_create_filter(&buffersrc_ctx1, buffersrc, "in", args, NULL, filter_graph);

    if (ret < 0) 
       {
        printf("InitAudioFilter: Unable to create buffersrc\n");
        return -1;
       }

    ret = avfilter_graph_create_filter(&buffersink_ctx1, buffersink, "out", NULL, NULL, filter_graph);

    if (ret < 0) 
       {
        printf("InitAudioFilter: Unable to create buffersink\n");
        return ret;
       }

    // set opt SAMPLE FORMATS
    ret = av_opt_set_int_list(buffersink_ctx1, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN);

    if (ret < 0) 
       {
        printf("InitAudioFilter: Cannot set output sample format\n");
        return ret;
       }

    // set opt CHANNEL LAYOUTS
    ret = av_opt_set_int_list(buffersink_ctx1, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN);

    if (ret < 0) {
        printf("InitAudioFilter: Cannot set output channel layout\n");
        return ret;
    }

    // set opt OUT SAMPLE RATES
    ret = av_opt_set_int_list(buffersink_ctx1, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN);

    if (ret < 0) 
       {
        printf("InitAudioFilter: Cannot set output sample rate\n");
        return ret;
       }

    /* Endpoints for the filter graph. */
    outputs -> name = av_strdup("in");
    outputs -> filter_ctx = buffersrc_ctx1;
    outputs -> pad_idx = 0;
    outputs -> next = NULL;

    /* Endpoints for the filter graph. */
    inputs -> name = av_strdup("out");
    inputs -> filter_ctx = buffersink_ctx1;
    inputs -> pad_idx = 0;
    inputs -> next = NULL;


    if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_description1.toStdString().c_str(), &inputs, &outputs, NULL)) < 0) 
       {
        printf("InitAudioFilter: Could not add the filter to graph\n");
       }


    if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) 
       {
        printf("InitAudioFilter: Could not configure the graph\n");
       }

    /* Print summary of the sink buffer
     * Note: args buffer is reused to store channel layout string */
    AVFilterLink *outlink = buffersink_ctx1->inputs[0];
    av_get_channel_layout_string(args, sizeof(args), -1, outlink->channel_layout);

    QString str = args;
    printf("Output: srate:%dHz fmt:%s chlayout: %s\n", (int) outlink->sample_rate, 
                                                      av_get_sample_fmt_name((AVSampleFormat) outlink->format),
                                                      str.toStdString().c_str());


    filterGraphInitialized_ = true; 
}

由于我对此没有太多的过滤器或音频经验,因此我可能在这里也缺少一些东西。但是不知道是什么。

谢谢

0 个答案:

没有答案