Question

我是一名大学生，现在正在学习FFmpeg。

我写了一个软件，可以用FFmpeg录制桌面和音频（'virtual-audio-capturer'），现在正在编写音频和视频同步。我遇到了录像播放速度过快的一些问题。

当我在互联网上寻求音频和视频同步帮助时，我发现了一个计算PTS的公式：

pts = n * ((1 / timebase) / fps)

使用此公式时，我发现一种现象。

1. 帧速率越高，视频播放速度越快。

2. 帧速率越低，视频播放速度越慢。

我还发现帧速率为10时，视频播放速度将是正确的。

为什么会发生这种情况？

我已经考虑了这个问题三天了。我真的希望有人能帮助我解决这个问题。

我非常感谢您的帮助。

#include "stdafx.h"

#ifdef  __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"

#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/imgutils.h"
#include "libavutil/mathematics.h"
#include "libavutil/samplefmt.h"
#include "libavutil/time.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/file.h"
#include "libavutil/mem.h"
#include "libavutil/frame.h"
#include "libavfilter/avfilter.h"
#include "libswresample/swresample.h"

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")

#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif

// Shared state of the recorder. Everything below is file-global and is touched
// by the setup functions, the main mux loop and the two capture threads.

// Demuxer contexts for the screen input (OpenVideoCapture), the audio input
// (OpenAudioCapture) and the muxer context of the output file (OpenOutPut).
AVFormatContext *pFormatCtx_Video = NULL, *pFormatCtx_Audio = NULL, *pFormatCtx_Out = NULL;

// Video encoder context; assigned in OpenOutPut.
AVCodecContext *outVideoCodecCtx = NULL;
// Not assigned anywhere in the code visible here.
AVCodecContext *outAudioCodecCtx = NULL;

// NOTE(review): OpenOutPut declares locals with these same names, so in the
// code visible here these globals stay NULL.
AVStream *pVideoStream = NULL, *pAudioStream = NULL;

// Video encoder looked up in OpenOutPut.
AVCodec *outAVCodec;
// Not referenced in the code visible here.
AVCodec *outAudioCodec;

// Decoder context / decoder of the captured screen stream (OpenVideoCapture).
AVCodecContext  *pCodecCtx_Video;
AVCodec         *pCodec_Video;
// Byte FIFO of converted video frames: written by ScreenCapThreadProc, read by _tmain.
AVFifoBuffer    *fifo_video = NULL;
// Sample FIFO: created and written by AudioCapThreadProc, read by _tmain.
AVAudioFifo     *fifo_audio = NULL;
// Stream indices inside pFormatCtx_Out (set to 0 and 1 in OpenOutPut).
int VideoIndex, AudioIndex;
// NOTE(review): never assigned in the code visible here; only compared against
// AV_CODEC_ID_H264 in OpenOutPut.
int codec_id;

// Guard the audio FIFO and the video FIFO respectively.
CRITICAL_SECTION AudioSection, VideoSection;



// Scaler that converts captured frames to YUV420P (created in OpenVideoCapture).
SwsContext *img_convert_ctx;
// Byte size of one frame in the capture pixel format (set in OpenVideoCapture).
int frame_size = 0;

// picture_buf: one-frame buffer allocated in _tmain and also used by
// ScreenCapThreadProc. frame_buf is not referenced in the code visible here.
uint8_t *picture_buf = NULL, *frame_buf = NULL;

// Capture threads run while this is true; _tmain clears it on a key press.
bool bCap = true;

// Thread entry points, defined after _tmain.
DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam );
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam );

int OpenVideoCapture()
{
    AVInputFormat *ifmt=av_find_input_format("gdigrab");
    AVDictionary *options = NULL;
    av_dict_set(&options, "framerate", "60", NULL);
    if(avformat_open_input(&pFormatCtx_Video, "desktop", ifmt, &options)!=0)
    {
        printf("Couldn't open input stream.（无法打开视频输入流）\n");
        return -1;
    }
    if(avformat_find_stream_info(pFormatCtx_Video,NULL)<0)
    {
        printf("Couldn't find stream information.（无法获取视频流信息）\n");
        return -1;
    }
    if (pFormatCtx_Video->streams[0]->codec->codec_type != AVMEDIA_TYPE_VIDEO)
    {
        printf("Couldn't find video stream information.（无法获取视频流信息）\n");
        return -1;
    }
    pCodecCtx_Video = pFormatCtx_Video->streams[0]->codec;
    pCodec_Video = avcodec_find_decoder(pCodecCtx_Video->codec_id);
    if(pCodec_Video == NULL)
    {
        printf("Codec not found.（没有找到解码器）\n");
        return -1;
    }
    if(avcodec_open2(pCodecCtx_Video, pCodec_Video, NULL) < 0)
    {
        printf("Could not open codec.（无法打开解码器）\n");
        return -1;
    }

    av_dump_format(pFormatCtx_Video, 0, NULL, 0);

    img_convert_ctx = sws_getContext(pCodecCtx_Video->width, pCodecCtx_Video->height, pCodecCtx_Video->pix_fmt, 
        pCodecCtx_Video->width, pCodecCtx_Video->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL); 

    frame_size = avpicture_get_size(pCodecCtx_Video->pix_fmt, pCodecCtx_Video->width, pCodecCtx_Video->height);
    fifo_video = av_fifo_alloc(30 * avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx_Video->width, pCodecCtx_Video->height));

    return 0;
}

static char *dup_wchar_to_utf8(wchar_t *w)
{
    char *s = NULL;
    int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
    s = (char *) av_malloc(l);
    if (s)
        WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
    return s;
}

int OpenAudioCapture()
{
    AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
    char * psDevName = dup_wchar_to_utf8(L"audio=virtual-audio-capturer");

    if (avformat_open_input(&pFormatCtx_Audio, psDevName, pAudioInputFmt,NULL) < 0)
    {
        printf("Couldn't open input stream.（无法打开音频输入流）\n");
        return -1;
    }

    if(avformat_find_stream_info(pFormatCtx_Audio,NULL)<0)  
        return -1; 

    if(pFormatCtx_Audio->streams[0]->codec->codec_type != AVMEDIA_TYPE_AUDIO)
    {
        printf("Couldn't find video stream information.（无法获取音频流信息）\n");
        return -1;
    }

    AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_Audio->streams[0]->codec->codec_id);
    if(0 > avcodec_open2(pFormatCtx_Audio->streams[0]->codec, tmpCodec, NULL))
    {
        printf("can not find or open audio decoder!\n");
    }

    av_dump_format(pFormatCtx_Audio, 0, NULL, 0);

    return 0;
}

int OpenOutPut()
{
    AVStream *pVideoStream = NULL, *pAudioStream = NULL;
    const char *outFileName = "test.mp4";
    avformat_alloc_output_context2(&pFormatCtx_Out, NULL, NULL, outFileName);

    if (pFormatCtx_Video->streams[0]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
    {
        VideoIndex = 0;
        pVideoStream = avformat_new_stream(pFormatCtx_Out, NULL);
        if (!pVideoStream)
        {
            printf("can not new stream for output!\n");
            return -1;
        }

        outVideoCodecCtx = avcodec_alloc_context3(outAVCodec);
        if ( !outVideoCodecCtx )
        {
            printf("Error : avcodec_alloc_context3()\n");
            return -1;
        }

        //set codec context param
        outVideoCodecCtx = pVideoStream->codec;
        outVideoCodecCtx->codec_id = AV_CODEC_ID_MPEG4;
        outVideoCodecCtx->width = pFormatCtx_Video->streams[0]->codec->width;
        outVideoCodecCtx->height = pFormatCtx_Video->streams[0]->codec->height;
        outVideoCodecCtx->time_base = pFormatCtx_Video->streams[0]->codec->time_base;
        outVideoCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
        outVideoCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;

        if (codec_id == AV_CODEC_ID_H264)
        {
            av_opt_set(outVideoCodecCtx->priv_data, "preset", "slow", 0);
        }

        outAVCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
        if( !outAVCodec )
        {
            printf("\n\nError : avcodec_find_encoder()");
            return -1;
        }
        if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
            outVideoCodecCtx->flags |=CODEC_FLAG_GLOBAL_HEADER;

        if ((avcodec_open2(outVideoCodecCtx,outAVCodec, NULL)) < 0)
        {
            printf("can not open the encoder\n");
            return -1;
        }
    }

    if(pFormatCtx_Audio->streams[0]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
    {
        AVCodecContext *pOutputCodecCtx;
        AudioIndex = 1;
        pAudioStream = avformat_new_stream(pFormatCtx_Out, NULL);

        pAudioStream->codec->codec = avcodec_find_encoder(pFormatCtx_Out->oformat->audio_codec);

        pOutputCodecCtx = pAudioStream->codec;

        pOutputCodecCtx->sample_rate = pFormatCtx_Audio->streams[0]->codec->sample_rate;
        pOutputCodecCtx->channel_layout = pFormatCtx_Out->streams[0]->codec->channel_layout;
        pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pAudioStream->codec->channel_layout);
        if(pOutputCodecCtx->channel_layout == 0)
        {
            pOutputCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
            pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pOutputCodecCtx->channel_layout);

        }
        pOutputCodecCtx->sample_fmt = pAudioStream->codec->codec->sample_fmts[0];
        AVRational time_base={1, pAudioStream->codec->sample_rate};
        pAudioStream->time_base = time_base;
        //audioCodecCtx->time_base = time_base;

        pOutputCodecCtx->codec_tag = 0;  
        if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)  
            pOutputCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;

        if (avcodec_open2(pOutputCodecCtx, pOutputCodecCtx->codec, 0) < 0)
        {
            printf("编码器打开失败，退出程序\n");
            return -1;
        }
    }

    if (!(pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
    {
        if(avio_open(&pFormatCtx_Out->pb, outFileName, AVIO_FLAG_WRITE) < 0)
        {
            printf("can not open output file handle!\n");
            return -1;
        }
    }

    if(avformat_write_header(pFormatCtx_Out, NULL) < 0)
    {
        printf("can not write the header of the output file!\n");
        return -1;
    }

    return 0;
}

/*
 * Program entry point.
 *
 * Opens the screen input, the audio input and the output file, starts the two
 * capture threads, then loops: whichever of the two streams is behind (by
 * av_compare_ts on the last written timestamps) gets its next frame pulled
 * from its FIFO, encoded and written to the output. A key press stops the
 * capture; the loop ends once both FIFOs are (nearly) drained, after which
 * the trailer is written and the contexts are closed.
 *
 * Returns 0 on a normal run, -1 when one of the Open*() helpers fails.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    av_register_all();
    avdevice_register_all();
    if (OpenVideoCapture() < 0)
    {
        return -1;
    }
    if (OpenAudioCapture() < 0)
    {
        return -1;
    }
    if (OpenOutPut() < 0)
    {
        return -1;
    }
    // (disabled) An interactive frame-rate prompt with a default of 10 used to be here.

    InitializeCriticalSection(&VideoSection);
    InitializeCriticalSection(&AudioSection);

    // Frame wrapper around picture_buf: one picture in the encoder's pixel format.
    AVFrame *picture = av_frame_alloc();
    int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, pFormatCtx_Out->streams[VideoIndex]->codec->height);
    picture_buf = new uint8_t[size];

    avpicture_fill((AVPicture *)picture, picture_buf, 
        pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, 
        pFormatCtx_Out->streams[VideoIndex]->codec->height);



    // start the screen-capture thread
    CreateThread( NULL, 0, ScreenCapThreadProc, 0, 0, NULL);
    // start the audio-capture thread
    CreateThread( NULL, 0, AudioCapThreadProc, 0, 0, NULL);
    // Timestamp of the last packet written per stream, and frames encoded so far.
    int64_t cur_pts_v=0,cur_pts_a=0;
    int VideoFrameIndex = 0, AudioFrameIndex = 0;

    while(1)
    {
        // Any key press ends the capture; the pause gives the capture threads
        // time to notice bCap and leave their loops.
        if (_kbhit() != 0 && bCap)
        {
            bCap = false;
            Sleep(2000);
        }
        if (fifo_audio && fifo_video)
        {
            int sizeAudio = av_audio_fifo_size(fifo_audio);
            int sizeVideo = av_fifo_size(fifo_video);
            // Leave the loop once capture has stopped and the buffered data has been written out.
            if (av_audio_fifo_size(fifo_audio) <= pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && 
                av_fifo_size(fifo_video) <= frame_size && !bCap)
            {
                break;
            }
        }

        // Video is not ahead of audio: handle video next; otherwise handle audio.
        if(av_compare_ts(cur_pts_v, pFormatCtx_Out->streams[VideoIndex]->time_base, 
                         cur_pts_a,pFormatCtx_Out->streams[AudioIndex]->time_base) <= 0)
        {
            // Capture has stopped and the FIFO holds less than frame_size bytes:
            // park the video timestamp at the maximum so the comparison above
            // stops selecting this branch and the remaining audio gets written.
            if (av_fifo_size(fifo_video) < frame_size && !bCap)
            {
                cur_pts_v = 0x7fffffffffffffff;
            }
            if(av_fifo_size(fifo_video) >= size)
            {
                EnterCriticalSection(&VideoSection);
                av_fifo_generic_read(fifo_video, picture_buf, size, NULL); // copy one frame out of the FIFO into picture_buf
                LeaveCriticalSection(&VideoSection);

                avpicture_fill((AVPicture *)picture, picture_buf,
                    pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
                    pFormatCtx_Out->streams[VideoIndex]->codec->width,
                    pFormatCtx_Out->streams[VideoIndex]->codec->height); // point the frame's planes at picture_buf again

                // Formula being tried: pts = n * ((1 / timebase) / fps)
                // NOTE(review): 100000 and 180 are unexplained constants, and the
                // packet timestamps are later rescaled from the capture stream's
                // time base rather than the encoder's - confirm the intended units.
                //picture->pts = VideoFrameIndex * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 24);
                picture->pts = VideoFrameIndex * ((outVideoCodecCtx->time_base.den * 100000 / outVideoCodecCtx->time_base.num) / 180);

                int got_picture = 0;
                AVPacket pkt;
                av_init_packet(&pkt);

                pkt.data = NULL;
                pkt.size = 0;
                // encode the raw picture into pkt
                int ret = avcodec_encode_video2(pFormatCtx_Out->streams[VideoIndex]->codec, &pkt, picture, &got_picture);
                if(ret < 0)
                {
                    // Encoding failed: drop this picture (VideoFrameIndex is not advanced).
                    continue;
                }

                if (got_picture==1)
                {
                    pkt.stream_index = VideoIndex;
                    /*int count = 1;
                    pkt.pts = pkt.dts = count * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 15);
                    count++;*/

                    //x = pts * (timebase1.num / timebase1.den )* (timebase2.den / timebase2.num);

                    // Rescale from the capture stream's time base to the output stream's time base.
                    pkt.pts = av_rescale_q_rnd(pkt.pts, pFormatCtx_Video->streams[0]->time_base, 
                        pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));  
                    pkt.dts = av_rescale_q_rnd(pkt.dts,  pFormatCtx_Video->streams[0]->time_base, 
                        pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX)); 


                    // NOTE(review): the duration is hard-coded to 1/60 s in output time-base units.
                    pkt.duration = ((pFormatCtx_Out->streams[0]->time_base.den / pFormatCtx_Out->streams[0]->time_base.num) / 60);
                    //pkt.duration = 1000/60;
                    //pkt.pts = pkt.dts = Count * (ofmt_ctx->streams[stream_index]->time_base.den) /ofmt_ctx->streams[stream_index]->time_base.num / 10;

                    //Count++;


                    cur_pts_v = pkt.pts;

                    ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt);
                    //delete[] pkt.data;
                    av_free_packet(&pkt);
                }
                VideoFrameIndex++;
            }
        }
        else
        {
            if (NULL == fifo_audio)
            {
                continue;// the audio FIFO has not been created yet
            }
            // Same end-of-capture handling as for video: park the audio timestamp at the maximum.
            if (av_audio_fifo_size(fifo_audio) < pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && !bCap)
            {
                cur_pts_a = 0x7fffffffffffffff;
            }
            // Wait for one full encoder frame; 1024 samples is used when the encoder reports no frame size.
            if(av_audio_fifo_size(fifo_audio) >= 
                (pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024))
            {
                AVFrame *frame;
                frame = av_frame_alloc();
                frame->nb_samples = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size>0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size: 1024;
                frame->channel_layout = pFormatCtx_Out->streams[AudioIndex]->codec->channel_layout;
                frame->format = pFormatCtx_Out->streams[AudioIndex]->codec->sample_fmt;
                frame->sample_rate = pFormatCtx_Out->streams[AudioIndex]->codec->sample_rate;
                av_frame_get_buffer(frame, 0);

                EnterCriticalSection(&AudioSection);
                av_audio_fifo_read(fifo_audio, (void **)frame->data, 
                    (pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024));
                LeaveCriticalSection(&AudioSection);

                AVPacket pkt_out;
                av_init_packet(&pkt_out);
                int got_picture = -1;
                pkt_out.data = NULL;
                pkt_out.size = 0;

                // Audio timestamps are counted in samples: frame index times samples per frame.
                frame->pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                if (avcodec_encode_audio2(pFormatCtx_Out->streams[AudioIndex]->codec, &pkt_out, frame, &got_picture) < 0)
                {
                    printf("can not decoder a frame");
                }
                av_frame_free(&frame);
                if (got_picture) 
                {
                    pkt_out.stream_index = AudioIndex;
                    pkt_out.pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                    pkt_out.dts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                    pkt_out.duration = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;

                    cur_pts_a = pkt_out.pts;

                    int ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt_out);
                    av_free_packet(&pkt_out);
                }
                AudioFrameIndex++;
            }
        }
    }

    // Tear-down: buffers and FIFOs first, then finish and close the output, then the inputs.
    // NOTE(review): the AVFrame `picture`, the thread handles and the critical
    // sections are not released here.
    delete[] picture_buf;

    av_fifo_free(fifo_video);
    av_audio_fifo_free(fifo_audio);

    av_write_trailer(pFormatCtx_Out);

    avio_close(pFormatCtx_Out->pb);
    avformat_free_context(pFormatCtx_Out);

    if (pFormatCtx_Video != NULL)
    {
        avformat_close_input(&pFormatCtx_Video);
        pFormatCtx_Video = NULL;
    }
    if (pFormatCtx_Audio != NULL)
    {
        avformat_close_input(&pFormatCtx_Audio);
        pFormatCtx_Audio = NULL;
    }

    return 0;
}

/**
 * Thread entry point of the screen capture.
 *
 * While bCap is true: reads a packet from the screen input, decodes it,
 * converts the picture to the encoder's pixel format and appends its three
 * planes to fifo_video. A converted picture is dropped when the FIFO has no
 * room for it.
 *
 * The conversion target is a buffer owned by this thread. The main loop reads
 * frames from the FIFO into the global picture_buf and encodes from there, so
 * scaling into picture_buf from this thread would overwrite a picture that is
 * being encoded.
 *
 * @param lpParam  unused
 * @return 0 when the capture loop ends, 1 when the working buffers could not
 *         be allocated
 */
DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam )
{
    AVCodecContext *outCtx = pFormatCtx_Out->streams[VideoIndex]->codec;
    const int width = outCtx->width;
    const int height = outCtx->height;
    const int y_size = height * width;
    const int size = avpicture_get_size(outCtx->pix_fmt, width, height);

    AVFrame *pFrame = av_frame_alloc();    // decoded picture, capture pixel format
    AVFrame *picture = av_frame_alloc();   // converted picture, encoder pixel format
    uint8_t *scaled_buf = (size > 0) ? (uint8_t *)av_malloc(size) : NULL;
    if (!pFrame || !picture || !scaled_buf)
    {
        printf("Could not allocate the screen capture buffers.\n");
        av_free(scaled_buf);
        av_frame_free(&picture);
        av_frame_free(&pFrame);
        return 1;
    }

    avpicture_fill((AVPicture *)picture, scaled_buf, outCtx->pix_fmt, width, height);

    AVPacket packet;
    av_init_packet(&packet);
    while (bCap)
    {
        packet.data = NULL;
        packet.size = 0;
        if (av_read_frame(pFormatCtx_Video, &packet) < 0)
        {
            continue;
        }
        if (packet.stream_index == 0)
        {
            int got_picture = 0;
            if (avcodec_decode_video2(pCodecCtx_Video, pFrame, &got_picture, &packet) < 0)
            {
                printf("Decode Error.（解码错误）\n");
            }
            else if (got_picture)
            {
                sws_scale(img_convert_ctx,
                    (const uint8_t* const*)pFrame->data,
                    pFrame->linesize,
                    0,
                    height,
                    picture->data,
                    picture->linesize);

                if (av_fifo_space(fifo_video) >= size)
                {
                    // Y plane followed by the two quarter-size chroma planes.
                    EnterCriticalSection(&VideoSection);
                    av_fifo_generic_write(fifo_video, picture->data[0], y_size, NULL);
                    av_fifo_generic_write(fifo_video, picture->data[1], y_size/4, NULL);
                    av_fifo_generic_write(fifo_video, picture->data[2], y_size/4, NULL);
                    LeaveCriticalSection(&VideoSection);
                }
            }
        }
        // Reached for every packet that was read, including after a decode error.
        av_free_packet(&packet);
    }

    av_frame_free(&pFrame);
    av_frame_free(&picture);
    av_free(scaled_buf);
    return 0;
}

/**
 * Thread entry point of the audio capture.
 *
 * While bCap is true: reads a packet from the audio input, decodes it and
 * appends the samples to fifo_audio. The FIFO is created on the first decoded
 * frame with room for 30 frames of that size. Samples are dropped when the
 * FIFO has no room for the whole frame. A decode error ends the thread.
 *
 * @param lpParam  unused
 * @return 0 when the loop ends, 1 when the frame could not be allocated
 */
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam )
{
    AVFrame *frame = av_frame_alloc();
    if (!frame)
    {
        printf("Could not allocate the audio capture frame.\n");
        return 1;
    }

    AVPacket pkt;
    av_init_packet(&pkt);
    while (bCap)
    {
        pkt.data = NULL;
        pkt.size = 0;
        if (av_read_frame(pFormatCtx_Audio, &pkt) < 0)
        {
            continue;
        }

        int gotframe = 0;
        int decodeResult = avcodec_decode_audio4(pFormatCtx_Audio->streams[0]->codec, frame, &gotframe, &pkt);
        // The packet is no longer needed whether or not decoding worked.
        av_free_packet(&pkt);
        if (decodeResult < 0)
        {
            printf("can not decoder a frame");
            break;
        }

        if (!gotframe)
        {
            printf("没有获取到数据，继续下一次");
            continue;
        }

        if (NULL == fifo_audio)
        {
            fifo_audio = av_audio_fifo_alloc(pFormatCtx_Audio->streams[0]->codec->sample_fmt,
                pFormatCtx_Audio->streams[0]->codec->channels, 30 * frame->nb_samples);
            if (NULL == fifo_audio)
            {
                printf("Could not allocate the audio FIFO.\n");
                break;
            }
        }

        if (av_audio_fifo_space(fifo_audio) >= frame->nb_samples)
        {
            EnterCriticalSection(&AudioSection);
            av_audio_fifo_write(fifo_audio, (void **)frame->data, frame->nb_samples);
            LeaveCriticalSection(&AudioSection);
        }
    }

    av_frame_free(&frame);
    return 0;
}

也许还有另一种计算PTS和DTS的方法

我希望无论帧速率如何，视频播放速度都是正确的。不要太快或太慢。

Answer 1

最后，我找出了问题的原因。

录制生成的视频的帧率取决于实际采集到的视频流。我的计算机上gdigrab的最大采集帧率只有10帧，因此如果把帧率设置为10帧以上，播放速度就会过快；如果设置为少于10帧，播放速度就会过慢。但是在我用来玩游戏的那台计算机上运行同样的代码时，无论我选择10帧还是15帧，播放速度都是正确的。

不过，我仍然不知道我的gdigrab的帧速率只有0-10帧的原因。CPU、显卡、显示器和内存等许多因素都会影响视频录制的帧速率。

以下是使用FFmpeg捕获屏幕和音频的最终代码：

#include "stdafx.h"

#ifdef  __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"

#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/imgutils.h"
#include "libavutil/mathematics.h"
#include "libavutil/samplefmt.h"
#include "libavutil/time.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/file.h"
#include "libavutil/mem.h"
#include "libavutil/frame.h"
#include "libavfilter/avfilter.h"
#include "libswresample/swresample.h"

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")

#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif

// Shared state of the recorder, used by the setup functions and the main loop.

// Demuxer contexts for the screen input (OpenVideoCapture), the audio input
// (OpenAudioCapture) and the muxer context of the output file (OpenOutPut).
AVFormatContext *pFormatCtx_Video = NULL, *pFormatCtx_Audio = NULL, *pFormatCtx_Out = NULL;

// Video encoder context; assigned in OpenOutPut.
AVCodecContext *outVideoCodecCtx = NULL;
// Not assigned anywhere in the code visible here.
AVCodecContext *outAudioCodecCtx = NULL;

// NOTE(review): OpenOutPut declares locals with these same names, so in the
// code visible here these globals stay NULL.
AVStream *pVideoStream = NULL, *pAudioStream = NULL;

// Video encoder looked up in OpenOutPut.
AVCodec *outAVCodec;
// Not referenced in the code visible here.
AVCodec *outAudioCodec;

// Decoder context / decoder of the captured screen stream (OpenVideoCapture).
AVCodecContext  *pCodecCtx_Video;
AVCodec         *pCodec_Video;
// Video FIFO allocated in OpenVideoCapture; audio FIFO read by _tmain.
AVFifoBuffer    *fifo_video = NULL;
AVAudioFifo     *fifo_audio = NULL;
// Stream indices inside pFormatCtx_Out (set to 0 and 1 in OpenOutPut).
int VideoIndex, AudioIndex;
// NOTE(review): never assigned in the code visible here; only compared against
// AV_CODEC_ID_H264 in OpenOutPut.
int codec_id;

// Initialized in _tmain; presumably guard the audio and video FIFOs - the code
// that takes them is not visible here.
CRITICAL_SECTION AudioSection, VideoSection;

// Scaler that converts captured frames to YUV420P (created in OpenVideoCapture).
SwsContext *img_convert_ctx;
// Byte size of one frame in the capture pixel format (set in OpenVideoCapture).
int frame_size = 0;

// One-frame buffer allocated in _tmain.
uint8_t *picture_buf = NULL;

// Cleared by _tmain on a key press to request the end of the capture.
bool bCap = true;

// Thread entry points started from _tmain; defined later in the file.
DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam );
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam );

static char *dup_wchar_to_utf8(wchar_t *w)
{
    char *s = NULL;
    int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
    s = (char *) av_malloc(l);
    if (s)
        WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
    return s;
}

/**
 * Asks the user for a capture frame rate (menu A-F, either letter case), opens
 * the desktop through the "gdigrab" input device at that rate, opens the
 * matching decoder and prepares the scaler (capture format -> YUV420P) and the
 * video FIFO (room for 60 YUV420P frames).
 *
 * Sets the globals pFormatCtx_Video, pCodecCtx_Video, pCodec_Video,
 * img_convert_ctx, frame_size and fifo_video.
 *
 * @return 0 on success, -1 on an invalid menu choice or any failure.
 */
int OpenVideoCapture()
{
    const char *framerate = NULL;

    printf("选择视频播放帧率：\n");
    printf("A   5帧\n");
    printf("B   10帧\n");
    printf("C   15帧\n");
    printf("D   20帧\n");
    printf("E   25帧\n");
    printf("F   30帧\n");

    // Upper and lower case share one label pair; end of input lands in default.
    switch (getchar())
    {
    case 'A': case 'a': framerate = "5";  break;
    case 'B': case 'b': framerate = "10"; break;
    case 'C': case 'c': framerate = "15"; break;
    case 'D': case 'd': framerate = "20"; break;
    case 'E': case 'e': framerate = "25"; break;
    case 'F': case 'f': framerate = "30"; break;
    default:
        printf("选项输入错误\n");
        return -1;
    }

    AVDictionary *options = NULL;
    av_dict_set(&options, "framerate", framerate, 0);

    AVInputFormat *ifmt = av_find_input_format("gdigrab");
    int openResult = avformat_open_input(&pFormatCtx_Video, "desktop", ifmt, &options);
    // The dictionary (or whatever entries were not consumed) is still owned by
    // the caller after the call, on success as well as on failure.
    av_dict_free(&options);
    if (openResult != 0)
    {
        printf("Couldn't open input stream.（无法打开视频输入流）\n");
        return -1;
    }
    if (avformat_find_stream_info(pFormatCtx_Video, NULL) < 0)
    {
        printf("Couldn't find stream information.（无法获取视频流信息）\n");
        return -1;
    }
    if (pFormatCtx_Video->streams[0]->codec->codec_type != AVMEDIA_TYPE_VIDEO)
    {
        printf("Couldn't find video stream information.（无法获取视频流信息）\n");
        return -1;
    }

    pCodecCtx_Video = pFormatCtx_Video->streams[0]->codec;
    pCodec_Video = avcodec_find_decoder(pCodecCtx_Video->codec_id);
    if (pCodec_Video == NULL)
    {
        printf("Codec not found.（没有找到解码器）\n");
        return -1;
    }
    if (avcodec_open2(pCodecCtx_Video, pCodec_Video, NULL) < 0)
    {
        printf("Could not open codec.（无法打开解码器）\n");
        return -1;
    }

    av_dump_format(pFormatCtx_Video, 0, NULL, 0);

    // Same picture size in and out; only the pixel format changes.
    img_convert_ctx = sws_getContext(pCodecCtx_Video->width,
        pCodecCtx_Video->height,
        pCodecCtx_Video->pix_fmt,
        pCodecCtx_Video->width,
        pCodecCtx_Video->height,
        PIX_FMT_YUV420P,
        SWS_BICUBIC, NULL, NULL, NULL);
    if (img_convert_ctx == NULL)
    {
        printf("Could not create the scaling context.\n");
        return -1;
    }

    // NOTE(review): frame_size is measured in the capture pixel format while the
    // FIFO below stores YUV420P frames - confirm that the main loop's
    // comparisons against frame_size are intended.
    frame_size = avpicture_get_size(pCodecCtx_Video->pix_fmt, pCodecCtx_Video->width, pCodecCtx_Video->height);
    fifo_video = av_fifo_alloc(60 * avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx_Video->width, pCodecCtx_Video->height));
    if (fifo_video == NULL)
    {
        printf("Could not allocate the video FIFO.\n");
        return -1;
    }

    return 0;
}

int OpenAudioCapture()
{
    AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
    AVDictionary *opt = NULL;
    char * psDevName = dup_wchar_to_utf8(L"audio=virtual-audio-capturer");
    if (avformat_open_input(&pFormatCtx_Audio, psDevName, pAudioInputFmt, &opt) < 0)
    {
        printf("Couldn't open input stream.（无法打开音频输入流）\n");
        return -1;
    }

    if(avformat_find_stream_info(pFormatCtx_Audio,NULL)<0)  
        return -1; 

    if(pFormatCtx_Audio->streams[0]->codec->codec_type != AVMEDIA_TYPE_AUDIO)
    {
        printf("Couldn't find video stream information.（无法获取音频流信息）\n");
        return -1;
    }

    AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_Audio->streams[0]->codec->codec_id);
    if(0 > avcodec_open2(pFormatCtx_Audio->streams[0]->codec, tmpCodec, NULL))
    {
        printf("can not find or open audio decoder!\n");
    }

    av_dump_format(pFormatCtx_Audio, 0, NULL, 0);

    return 0;
}

int OpenOutPut()
{
    AVStream *pVideoStream = NULL, *pAudioStream = NULL;
    const char *outFileName = "test.mp4";
    avformat_alloc_output_context2(&pFormatCtx_Out, NULL, NULL, outFileName);
    if (pFormatCtx_Video->streams[0]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
    {
        VideoIndex = 0;
        pVideoStream = avformat_new_stream(pFormatCtx_Out, NULL);
        if (!pVideoStream)
        {
            printf("can not new stream for output!\n");
            return -1;
        }

        outVideoCodecCtx = avcodec_alloc_context3(outAVCodec);
        if ( !outVideoCodecCtx )
        {
            printf("Error : avcodec_alloc_context3()\n");
            return -1;
        }

        outVideoCodecCtx = pVideoStream->codec;
        outVideoCodecCtx->codec_id = AV_CODEC_ID_MPEG4;
        outVideoCodecCtx->width = pFormatCtx_Video->streams[0]->codec->width;
        outVideoCodecCtx->height = pFormatCtx_Video->streams[0]->codec->height;
        outVideoCodecCtx->time_base = pFormatCtx_Video->streams[0]->codec->time_base;;
        outVideoCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
        outVideoCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;

        if (codec_id == AV_CODEC_ID_H264)
        {
            av_opt_set(outVideoCodecCtx->priv_data, "preset", "slow", 0);
        }

        outAVCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
        if( !outAVCodec )
        {
            printf("\n\nError : avcodec_find_encoder()");
            return -1;
        }
        if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
            outVideoCodecCtx->flags |=CODEC_FLAG_GLOBAL_HEADER;

        if ((avcodec_open2(outVideoCodecCtx,outAVCodec, NULL)) < 0)
        {
            printf("can not open the encoder\n");
            return -1;
        }
    }
    if(pFormatCtx_Audio->streams[0]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
    {
        AVCodecContext *pOutputCodecCtx;
        AudioIndex = 1;
        pAudioStream = avformat_new_stream(pFormatCtx_Out, NULL);

        pAudioStream->codec->codec = avcodec_find_encoder(pFormatCtx_Out->oformat->audio_codec);

        pOutputCodecCtx = pAudioStream->codec;

        pOutputCodecCtx->sample_rate = pFormatCtx_Audio->streams[0]->codec->sample_rate;
        pOutputCodecCtx->channel_layout = pFormatCtx_Out->streams[0]->codec->channel_layout;
        pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pAudioStream->codec->channel_layout);
        if(pOutputCodecCtx->channel_layout == 0)
        {
            pOutputCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
            pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pOutputCodecCtx->channel_layout);

        }
        pOutputCodecCtx->sample_fmt = pAudioStream->codec->codec->sample_fmts[0];
        AVRational time_base={1, pAudioStream->codec->sample_rate};
        pAudioStream->time_base = time_base;

        pOutputCodecCtx->codec_tag = 0;  
        if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)  
            pOutputCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;

        if (avcodec_open2(pOutputCodecCtx, pOutputCodecCtx->codec, 0) < 0)
        {
            printf("编码器打开失败，退出程序\n");
            return -1;
        }
    }

    if (!(pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
    {
        if(avio_open(&pFormatCtx_Out->pb, outFileName, AVIO_FLAG_WRITE) < 0)
        {
            printf("can not open output file handle!\n");
            return -1;
        }
    }
    if(avformat_write_header(pFormatCtx_Out, NULL) < 0)
    {
        printf("can not write the header of the output file!\n");
        return -1;
    }

    return 0;
}

int _tmain(int argc, _TCHAR* argv[])
{
    av_register_all();
    avdevice_register_all();
    if (OpenVideoCapture() < 0)
    {
        return -1;
    }
    if (OpenAudioCapture() < 0)
    {
        return -1;
    }
    if (OpenOutPut() < 0)
    {
        return -1;
    }

    InitializeCriticalSection(&VideoSection);
    InitializeCriticalSection(&AudioSection);

    AVFrame *picture = av_frame_alloc();
    int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, pFormatCtx_Out->streams[VideoIndex]->codec->height);
    picture_buf = new uint8_t[size];

    avpicture_fill((AVPicture *)picture, picture_buf, 
        pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, 
        pFormatCtx_Out->streams[VideoIndex]->codec->height);

    //star cap screen thread
    CreateThread( NULL, 0, ScreenCapThreadProc, 0, 0, NULL);
    //star cap audio thread
    CreateThread( NULL, 0, AudioCapThreadProc, 0, 0, NULL);
    int64_t cur_pts_v=0,cur_pts_a=0;
    int64_t VideoFrameIndex = 0, AudioFrameIndex = 0;
    int64_t count = 1;
    int64_t video_pre_pts = 0;

    while(1)
    {
        if (_kbhit() != 0 && bCap)
        {
            bCap = false;
            Sleep(2000);
        }
        if (fifo_audio && fifo_video)
        {
            int sizeAudio = av_audio_fifo_size(fifo_audio);
            int sizeVideo = av_fifo_size(fifo_video);
            //缓存数据写完就结束循环
            if (av_audio_fifo_size(fifo_audio) <= pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && 
                av_fifo_size(fifo_video) <= frame_size && !bCap)
            {
                break;
            }
        }

        if(av_compare_ts(cur_pts_v, pFormatCtx_Out->streams[VideoIndex]->time_base, cur_pts_a,pFormatCtx_Out->streams[AudioIndex]->time_base) <= 0)
        {
            if (av_fifo_size(fifo_video) < frame_size && !bCap)
            {
                cur_pts_v = 0x7fffffffffffffff;
            }
            if(av_fifo_size(fifo_video) >= size)
            {
                //将数据从avfifobuffer馈送到用户提供的回调。
                EnterCriticalSection(&VideoSection);
                av_fifo_generic_read(fifo_video, picture_buf, size, NULL); 
                LeaveCriticalSection(&VideoSection);

                //根据指定的图像参数和提供的图像数据缓冲区设置图片字段。
                avpicture_fill((AVPicture *)picture, picture_buf,
                    pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
                    pFormatCtx_Out->streams[VideoIndex]->codec->width,
                    pFormatCtx_Out->streams[VideoIndex]->codec->height); 

                //pts = n * (（1 / timbase）/ fps);
                //picture->pts = VideoFrameIndex * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 15);
                picture->pts = av_rescale_q(VideoFrameIndex,outVideoCodecCtx->time_base,pFormatCtx_Video->streams[0]->time_base);
                printf("picture->pts: %d\n",picture->pts);

                int got_picture = 0;
                AVPacket pkt;
                av_init_packet(&pkt);

                pkt.data = NULL;
                pkt.size = 0;
                //从帧中获取输入的原始视频数据
                int ret = avcodec_encode_video2(pFormatCtx_Out->streams[VideoIndex]->codec, &pkt, picture, &got_picture);
                if(ret < 0)
                {
                    continue;
                }

                if (got_picture==1)
                {
                    pkt.stream_index = VideoIndex;

                    //pFormatCtx_Video //pFormatCtx_Out
                    pkt.pts = av_rescale_q_rnd(pkt.pts, pFormatCtx_Video->streams[0]->time_base, pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX)); 
                    printf("pkt.pts = %d\n",pkt.pts);
                    pkt.dts = av_rescale_q_rnd(pkt.dts, pFormatCtx_Video->streams[0]->time_base, pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX)); 
                    printf("pkt.dts = %d\n",pkt.dts);

                    pkt.duration = ((pFormatCtx_Out->streams[0]->time_base.den / pFormatCtx_Out->streams[0]->time_base.num) / outVideoCodecCtx->time_base.den);
                    //pkt.duration = 1;
                    //pkt.duration = av_rescale_q(pkt.duration,outVideoCodecCtx->time_base,pFormatCtx_Video->streams[0]->time_base);

                    printf("pkt.duration = %d\n",pkt.duration);
                    pkt.pos = -1;

                    cur_pts_v = pkt.pts;

                    ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt);
                    if (ret<0)
                    {
                        printf("Error muxing packet\n");
                        break;
                    }
                    av_free_packet(&pkt);
                }
                VideoFrameIndex++;
            }
        }
        else
        {
            if (NULL == fifo_audio)
            {
                continue;//还未初始化fifo
            }
            if (av_audio_fifo_size(fifo_audio) < pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && !bCap)
            {
                cur_pts_a = 0x7fffffffffffffff;
            }
            if(av_audio_fifo_size(fifo_audio) >= 
                (pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024))
            {
                AVFrame *frame;
                frame = av_frame_alloc();
                frame->nb_samples = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size>0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size: 1024;
                frame->channel_layout = pFormatCtx_Out->streams[AudioIndex]->codec->channel_layout;
                frame->format = pFormatCtx_Out->streams[AudioIndex]->codec->sample_fmt;
                frame->sample_rate = pFormatCtx_Out->streams[AudioIndex]->codec->sample_rate;
                av_frame_get_buffer(frame, 0);

                EnterCriticalSection(&AudioSection);
                av_audio_fifo_read(fifo_audio, (void **)frame->data, 
                    (pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024));
                LeaveCriticalSection(&AudioSection);

                AVPacket pkt_out;
                av_init_packet(&pkt_out);
                int got_picture = -1;
                pkt_out.data = NULL;
                pkt_out.size = 0;

                frame->pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                if (avcodec_encode_audio2(pFormatCtx_Out->streams[AudioIndex]->codec, &pkt_out, frame, &got_picture) < 0)
                {
                    printf("can not decoder a frame");
                }
                av_frame_free(&frame);
                if (got_picture)
                {
                    pkt_out.stream_index = AudioIndex;
                    pkt_out.pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                    pkt_out.dts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                    pkt_out.duration = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;

                    cur_pts_a = pkt_out.pts;

                    int ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt_out);
                    if (ret<0)
                    {
                        printf("Error muxing packet\n");
                        break;
                    }
                    av_free_packet(&pkt_out);
                }
                AudioFrameIndex++;
            }
        }

    }

    delete[] picture_buf;

    av_fifo_free(fifo_video);
    av_audio_fifo_free(fifo_audio);

    av_write_trailer(pFormatCtx_Out);

    avio_close(pFormatCtx_Out->pb);
    avformat_free_context(pFormatCtx_Out);

    if (pFormatCtx_Video != NULL)
    {
        avformat_close_input(&pFormatCtx_Video);
        pFormatCtx_Video = NULL;
    }
    if (pFormatCtx_Audio != NULL)
    {
        avformat_close_input(&pFormatCtx_Audio);
        pFormatCtx_Audio = NULL;
    }

    return 0;
}

/*
 * Screen capture thread.
 *
 * Reads packets from the video capture input, decodes them, converts each
 * decoded picture with img_convert_ctx into the output encoder's pixel
 * format and appends the three planes to fifo_video. Runs until bCap is
 * cleared.
 *
 * The converted picture lives in a buffer owned by this thread. The global
 * picture_buf is read into and encoded from by the muxing loop, so scaling
 * into it here would race with the encoder.
 *
 * lpParam is unused. Returns 0 on normal exit, 1 if the working buffers
 * cannot be allocated.
 */
DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam )
{
    AVCodecContext *outCodec = pFormatCtx_Out->streams[VideoIndex]->codec;
    const int width = outCodec->width;
    const int height = outCodec->height;
    const int y_size = height * width;
    const int size = avpicture_get_size(outCodec->pix_fmt, width, height);

    AVFrame *pFrame = av_frame_alloc();
    AVFrame *picture = av_frame_alloc();
    uint8_t *scaled_buf = (size > 0) ? (uint8_t *)av_malloc(size) : NULL;
    if (pFrame == NULL || picture == NULL || scaled_buf == NULL)
    {
        printf("can not allocate the capture buffers\n");
        av_free(scaled_buf);
        av_frame_free(&pFrame);
        av_frame_free(&picture);
        return 1;
    }

    avpicture_fill((AVPicture *)picture, scaled_buf,
        outCodec->pix_fmt,
        width,
        height);

    AVPacket packet;
    av_init_packet(&packet);
    int got_picture = 0;

    while(bCap)
    {
        packet.data = NULL;
        packet.size = 0;
        if (av_read_frame(pFormatCtx_Video, &packet) < 0)
        {
            continue;
        }
        if(packet.stream_index == 0)
        {
            if (avcodec_decode_video2(pCodecCtx_Video, pFrame, &got_picture, &packet) < 0)
            {
                printf("Decode Error.（解码错误）\n");
                // Release the packet on the error path too.
                av_free_packet(&packet);
                continue;
            }
            if (got_picture)
            {
                sws_scale(img_convert_ctx,
                    (const uint8_t* const*)pFrame->data,
                    pFrame->linesize,
                    0,
                    height,
                    picture->data,
                    picture->linesize);

                // The plane sizes below (Y, then two quarter-size chroma
                // planes) assume a planar 4:2:0 output format.
                // If the FIFO has no room the frame is dropped; the muxing
                // loop timestamps video with a frame counter, so every drop
                // shortens the recorded timeline.
                EnterCriticalSection(&VideoSection);
                if (av_fifo_space(fifo_video) >= size)
                {
                    av_fifo_generic_write(fifo_video, picture->data[0], y_size, NULL);
                    av_fifo_generic_write(fifo_video, picture->data[1], y_size/4, NULL);
                    av_fifo_generic_write(fifo_video, picture->data[2], y_size/4, NULL);
                }
                LeaveCriticalSection(&VideoSection);
            }
        }
        av_free_packet(&packet);
    }

    av_frame_free(&pFrame);
    av_frame_free(&picture);
    av_free(scaled_buf);
    return 0;
}

/*
 * Audio capture thread.
 *
 * Reads packets from the audio capture input, decodes them with the input
 * stream's codec context and appends the decoded samples to fifo_audio.
 * The FIFO is created lazily on the first decoded frame, sized for 30 such
 * frames, using the input sample format and channel count. Runs until bCap
 * is cleared or decoding/allocation fails.
 *
 * lpParam is unused. Returns 0 on normal exit, 1 if the frame cannot be
 * allocated.
 */
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam )
{
    AVCodecContext *inCodec = pFormatCtx_Audio->streams[0]->codec;

    AVFrame *frame = av_frame_alloc();
    if (frame == NULL)
    {
        printf("can not allocate the audio frame\n");
        return 1;
    }

    AVPacket pkt;
    av_init_packet(&pkt);
    int gotframe = 0;

    while(bCap)
    {
        pkt.data = NULL;
        pkt.size = 0;
        if(av_read_frame(pFormatCtx_Audio,&pkt) < 0)
        {
            continue;
        }

        // The packet is no longer needed once decoded, whether or not
        // decoding succeeded.
        int ret = avcodec_decode_audio4(inCodec, frame, &gotframe, &pkt);
        av_free_packet(&pkt);
        if (ret < 0)
        {
            printf("can not decoder a frame");
            break;
        }

        if (!gotframe)
        {
            printf("没有获取到数据，继续下一次");
            continue;
        }

        if (NULL == fifo_audio)
        {
            fifo_audio = av_audio_fifo_alloc(inCodec->sample_fmt,
                inCodec->channels, 30 * frame->nb_samples);
            if (NULL == fifo_audio)
            {
                printf("can not allocate the audio fifo\n");
                break;
            }
        }

        // Samples are dropped when the FIFO is full.
        EnterCriticalSection(&AudioSection);
        if (av_audio_fifo_space(fifo_audio) >= frame->nb_samples)
        {
            av_audio_fifo_write(fifo_audio, (void **)frame->data, frame->nb_samples);
        }
        LeaveCriticalSection(&AudioSection);
    }

    av_frame_free(&frame);
    return 0;
}

ffmpeg录制视频播放太快

pts = n * ((1 / timebase) / fps)

为什么会发生这种情况？

1 个答案: