Question

我想用iphone相机＆amp;麦克风捕获通过FFMPEG RTMP Streaming推出的信息

以下功能捕获有关IOS的信息

- (void)captureOutput:(AVCaptureOutput *)captureOutput  didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection
{    
    if (connection == videoCaptureConnection)
    {
        [manager264 encoderToH264:sampleBuffer];
    }
    else if (connection == audioCaptureConnection)
    {
        [manager264 encoderToMP3:sampleBuffer];
    }
}

初始化FFMPEG

- (int)setX264Resource
{
    Global_Variables_VVV = (AppDelegate *)[[UIApplication sharedApplication] delegate];
    avformat_network_init();
    av_register_all();

    pFormatCtx = avformat_alloc_context();
    avformat_alloc_output_context2(&pFormatCtx, NULL, "flv", out_file);
    fmt = pFormatCtx->oformat;

    //Open output URL
    if (avio_open(&pFormatCtx->pb, out_file, AVIO_FLAG_READ_WRITE) < 0)
    {
        printf("Failed to open output file! \n");
        return -1;
    }

    /* Add the audio and video streams using the default format codecs
     * and initialize the codecs. */
    video_st = NULL;
    audio_st = NULL;
   if (fmt->video_codec != AV_CODEC_ID_NONE) {
        video_st = add_stream(pFormatCtx, &pCodec, AV_CODEC_ID_H264);
    }
   if (fmt->audio_codec != AV_CODEC_ID_NONE) {
        audio_st = add_stream(pFormatCtx, &aCodec, AV_CODEC_ID_MP3);
    }

    /* Now that all the parameters are set, we can open the audio and
     * video codecs and allocate the necessary encode buffers. */
    if (video_st)
        [self open_video:pFormatCtx avcodec:pCodec avstream:video_st];

    if (audio_st)
        [self open_audio:pFormatCtx avcodec:aCodec avstream:audio_st];

    // Show some Information
    av_dump_format(pFormatCtx, 0, out_file, 1);

    //Write File Header
    avformat_write_header(pFormatCtx, NULL);

    av_new_packet(&pkt, picture_size);
    av_new_packet(&pkt2, picture_size);

    AVCodecContext *c = video_st->codec;

    y_size = c->width * c->height;

    if (pFrame)
        pFrame->pts = 0;

    if(aFrame)
    {
        aFrame->pts = 0;
    }

    return 0;
}

static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
    AVCodecContext *c;
    AVStream *st;
    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);
    if (!(*codec))
    {
        NSLog(@"Could not find encoder for '%s'\n",
          avcodec_get_name(codec_id));
    }
    st = avformat_new_stream(oc, *codec);
    if (!st)
    {
        NSLog(@"Could not allocate stream\n");
    }
    st->id = oc->nb_streams-1;
    c = st->codec;
    switch ((*codec)->type)
    {
        case AVMEDIA_TYPE_AUDIO:
            c->codec_id = AV_CODEC_ID_MP3;
            c->codec_type = AVMEDIA_TYPE_AUDIO;
            c->channels = 1;

            c->sample_fmt = AV_SAMPLE_FMT_S16P;
            c->bit_rate = 128000;
            c->sample_rate = 44100;
            c->channel_layout = AV_CH_LAYOUT_MONO;
            break;
        case AVMEDIA_TYPE_VIDEO:
            c->codec_id = AV_CODEC_ID_H264;
            c->codec_type=AVMEDIA_TYPE_VIDEO;
            /* Resolution must be a multiple of two. */
            c->width    = 720;
            c->height   = 1280;
            /* timebase: This is the fundamental unit of time (in seconds) in terms
             * of which frame timestamps are represented. For fixed-fps content,
             * timebase should be 1/framerate and timestamp increments should be
             * identical to 1. */
            c->time_base.den = 30;
            c->time_base.num = 1;
            c->gop_size      = 15; /* emit one intra frame every twelve frames at most */
            c->pix_fmt       = PIX_FMT_YUV420P;
            c->max_b_frames = 0;
            c->bit_rate = 3000000;
            c->qmin = 10;
            c->qmax = 51;

            break;
        default:
            break;
    }
    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
    return st;
}

SampleBuffer将变为H264并推出RTMP Streaming

- (void)encoderToH264:(CMSampleBufferRef)sampleBuffer
{
    CVPixelBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    if (CVPixelBufferLockBaseAddress(imageBuffer, 0) == kCVReturnSuccess)
    {
        UInt8 *bufferbasePtr = (UInt8 *)CVPixelBufferGetBaseAddress(imageBuffer);
        UInt8 *bufferPtr = (UInt8 *)CVPixelBufferGetBaseAddressOfPlane(imageBuffer,0);
        UInt8 *bufferPtr1 = (UInt8 *)CVPixelBufferGetBaseAddressOfPlane(imageBuffer,1);
        size_t buffeSize = CVPixelBufferGetDataSize(imageBuffer);
        size_t width = CVPixelBufferGetWidth(imageBuffer);
        size_t height = CVPixelBufferGetHeight(imageBuffer);
        size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);
        size_t bytesrow0 = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer,0);
        size_t bytesrow1  = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer,1);
        size_t bytesrow2 = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer,2);
        UInt8 *yuv420_data = (UInt8 *)malloc(width * height *3/ 2); // buffer to store YUV with layout YYYYYYYYUUVV


        /* convert NV12 data to YUV420*/
        UInt8 *pY = bufferPtr ;
        UInt8 *pUV = bufferPtr1;
        UInt8 *pU = yuv420_data + width*height;
        UInt8 *pV = pU + width*height/4;
        for(int i =0;i<height;i++)
        {
            memcpy(yuv420_data+i*width,pY+i*bytesrow0,width);
        }
        for(int j = 0;j<height/2;j++)
        {
            for(int i =0;i<width/2;i++)
            {
                *(pU++) = pUV[i<<1];
                *(pV++) = pUV[(i<<1) + 1];
            }
            pUV+=bytesrow1;
        }

        //Read raw YUV data
        picture_buf = yuv420_data;
        pFrame->data[0] = picture_buf;              // Y
        pFrame->data[1] = picture_buf+ y_size;      // U
        pFrame->data[2] = picture_buf+ y_size*5/4;  // V

        int got_picture = 0;

        // Encode
        pFrame->width = 720;
        pFrame->height = 1280;
        pFrame->format = PIX_FMT_YUV420P;

        AVCodecContext *c = video_st->codec;
        int ret = avcodec_encode_video2(c, &pkt, pFrame, &got_picture);
        if(ret < 0)
        {
            printf("Failed to encode! \n");
        }

        if (got_picture==1)
        {
            /* Compute current audio and video time. */
            video_time = video_st ? video_st->pts.val * av_q2d(video_st->time_base) : 0.0;
            pFrame->pts += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);

            if(pkt.size != 0)
            {
                printf("Succeed to encode frame: %5lld\tsize:%5d\n", pFrame->pts, pkt.size);
                pkt.stream_index = video_st->index;
                ret = av_write_frame(pFormatCtx, &pkt);
                av_free_packet(&pkt);
            }
        }
        free(yuv420_data);
    }
    CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
}

SampleBuffer将变为MP3并推出RTMP Streaming

-(void)encoderToMP3:(CMSampleBufferRef)sampleBuffer
{
    CMSampleTimingInfo timing_info;
    CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &timing_info);
    double  pts=0;
    double  dts=0;
    AVCodecContext *c;
    int got_packet, ret;
    c = audio_st->codec;
    CMItemCount numSamples = CMSampleBufferGetNumSamples(sampleBuffer);

    NSUInteger channelIndex = 0;

    CMBlockBufferRef audioBlockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);

    size_t audioBlockBufferOffset = (channelIndex * numSamples * sizeof(SInt16));
    size_t lengthAtOffset = 0;
    size_t totalLength = 0;
    SInt16 *samples = NULL;
    CMBlockBufferGetDataPointer(audioBlockBuffer, audioBlockBufferOffset, &lengthAtOffset, &totalLength, (char **)(&samples));

    const AudioStreamBasicDescription *audioDescription = CMAudioFormatDescriptionGetStreamBasicDescription(CMSampleBufferGetFormatDescription(sampleBuffer));

    SwrContext *swr = swr_alloc();

    int in_smprt = (int)audioDescription->mSampleRate;
    av_opt_set_int(swr, "in_channel_layout",  AV_CH_LAYOUT_MONO, 0);
    av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout,  0);

    av_opt_set_int(swr, "in_channel_count", audioDescription->mChannelsPerFrame,  0);
    av_opt_set_int(swr, "out_channel_count", 1,  0);

    av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout,  0);
    av_opt_set_int(swr, "in_sample_rate",     audioDescription->mSampleRate,0);

    av_opt_set_int(swr, "out_sample_rate",    audio_st->codec->sample_rate,0);

    av_opt_set_sample_fmt(swr, "in_sample_fmt",  AV_SAMPLE_FMT_S16, 0);

    av_opt_set_sample_fmt(swr, "out_sample_fmt", audio_st->codec->sample_fmt,  0);

    swr_init(swr);
    uint8_t **input = NULL;
    int src_linesize;
    int in_samples = (int)numSamples;
    ret = av_samples_alloc_array_and_samples(&input, &src_linesize, audioDescription->mChannelsPerFrame, in_samples, AV_SAMPLE_FMT_S16P, 0);

    *input=(uint8_t*)samples;
    uint8_t *output=NULL;

    int out_samples = av_rescale_rnd(swr_get_delay(swr, in_smprt) +in_samples, (int)audio_st->codec->sample_rate, in_smprt, AV_ROUND_UP);

    av_samples_alloc(&output, NULL, audio_st->codec->channels, out_samples, audio_st->codec->sample_fmt, 0);
    in_samples = (int)numSamples;
    out_samples = swr_convert(swr, &output, out_samples, (const uint8_t **)input, in_samples);

    aFrame->nb_samples =(int) out_samples;

    ret = avcodec_fill_audio_frame(aFrame, audio_st->codec->channels, audio_st->codec->sample_fmt,
                                   (uint8_t *)output,
                                   (int) out_samples *
                                   av_get_bytes_per_sample(audio_st->codec->sample_fmt) *
                                   audio_st->codec->channels, 1);
    if (ret < 0)
    {
        fprintf(stderr, "Error fill audio frame: %s\n", av_err2str(ret));
    }
    aFrame->channel_layout = audio_st->codec->channel_layout;
    aFrame->channels=audio_st->codec->channels;
    aFrame->sample_rate= audio_st->codec->sample_rate;

    if (timing_info.presentationTimeStamp.timescale!=0)
        pts=(double) timing_info.presentationTimeStamp.value/timing_info.presentationTimeStamp.timescale;


    aFrame->pts = pts*audio_st->time_base.den;
    aFrame->pts = av_rescale_q(aFrame->pts, audio_st->time_base, audio_st->codec->time_base);

    ret = avcodec_encode_audio2(c, &pkt2, aFrame, &got_packet);

    if (ret < 0)
    {
        fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
    }
    swr_free(&swr);

    if (got_packet)
    {
        pkt2.stream_index = audio_st->index;        

        // Write the compressed frame to the media file.

        ret = av_interleaved_write_frame(pFormatCtx, &pkt2);
        if (ret != 0)
        {
            fprintf(stderr, "Error while writing audio frame: %s\n", av_err2str(ret));
            av_free_packet(&pkt2);
        }
    }
}

执行后很快出现“断管”问题。 PTS目前感觉不调整，但不知道如何调整PTS。

2016-03-09 16:57:41.058 PoliceCamPlayer[1004:193465] recordVideo....
[libx264 @ 0x12f8b6e00] using cpu capabilities: ARMv8 NEON
[libx264 @ 0x12f8b6e00] profile Constrained Baseline, level 3.1
[libx264 @ 0x12f8b6e00] 264 - core 148 - H.264/MPEG-4 AVC codec - Copyleft 2003-2016 - http://www.videolan.org/x264.html - options: cabac=0 ref=1 deblock=1:0:0 analyse=0x1:0x111 me=hex subme=2 psy=1 psy_rd=1.00:0.00 mixed_ref=0 me_range=16 chroma_me=1 trellis=0 8x8dct=0 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=0 threads=2 lookahead_threads=2 sliced_threads=1 slices=2 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=0 weightp=0 keyint=15 keyint_min=1 scenecut=40 intra_refresh=0 rc=abr mbtree=0 bitrate=3000 ratetol=1.0 qcomp=0.60 qpmin=25 qpmax=51 qpstep=4 ip_ratio=1.40 aq=1:1.00
Output #0, flv, to 'rtmp://XXX.XX.XXX.XX/myapp/jackal':
    Stream #0:0: Video: h264 (libx264), yuv420p, 720x1280, q=25-51, 3000 kb/s, 23 tbc
    Stream #0:1: Audio: mp3 (libmp3lame), 44100 Hz, mono, s16p, 64 kb/s
[flv @ 0x12f8b5400] Using AVStream.codec.time_base as a timebase hint to the muxer is deprecated. Set AVStream.time_base instead.
[flv @ 0x12f8b5400] Using AVStream.codec.time_base as a timebase hint to the muxer is deprecated. Set AVStream.time_base instead.
[libx264 @ 0x12f8b6e00] Provided packet is too small, needs to be 33468
Failed to encode! 
Audio_pts:4154432515 pts_time:4.15443e+06 dts:4154432515 dts_time:4.15443e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:43 pts_time:0.043 dts:43 dts_time:0.043 duration:0 duration_time:0 stream_index:0
Audio_pts:4154433667 pts_time:4.15443e+06 dts:4154433667 dts_time:4.15443e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154434854 pts_time:4.15443e+06 dts:4154434854 dts_time:4.15443e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:86 pts_time:0.086 dts:86 dts_time:0.086 duration:0 duration_time:0 stream_index:0
Audio_pts:4154435996 pts_time:4.15444e+06 dts:4154435996 dts_time:4.15444e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154437138 pts_time:4.15444e+06 dts:4154437138 dts_time:4.15444e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:129 pts_time:0.129 dts:129 dts_time:0.129 duration:0 duration_time:0 stream_index:0
Audio_pts:4154438281 pts_time:4.15444e+06 dts:4154438281 dts_time:4.15444e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:172 pts_time:0.172 dts:172 dts_time:0.172 duration:0 duration_time:0 stream_index:0
Audio_pts:4154439467 pts_time:4.15444e+06 dts:4154439467 dts_time:4.15444e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:215 pts_time:0.215 dts:215 dts_time:0.215 duration:0 duration_time:0 stream_index:0
Audio_pts:4154440609 pts_time:4.15444e+06 dts:4154440609 dts_time:4.15444e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154441752 pts_time:4.15444e+06 dts:4154441752 dts_time:4.15444e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:258 pts_time:0.258 dts:258 dts_time:0.258 duration:0 duration_time:0 stream_index:0
Audio_pts:4154442884 pts_time:4.15444e+06 dts:4154442884 dts_time:4.15444e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154444071 pts_time:4.15444e+06 dts:4154444071 dts_time:4.15444e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:301 pts_time:0.301 dts:301 dts_time:0.301 duration:0 duration_time:0 stream_index:0
Audio_pts:4154445213 pts_time:4.15445e+06 dts:4154445213 dts_time:4.15445e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154446355 pts_time:4.15445e+06 dts:4154446355 dts_time:4.15445e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:344 pts_time:0.344 dts:344 dts_time:0.344 duration:0 duration_time:0 stream_index:0
Audio_pts:4154447498 pts_time:4.15445e+06 dts:4154447498 dts_time:4.15445e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:387 pts_time:0.387 dts:387 dts_time:0.387 duration:0 duration_time:0 stream_index:0
Audio_pts:4154448640 pts_time:4.15445e+06 dts:4154448640 dts_time:4.15445e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154449826 pts_time:4.15445e+06 dts:4154449826 dts_time:4.15445e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:430 pts_time:0.43 dts:430 dts_time:0.43 duration:0 duration_time:0 stream_index:0
Audio_pts:4154450969 pts_time:4.15445e+06 dts:4154450969 dts_time:4.15445e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154452101 pts_time:4.15445e+06 dts:4154452101 dts_time:4.15445e+06 duration:1152 duration_time:1.152 stream_index:1
...................
...................
...................
Video_pts:4343 pts_time:4.343 dts:4343 dts_time:4.343 duration:0 duration_time:0 stream_index:0
Audio_pts:4154622619 pts_time:4.15462e+06 dts:4154622619 dts_time:4.15462e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:4386 pts_time:4.386 dts:4386 dts_time:4.386 duration:0 duration_time:0 stream_index:0
Audio_pts:4154623761 pts_time:4.15462e+06 dts:4154623761 dts_time:4.15462e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154624903 pts_time:4.15462e+06 dts:4154624903 dts_time:4.15462e+06 duration:1152 duration_time:1.152 stream_index:1
Audio_pts:4154626090 pts_time:4.15463e+06 dts:4154626090 dts_time:4.15463e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:4429 pts_time:4.429 dts:4429 dts_time:4.429 duration:0 duration_time:0 stream_index:0
Audio_pts:4154627222 pts_time:4.15463e+06 dts:4154627222 dts_time:4.15463e+06 duration:1152 duration_time:1.152 stream_index:1
Video_pts:4472 pts_time:4.472 dts:4472 dts_time:4.472 duration:0 duration_time:0 stream_index:0
Error while writing audio frame: Broken pipe
Audio_pts:4154628365 pts_time:4.15463e+06 dts:4154628365 dts_time:4.15463e+06 duration:1152 duration_time:1.152 stream_index:1
Error while writing audio frame: Broken pipe
Audio_pts:4154629507 pts_time:4.15463e+06 dts:4154629507 dts_time:4.15463e+06 duration:1152 duration_time:1.152 stream_index:1
Error while writing audio frame: Broken pipe
Audio_pts:4154630693 pts_time:4.15463e+06 dts:4154630693 dts_time:4.15463e+06 duration:1152 duration_time:1.152 stream_index:1
Error while writing audio frame: Broken pipe
Audio_pts:4154631836 pts_time:4.15463e+06 dts:4154631836 dts_time:4.15463e+06 duration:1152 duration_time:1.152 stream_index:1
Error while writing audio frame: Broken pipe
Audio_pts:4154632978 pts_time:4.15463e+06 dts:4154632978 dts_time:4.15463e+06 duration:1152 duration_time:1.152 stream_index:1
.......................
.......................
.......................
2016-03-09 16:57:49.345 PoliceCamPlayer[1004:193465] stopRecord!!!
Video_pts:7783 pts_time:7.783 dts:7783 dts_time:7.783 duration:0 duration_time:0 stream_index:0
[flv @ 0x12f8b5400] Failed to update header with correct duration.
[flv @ 0x12f8b5400] Failed to update header with correct filesize.
[libx264 @ 0x12f8b6e00] frame I:28    Avg QP:25.36  size: 24181
[libx264 @ 0x12f8b6e00] frame P:154   Avg QP:25.34  size:  6603
[libx264 @ 0x12f8b6e00] mb I  I16..4: 80.9%  0.0% 19.1%
[libx264 @ 0x12f8b6e00] mb P  I16..4:  5.9%  0.0%  0.2%  P16..4: 28.2%  4.4%  1.0%  0.0%  0.0%    skip:60.2%
[libx264 @ 0x12f8b6e00] final ratefactor: 16.70
[libx264 @ 0x12f8b6e00] coded y,uvDC,uvAC intra: 35.8% 9.3% 0.4% inter: 8.8% 1.6% 0.0%
[libx264 @ 0x12f8b6e00] i16 v,h,dc,p: 28% 26% 26% 21%
[libx264 @ 0x12f8b6e00] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 13% 26% 25%  3%  7%  4%  5%  3% 13%
[libx264 @ 0x12f8b6e00] i8c dc,h,v,p: 85%  9%  5%  0%
[libx264 @ 0x12f8b6e00] kb/s:1712.63

Answer 1

根据要发送到编码器的样本数量增加PTS。另外，不要忘记将时序从音频流重新调整为输出格式上下文。

因此，解决方法是：

audioFrame->pts = audioSamplesCounter; // starting from zero

然后，在编码（avcodec_encode_audio2）之后，计数器会根据发送到编码器的帧中的样本数量增加计数器（在您的情况下，这将不是您从CMSampleBuffer获得的数量，而是由SWR重新采样后的数量-“ out_samples ”）：

audioSamplesCounter += audioFrame->nb_samples;

在写入媒体输出文件之前，重新调整时间：

av_packet_rescale_ts(&audioPacket,
                     audioStream->codec->time_base,
                     outputFormatContext->streams[audioStream->index]->time_base);

此外，我还建议您使用设备资源来优化方法。

创建一次用于重新缩放/重采样的上下文，然后重新使用它们。
流开始时或第一个CMSampleBufferRef到达时，为音频和视频分配缓冲区。在重新启动流/会话之前，大小不会更改。这将对性能和内存消耗产生巨大的改善。
尽可能使用硬件加速。
不要忘记释放任何分配的数组和上下文。

希望它对您有帮助：）

使用FFMPEG mux flv并在IOS上发送rtmp

1 个答案: