FFmpeg C API - Synchronizing Video and Audio

Date: 2015-03-30 17:21:45

Tags: c audio video ffmpeg

I am trimming a video and having a hard time getting the audio to sync correctly. The code below is as close as I have gotten to a working version. I have tried both re-encoding and not re-encoding the output streams.

The video is trimmed correctly and written to the output container. The audio stream is also trimmed correctly, but it is written to the front of the output container. For example, if the trim length is 10 seconds, the correct section of audio plays for 10 seconds, and then the correct video section plays.

//////// audio stream ////////
const AVStream *input_stream_audio = input_container->streams[audio_stream_index];
const AVCodec *decoder_audio = avcodec_find_decoder(input_stream_audio->codec->codec_id);
if(!decoder_audio) {
    cleanup(decoded_packet, output_container, decoded_frame);
    avformat_close_input(&input_container);
    LOGE("=> Audio decoder not found");
    return -1;
}
if(avcodec_open2(input_stream_audio->codec, decoder_audio, NULL) < 0) {
    cleanup(decoded_packet, output_container, decoded_frame);
    avformat_close_input(&input_container);
    LOGE("=> Error opening audio decoder");
    return -1;
}

AVStream *output_stream_audio = avformat_new_stream(output_container, NULL);
if(avcodec_copy_context(output_stream_audio->codec, input_stream_audio->codec) != 0){
    LOGE("=> Failed to Copy audio Context ");
    return -1;
}
else {
    LOGI("=> Copied audio context ");
    output_stream_audio->codec->codec_id = input_stream_audio->codec->codec_id;
    output_stream_audio->codec->codec_tag = 0;
    output_stream_audio->pts = input_stream_audio->pts;
    output_stream_audio->time_base.num = input_stream_audio->time_base.num;
    output_stream_audio->time_base.den = input_stream_audio->time_base.den;
}

if(avio_open(&output_container->pb, output_file, AVIO_FLAG_WRITE) < 0) {
    cleanup(decoded_packet, output_container, decoded_frame);
    avformat_close_input(&input_container);
    LOGE("=> Error opening output file");
    return -1;
}

// allocate frame for conversion
decoded_frame = avcodec_alloc_frame();
if(!decoded_frame) {
    cleanup(decoded_packet, output_container, decoded_frame);
    avformat_close_input(&input_container);
    LOGE("=> Error allocating frame");
    return -1;
}

av_dump_format(input_container, 0, input_file, 0);
avformat_write_header(output_container, NULL);
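// start with an empty packet; av_read_frame() fills it on each iteration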
av_init_packet(&decoded_packet);

decoded_packet.data = NULL;
decoded_packet.size = 0;
int current_frame_num = 1;
int current_frame_num_audio = 1;
int got_frame, len;

AVRational default_timebase;
default_timebase.num = 1;
default_timebase.den = AV_TIME_BASE;

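// convert the hard-coded trim window (12 s to 18 s) from AV_TIME_BASE units
// into each stream's own time base for PTS comparisons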
int64_t starttime_int64 = av_rescale_q((int64_t)( 12.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream->time_base);
int64_t endtime_int64 = av_rescale_q((int64_t)( 18.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream->time_base);
LOGI("=> starttime_int64:     %" PRId64, starttime_int64);
LOGI("=> endtime_int64:       %" PRId64, endtime_int64);

int64_t starttime_int64_audio = av_rescale_q((int64_t)( 12.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream_audio->time_base);
int64_t endtime_int64_audio = av_rescale_q((int64_t)( 18.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream_audio->time_base);
LOGI("=> starttime_int64_audio:     %" PRId64, starttime_int64_audio);
LOGI("=> endtime_int64_audio:       %" PRId64, endtime_int64_audio);

// loop input container and decode frames
while(av_read_frame(input_container, &decoded_packet)>=0) {
    // video packets
    if (decoded_packet.stream_index == video_stream_index) {
        len = avcodec_decode_video2(input_stream->codec, decoded_frame, &got_frame, &decoded_packet);
        if(len < 0) {
            cleanup(decoded_packet, output_container, decoded_frame);
            avformat_close_input(&input_container);
            LOGE("=> No frames to decode");
            return -1;
        }
        // this is the trim range we're looking for
        if(got_frame && decoded_frame->pkt_pts >= starttime_int64 && decoded_frame->pkt_pts <= endtime_int64) {
            av_init_packet(&encoded_packet);
            encoded_packet.data = NULL;
            encoded_packet.size = 0;

            ret = avcodec_encode_video2(output_stream->codec, &encoded_packet, decoded_frame, &got_frame);
            if (ret < 0) {
                cleanup(decoded_packet, output_container, decoded_frame);
                avformat_close_input(&input_container);
                LOGE("=> Error encoding frames");
                return ret;
            }
            if(got_frame) {
                if (output_stream->codec->coded_frame->key_frame) {
                    encoded_packet.flags |= AV_PKT_FLAG_KEY;
                }

                encoded_packet.stream_index = output_stream->index;
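                // stamp pts/dts from a running frame counter, rescaled from
                // the encoder's codec time base to the output stream time base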
                encoded_packet.pts = av_rescale_q(current_frame_num, output_stream->codec->time_base, output_stream->time_base);
                encoded_packet.dts = av_rescale_q(current_frame_num, output_stream->codec->time_base, output_stream->time_base);

                ret = av_interleaved_write_frame(output_container, &encoded_packet);
                if (ret < 0) {
                    cleanup(decoded_packet, output_container, decoded_frame);
                    avformat_close_input(&input_container);
                    LOGE("=> Error writing video frame");
                    return ret;
                }
                else {
                    current_frame_num += 1;
                }
            }
            av_free_packet(&encoded_packet);
        }
    }
    // audio packets
    else if(decoded_packet.stream_index == audio_stream_index) {
        // this is the trim range we're looking for
        if(decoded_packet.pts >= starttime_int64_audio && decoded_packet.pts <= endtime_int64_audio) {
            av_init_packet(&encoded_packet);

            encoded_packet.data = decoded_packet.data;
            encoded_packet.size = decoded_packet.size;
            encoded_packet.stream_index = audio_stream_index;
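            // re-stamp the copied audio payload from a packet counter, rescaled
            // from the audio codec time base to the audio stream time base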
            encoded_packet.pts = av_rescale_q(current_frame_num_audio, output_stream_audio->codec->time_base, output_stream_audio->time_base);
            encoded_packet.dts = av_rescale_q(current_frame_num_audio, output_stream_audio->codec->time_base, output_stream_audio->time_base);

            ret = av_interleaved_write_frame(output_container, &encoded_packet);
            if (ret < 0) {
                cleanup(decoded_packet, output_container, decoded_frame);
                avformat_close_input(&input_container);
                LOGE("=> Error writing audio frame");
                return ret;
            }
            else {
                current_frame_num_audio += 1;
            }
            av_free_packet(&encoded_packet);
        }
    }
}

Edit

I have made slight improvements to the initial code. The audio and video are still not perfectly in sync, but the original problem of the audio playing before the video is resolved.

Instead of re-encoding, I am now writing the decoded packets straight to the output container.

In the end, though, I am left with the same problem: the audio and video streams of the trimmed video are not exactly in sync.

// audio packets
else if(decoded_packet.stream_index == audio_stream_index) {
    // this is the trim range we're looking for
    if(decoded_packet.pts >= starttime_int64_audio && decoded_packet.pts <= endtime_int64_audio) {
        ret = av_interleaved_write_frame(output_container, &decoded_packet);
        if (ret < 0) {
            cleanup(decoded_packet, output_container, decoded_frame);
            avformat_close_input(&input_container);
            LOGE("=> Error writing audio frame (%s)", av_err2str(ret));
            return ret;
        }
        else {
            current_frame_num_audio += 1;
        }
    }
    else if(decoded_frame->pkt_pts > endtime_int64_audio) {
        audio_copy_complete = true;
    }
}
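
For reference, a common way to keep stream-copied packets in sync when trimming is to shift each packet's timestamps so the cut starts at zero, then rescale them into the output stream's time base. The following is a minimal sketch of that idea, not code from the original post; it reuses the variables defined above and assumes the video packets are shifted the same way by starttime_int64.

// sketch: shift the copied audio packet so the trimmed output starts at t=0,
// then rescale from the input stream's time base to the output stream's
if (decoded_packet.pts != AV_NOPTS_VALUE) {
    decoded_packet.pts = av_rescale_q(decoded_packet.pts - starttime_int64_audio,
                                      input_stream_audio->time_base,
                                      output_stream_audio->time_base);
}
if (decoded_packet.dts != AV_NOPTS_VALUE) {
    decoded_packet.dts = av_rescale_q(decoded_packet.dts - starttime_int64_audio,
                                      input_stream_audio->time_base,
                                      output_stream_audio->time_base);
}
decoded_packet.duration = av_rescale_q(decoded_packet.duration,
                                       input_stream_audio->time_base,
                                       output_stream_audio->time_base);
decoded_packet.stream_index = output_stream_audio->index;
ret = av_interleaved_write_frame(output_container, &decoded_packet);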

1 Answer:

Answer 0 (score: 0):

I believe you should be able to do this if you set up the codec context and stream time bases correctly. Then, after avcodec_encode_video2 and avcodec_encode_audio2, you call av_packet_rescale_ts with those time bases, like this:
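
A minimal sketch of that idea (my reconstruction, not the answerer's exact snippet), using the same deprecated avcodec_encode_video2 API and variables as the question:

// sketch: after encoding, convert the packet's pts/dts/duration from the
// encoder's codec time base to the muxer's stream time base in one call
ret = avcodec_encode_video2(output_stream->codec, &encoded_packet, decoded_frame, &got_frame);
if (ret >= 0 && got_frame) {
    av_packet_rescale_ts(&encoded_packet,
                         output_stream->codec->time_base,  // source time base
                         output_stream->time_base);        // destination time base
    encoded_packet.stream_index = output_stream->index;
    ret = av_interleaved_write_frame(output_container, &encoded_packet);
}

The audio path would do the same with output_stream_audio after avcodec_encode_audio2, so both streams end up stamped in their own stream time bases rather than with hand-computed frame counters.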