我正在使用ffmpeg libavformat库来编写仅限视频的webm文件。我在服务器上收到了VP8编码的rtp流。我已经成功地将rtp字节流(来自rtp payload)分组到各个帧中,并构建了一个AVPacket。我没有将有效负载重新编码到VP8,因为它已经是vp8编码的。
我正在使用av_write_interleaved()方法将AVPacket写入文件。虽然我得到一个webm文件作为输出,但它根本没有播放。当我使用mkv工具的'mkvinfo'命令检查文件的信息时,我发现了以下信息:
+ EBML head
|+ EBML version: 1
|+ EBML read version: 1
|+ EBML maximum ID length: 4
|+ EBML maximum size length: 8
|+ Doc type: webm
|+ Doc type version: 2
|+ Doc type read version: 2
+ Segment, size 2142500
|+ Seek head (subentries will be skipped)
|+ EbmlVoid (size: 170)
|+ Segment information
| + Timestamp scale: 1000000
| + Multiplexing application: Lavf58.0.100
| + Writing application: Lavf58.0.100
| + Duration: 78918744.480s (21921:52:24.480)
|+ Segment tracks
| + A track
| + Track number: 1 (track ID for mkvmerge & mkvextract: 0)
| + Track UID: 1
| + Lacing flag: 0
| + Name: Video Track
| + Language: eng
| + Codec ID: V_VP8
| + Track type: video
| + Default duration: 1.000ms (1000.000 frames/fields per second for a
video track)
| + Video track
| + Pixel width: 640
| + Pixel height: 480
|+ Tags
| + Tag
| + Targets
| + Simple
| + Name: ENCODER
| + String: Lavf58.0.100
| + Tag
| + Targets
| + TrackUID: 1
| + Simple
| + Name: DURATION
| + String: 21921:52:24.4800000
|+ Cluster
正如我们所看到的,流的持续时间非常不成比例地高。 (我的有效流持续时间应该在8-10秒左右)。并且,轨道信息中的帧速率也不是我设置的。我将帧速率设置为25 fps。
我正在应用av_scale_q(rtpTimeStamp,codec_timebase,stream_timebase)并将重新调整的rtpTimeStamp设置为pts和dts值。我的猜测是我设置pts和dts的方式是错误的。请帮助我如何在AVPacket上设置pts和dts值,以便获得一个带有正确元信息的webm文件。
编辑:
以下是我调用init库的代码:
#define STREAM_FRAME_RATE 25
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
AVFrame *frame;
} OutputStream;
typedef struct WebMWriter {
OutputStream *audioStream, *videoStream;
AVFormatContext *ctx;
AVOutputFormat *outfmt;
AVCodec *audioCodec, *videoCodec;
} WebMWriter;
static OutputStream audioStream = { 0 }, videoStream = { 0 };
WebMWriter *init(char *filename)
{
av_register_all();
AVFormatContext *ctx = NULL;
AVCodec *audioCodec = NULL, *videoCodec = NULL;
const char *fmt_name = NULL;
const char *file_name = filename;
int alloc_status = avformat_alloc_output_context2(&ctx, NULL, fmt_name, file_name);
if(!ctx)
return NULL;
AVOutputFormat *fmt = (*ctx).oformat;
AVDictionary *video_opt = NULL;
av_dict_set(&video_opt, "language", "eng", 0);
av_dict_set(&video_opt, "title", "Video Track", 0);
if(fmt->video_codec != AV_CODEC_ID_NONE)
{
addStream(&videoStream, ctx, &videoCodec, AV_CODEC_ID_VP8, video_opt);
}
if(videoStream.st)
openVideo1(&videoStream, videoCodec, NULL);
av_dump_format(ctx, 0, file_name, 1);
int ret = -1;
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ctx->pb, file_name, AVIO_FLAG_WRITE);
if (ret < 0) {
printf("Could not open '%s': %s\n", file_name, av_err2str(ret));
return NULL;
}
}
/* Write the stream header, if any. */
AVDictionary *format_opt = NULL;
ret = avformat_write_header(ctx, &format_opt);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n",
av_err2str(ret));
return NULL;
}
WebMWriter *webmWriter = malloc(sizeof(struct WebMWriter));
webmWriter->ctx = ctx;
webmWriter->outfmt = fmt;
webmWriter->audioStream = &audioStream;
webmWriter->videoStream = &videoStream;
webmWriter->videoCodec = videoCodec;
return webmWriter;
}
以下是openVideo()方法:
void openVideo1(OutputStream *out_st, AVCodec *codec, AVDictionary *opt_arg)
{
AVCodecContext *codec_ctx = out_st->enc;
int ret = -1;
AVDictionary *opt = NULL;
if(opt_arg != NULL)
{
av_dict_copy(&opt, opt_arg, 0);
ret = avcodec_open2(codec_ctx, codec, &opt);
}
else
{
ret = avcodec_open2(codec_ctx, codec, NULL);
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(out_st->st->codecpar, codec_ctx);
if (ret < 0) {
printf("Could not copy the stream parameters\n");
exit(1);
}
}
以下是addStream()方法:
void addStream(OutputStream *out_st, AVFormatContext *ctx, AVCodec **cdc, enum AVCodecID codecId, AVDictionary *opt_arg)
{
(*cdc) = avcodec_find_encoder(codecId);
if(!(*cdc)) {
exit(1);
}
/*as we are passing a NULL AVCodec cdc, So AVCodecContext codec_ctx will not be allocated, we have to do it explicitly */
AVStream *st = avformat_new_stream(ctx, *cdc);
if(!st) {
exit(1);
}
out_st->st = st;
st->id = ctx->nb_streams-1;
AVDictionary *opt = NULL;
av_dict_copy(&opt, opt_arg, 0);
st->metadata = opt;
AVCodecContext *codec_ctx = st->codec;
if (!codec_ctx) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
out_st->enc = codec_ctx;
codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
switch ((*cdc)->type) {
case AVMEDIA_TYPE_AUDIO:
codec_ctx->codec_id = codecId;
codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
codec_ctx->bit_rate = 64000;
codec_ctx->sample_rate = 48000;
codec_ctx->channels = 2;//1;
codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
codec_ctx->time_base = (AVRational){1,STREAM_FRAME_RATE};
break;
case AVMEDIA_TYPE_VIDEO:
codec_ctx->codec_id = codecId;
codec_ctx->bit_rate = 90000;
codec_ctx->width = 640;
codec_ctx->height = 480;
codec_ctx->time_base = (AVRational){1,STREAM_FRAME_RATE};
codec_ctx->gop_size = 12;
codec_ctx->pix_fmt = STREAM_PIX_FMT;
codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
break;
default:
break;
}
/* Some formats want stream headers to be separate. */
if (ctx->oformat->flags & AVFMT_GLOBALHEADER)
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
以下是我调用的代码,用于将一帧数据写入文件:
int writeVideoStream(AVFormatContext *ctx, AVStream *st, uint8_t *data, int size, long frameTimeStamp, int isKeyFrame, AVCodecContext *codec_ctx)
{
AVRational rat = st->time_base;
AVPacket pkt = {0};
av_init_packet(&pkt);
void *opaque = NULL;
int flags = AV_BUFFER_FLAG_READONLY;
AVBufferRef *bufferRef = av_buffer_create(data, size, NULL, opaque, flags);
pkt.buf = bufferRef;
pkt.data = data;
pkt.size = size;
pkt.stream_index = st->index;
pkt.pts = pkt.dts = frameTimeStamp;
pkt.pts = av_rescale_q(pkt.pts, codec_ctx->time_base, st->time_base);
pkt.dts = av_rescale_q(pkt.dts, codec_ctx->time_base, st->time_base);
if(isKeyFrame == 1)
pkt.flags |= AV_PKT_FLAG_KEY;
int ret = av_interleaved_write_frame(ctx, &pkt);
return ret;
}
注意: 这里'frameTimeStamp'是该帧rtp包的rtp timeStamp。
EDIT 2.0:
我更新的带有codecpars的addStream()方法更改:
void addStream(OutputStream *out_st, AVFormatContext *ctx, AVCodec **cdc, enum AVCodecID codecId, AVDictionary *opt_arg)
{
(*cdc) = avcodec_find_encoder(codecId);
if(!(*cdc)) {
printf("@@@@@ couldnt find codec \n");
exit(1);
}
AVStream *st = avformat_new_stream(ctx, *cdc);
if(!st) {
printf("@@@@@ couldnt init stream\n");
exit(1);
}
out_st->st = st;
st->id = ctx->nb_streams-1;
AVCodecParameters *codecpars = st->codecpar;
codecpars->codec_id = codecId;
codecpars->codec_type = (*cdc)->type;
AVDictionary *opt = NULL;
av_dict_copy(&opt, opt_arg, 0);
st->metadata = opt;
//av_dict_free(&opt);
AVCodecContext *codec_ctx = st->codec;
if (!codec_ctx) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
out_st->enc = codec_ctx;
//since opus is experimental codec
//codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
switch ((*cdc)->type) {
case AVMEDIA_TYPE_AUDIO:
codec_ctx->codec_id = codecId;
codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;//AV_SAMPLE_FMT_U8 or AV_SAMPLE_FMT_S16;
codec_ctx->bit_rate = 64000;
codec_ctx->sample_rate = 48000;
codec_ctx->channels = 2;//1;
codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO; //AV_CH_LAYOUT_MONO;
codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
codec_ctx->time_base = (AVRational){1,STREAM_FRAME_RATE};
codecpars->format = codec_ctx->sample_fmt;
codecpars->channels = codec_ctx->channels;
codecpars->sample_rate = codec_ctx->sample_rate;
break;
case AVMEDIA_TYPE_VIDEO:
codec_ctx->codec_id = codecId;
codec_ctx->bit_rate = 90000;
codec_ctx->width = 640;
codec_ctx->height = 480;
codec_ctx->time_base = (AVRational){1,STREAM_FRAME_RATE};
codec_ctx->gop_size = 12;
codec_ctx->pix_fmt = STREAM_PIX_FMT;
//codec_ctx->max_b_frames = 1;
codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
codec_ctx->framerate = av_inv_q(codec_ctx->time_base);
st->avg_frame_rate = codec_ctx->framerate;//(AVRational){25000, 1000};
codecpars->format = codec_ctx->pix_fmt;
codecpars->width = codec_ctx->width;
codecpars->height = codec_ctx->height;
codecpars->sample_aspect_ratio = (AVRational){codec_ctx->width, codec_ctx->height};
break;
default:
break;
}
codecpars->bit_rate = codec_ctx->bit_rate;
int ret = avcodec_parameters_to_context(codec_ctx, codecpars);
if (ret < 0) {
printf("Could not copy the stream parameters\n");
exit(1);
}
/* Some formats want stream headers to be separate. */
if (ctx->oformat->flags & AVFMT_GLOBALHEADER)
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
答案 0 :(得分:0)
我认为你是正确的训练pts / dts是问题,使用这个公式来手动计算时间戳,看它是否有效,然后你可以用av_rescale_q
来做。
这是我测试的公式(原始(yuv)输出):
int64_t frameTime;
int64_t frameDuration;
frameDuration = video_st->time_base.den / video_fps; // i.e. 25
frameTime = frame_count * frameDuration;
pkt->pts = frameTime / video_st->time_base.num;
pkt->duration = frameDuration;
pkt->dts = pkt->pts;
pkt->stream_index = video_st->index;
在av_interleaved_write_frame
之前使用此功能
注意:frame_count
这里是一个计数器,在每个视频帧输出后增加(使用av_interleaved_write_frame)。