I am currently writing software that transcodes media files using the ffmpeg libs. The problem is that, in the case of H264, QuickTime cannot play the resulting stream and shows only a black screen. The audio stream works as expected. I have read that QuickTime can only handle the yuv420p pixel format, and that is also the format of the encoded video.
I have looked through the ffmpeg examples and the ffmpeg source code and cannot find any clue to the problem. I would really appreciate any help.
The only thing I get from QuickTime is a
SeqAndPicParamSetFromCFDictionaryRef, bad config record
message in the console. AVPlayer from AVFoundation logs the same thing.
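(As the ffprobe output further down shows, the encoded stream does end up as yuv420p, so the pixel format itself looks fine. Just for reference, an explicit way to prefer yuv420p whenever the encoder supports it could look like the sketch below; pickPixelFormat is a hypothetical helper and is not part of my code.)

extern "C" {
#include <libavcodec/avcodec.h>
}

// Sketch only: prefer AV_PIX_FMT_YUV420P when the encoder advertises it,
// otherwise fall back to the encoder's first supported format (which is
// what the initialization code below effectively does).
static AVPixelFormat pickPixelFormat(const AVCodec *encoder, AVPixelFormat decoderFormat) {
    if (!encoder->pix_fmts) {
        return decoderFormat; // encoder does not restrict pixel formats
    }
    for (const AVPixelFormat *p = encoder->pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
        if (*p == AV_PIX_FMT_YUV420P) {
            return AV_PIX_FMT_YUV420P;
        }
    }
    return encoder->pix_fmts[0];
}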
Here is the initialization of the output streams and encoders.
int status;

// avformat_alloc_output_context2()
if ((status = formatContext.open(destFilename)) < 0) {
    return status;
}

AVDictionary *fmtOptions = nullptr;
av_dict_set(&fmtOptions, "movflags", "faststart", 0);
av_dict_set(&fmtOptions, "brand", "mp42", 0);

streams.resize(input->getStreamsCount());

for (int i = 0; i < input->getStreamsCount(); ++i) {
    AVStream *inputStream = input->getStreamAtIndex(i);
    CodecContext &decoderContext = input->getDecoderAtIndex(i);

    // retrieve output codec by codec id
    auto encoderCodecId = decoderContext.getCodecID();
    if (decoderContext.getCodecType() == AVMEDIA_TYPE_VIDEO || decoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO) {
        int codecIdKey = decoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO ? IPROC_KEY_INT(TargetAudioCodecID) : IPROC_KEY_INT(TargetVideoCodecID);
        auto codecIdParam = static_cast<AVCodecID>(params[codecIdKey]);
        if (codecIdParam != AV_CODEC_ID_NONE) {
            encoderCodecId = codecIdParam;
        }
    }

    AVCodec *encoder = nullptr;
    if ((encoder = avcodec_find_encoder(encoderCodecId)) == nullptr) {
        status = AVERROR_ENCODER_NOT_FOUND;
        return status;
    }

    // create stream with specific codec and format
    AVStream *outputStream = nullptr;
    // avformat_new_stream()
    if ((outputStream = formatContext.newStream(encoder)) == nullptr) {
        return AVERROR(ENOMEM);
    }

    CodecContext encoderContext;
    // avcodec_alloc_context3()
    if ((status = encoderContext.init(encoder)) < 0) {
        return status;
    }

    outputStream->disposition = inputStream->disposition;
    encoderContext.getRawCtx()->chroma_sample_location = decoderContext.getRawCtx()->chroma_sample_location;

    if (encoderContext.getCodecType() == AVMEDIA_TYPE_VIDEO) {
        auto lang = av_dict_get(input->getStreamAtIndex(i)->metadata, "language", nullptr, 0);
        if (lang) {
            av_dict_set(&outputStream->metadata, "language", lang->value, 0);
        }

        // prepare encoder context
        int targetWidth = params[IPROC_KEY_INT(TargetVideoWidth)];
        int targetHeight = params[IPROC_KEY_INT(TargetVideHeight)];

        encoderContext.width() = targetWidth > 0 ? targetWidth : decoderContext.width();
        encoderContext.height() = targetHeight > 0 ? targetHeight : decoderContext.height();
        encoderContext.pixelFormat() = encoder->pix_fmts ? encoder->pix_fmts[0] : decoderContext.pixelFormat();
        encoderContext.timeBase() = decoderContext.timeBase();
        encoderContext.getRawCtx()->level = 31;
        encoderContext.getRawCtx()->gop_size = 25;

        double far = static_cast<double>(encoderContext.getRawCtx()->width) / encoderContext.getRawCtx()->height;
        double dar = static_cast<double>(decoderContext.width()) / decoderContext.height();
        encoderContext.sampleAspectRatio() = av_d2q(dar / far, 255);

        encoderContext.getRawCtx()->bits_per_raw_sample = FFMIN(decoderContext.getRawCtx()->bits_per_raw_sample,
                                                                av_pix_fmt_desc_get(encoderContext.pixelFormat())->comp[0].depth);
        encoderContext.getRawCtx()->framerate = inputStream->r_frame_rate;
        outputStream->avg_frame_rate = encoderContext.getRawCtx()->framerate;

        VideoFilterGraphParameters params;
        params.height = encoderContext.height();
        params.width = encoderContext.width();
        params.pixelFormat = encoderContext.pixelFormat();
        if ((status = generateGraph(decoderContext, encoderContext, params, streams[i].filterGraph)) < 0) {
            return status;
        }

    } else if (encoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO) {
        auto lang = av_dict_get(input->getStreamAtIndex(i)->metadata, "language", nullptr, 0);
        if (lang) {
            av_dict_set(&outputStream->metadata, "language", lang->value, 0);
        }

        encoderContext.sampleRate() = params[IPROC_KEY_INT(TargetAudioSampleRate)] ? : decoderContext.sampleRate();
        encoderContext.channels() = params[IPROC_KEY_INT(TargetAudioChannels)] ? : decoderContext.channels();
        auto paramChannelLayout = params[IPROC_KEY_INT(TargetAudioChannelLayout)];
        if (paramChannelLayout) {
            encoderContext.channelLayout() = paramChannelLayout;
        } else {
            encoderContext.channelLayout() = av_get_default_channel_layout(encoderContext.channels());
        }

        AVSampleFormat sampleFormatParam = static_cast<AVSampleFormat>(params[IPROC_KEY_INT(TargetAudioSampleFormat)]);
        if (sampleFormatParam != AV_SAMPLE_FMT_NONE) {
            encoderContext.sampleFormat() = sampleFormatParam;
        } else if (encoder->sample_fmts) {
            encoderContext.sampleFormat() = encoder->sample_fmts[0];
        } else {
            encoderContext.sampleFormat() = decoderContext.sampleFormat();
        }

        encoderContext.timeBase().num = 1;
        encoderContext.timeBase().den = encoderContext.sampleRate();

        AudioFilterGraphParameters params;
        params.channelLayout = encoderContext.channelLayout();
        params.channels = encoderContext.channels();
        params.format = encoderContext.sampleFormat();
        params.sampleRate = encoderContext.sampleRate();
        if ((status = generateGraph(decoderContext, encoderContext, params, streams[i].filterGraph)) < 0) {
            return status;
        }
    }

    // before using encoder, we should open it and update its parameters
    printf("Codec bits per sample %d\n", av_get_bits_per_sample(encoderCodecId));
    AVDictionary *options = nullptr;
    // avcodec_open2()
    if ((status = encoderContext.open(encoder, &options)) < 0) {
        return status;
    }

    if (streams[i].filterGraph) {
        streams[i].filterGraph.setOutputFrameSize(encoderContext.getFrameSize());
    }

    // avcodec_parameters_from_context()
    if ((status = encoderContext.fillParamters(outputStream->codecpar)) < 0) {
        return status;
    }
    outputStream->codecpar->format = encoderContext.getRawCtx()->pix_fmt;

    if (formatContext.getRawCtx()->oformat->flags & AVFMT_GLOBALHEADER) {
        encoderContext.getRawCtx()->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    if (encoderContext.getRawCtx()->nb_coded_side_data) {
        int i;
        for (i = 0; i < encoderContext.getRawCtx()->nb_coded_side_data; i++) {
            const AVPacketSideData *sd_src = &encoderContext.getRawCtx()->coded_side_data[i];
            uint8_t *dst_data;
            dst_data = av_stream_new_side_data(outputStream, sd_src->type, sd_src->size);
            if (!dst_data)
                return AVERROR(ENOMEM);
            memcpy(dst_data, sd_src->data, sd_src->size);
        }
    }

    /*
     * Add global input side data. For now this is naive, and copies it
     * from the input stream's global side data. All side data should
     * really be funneled over AVFrame and libavfilter, then added back to
     * packet side data, and then potentially using the first packet for
     * global side data.
     */
    for (int i = 0; i < inputStream->nb_side_data; i++) {
        AVPacketSideData *sd = &inputStream->side_data[i];
        uint8_t *dst = av_stream_new_side_data(outputStream, sd->type, sd->size);
        if (!dst)
            return AVERROR(ENOMEM);
        memcpy(dst, sd->data, sd->size);
    }

    // copy timebase while removing common factors
    if (outputStream->time_base.num <= 0 || outputStream->time_base.den <= 0) {
        outputStream->time_base = av_add_q(encoderContext.timeBase(), (AVRational){0, 1});
    }

    // copy estimated duration as a hint to the muxer
    if (outputStream->duration <= 0 && inputStream->duration > 0) {
        outputStream->duration = av_rescale_q(inputStream->duration, inputStream->time_base, outputStream->time_base);
    }

    streams[i].codecType = encoderContext.getRawCtx()->codec_type;
    streams[i].codec = std::move(encoderContext);
    streams[i].streamIndex = i;
}

// avio_open() and avformat_write_header()
if ((status = formatContext.writeHeader(fmtOptions)) < 0) {
    return status;
}

formatContext.dumpFormat();
Reading from the input stream:
int InputProcessor::performStep() {
    int status;

    Packet nextPacket;
    if ((status = input->getFormatContext().readFrame(nextPacket)) < 0) {
        return status;
    }
    ++streams[nextPacket.getStreamIndex()].readPackets;
    int streamIndex = nextPacket.getStreamIndex();

    CodecContext &decoder = input->getDecoderAtIndex(streamIndex);
    AVStream *inputStream = input->getStreamAtIndex(streamIndex);

    if (streams[nextPacket.getStreamIndex()].readPackets == 1) {
        for (int i = 0; i < inputStream->nb_side_data; ++i) {
            AVPacketSideData *src_sd = &inputStream->side_data[i];
            uint8_t *dst_data;

            if (src_sd->type == AV_PKT_DATA_DISPLAYMATRIX) {
                continue;
            }
            if (av_packet_get_side_data(nextPacket.getRawPtr(), src_sd->type, nullptr)) {
                continue;
            }

            dst_data = av_packet_new_side_data(nextPacket.getRawPtr(), src_sd->type, src_sd->size);
            if (!dst_data) {
                return AVERROR(ENOMEM);
            }
            memcpy(dst_data, src_sd->data, src_sd->size);
        }
    }

    nextPacket.rescaleTimestamps(inputStream->time_base, decoder.timeBase());

    status = decodePacket(&nextPacket, nextPacket.getStreamIndex());
    if (status < 0 && status != AVERROR(EAGAIN)) {
        return status;
    }
    return 0;
}
Here is the decoding/encoding code.
int InputProcessor::decodePacket(Packet *packet, int streamIndex) {
    int status;
    int sendStatus;

    auto &decoder = input->getDecoderAtIndex(streamIndex);

    do {
        if (packet == nullptr) {
            sendStatus = decoder.flushDecodedFrames();
        } else {
            sendStatus = decoder.sendPacket(*packet);
        }

        if (sendStatus < 0 && sendStatus != AVERROR(EAGAIN) && sendStatus != AVERROR_EOF) {
            return sendStatus;
        }
        if (sendStatus == 0 && packet) {
            ++streams[streamIndex].decodedPackets;
        }

        Frame decodedFrame;
        while (true) {
            if ((status = decoder.receiveFrame(decodedFrame)) < 0) {
                break;
            }
            ++streams[streamIndex].decodedFrames;
            if ((status = filterAndWriteFrame(&decodedFrame, streamIndex)) < 0) {
                break;
            }
            decodedFrame.unref();
        }
    } while (sendStatus == AVERROR(EAGAIN));

    return status;
}

int InputProcessor::encodeAndWriteFrame(Frame *frame, int streamIndex) {
    assert(input->isValid());
    assert(formatContext);

    int status = 0;
    int sendStatus;

    Packet packet;

    CodecContext &encoderContext = streams[streamIndex].codec;

    do {
        if (frame) {
            sendStatus = encoderContext.sendFrame(*frame);
        } else {
            sendStatus = encoderContext.flushEncodedPackets();
        }

        if (sendStatus < 0 && sendStatus != AVERROR(EAGAIN) && sendStatus != AVERROR_EOF) {
            return status;
        }
        if (sendStatus == 0 && frame) {
            ++streams[streamIndex].encodedFrames;
        }

        while (true) {
            if ((status = encoderContext.receivePacket(packet)) < 0) {
                break;
            }
            ++streams[streamIndex].encodedPackets;

            packet.setStreamIndex(streamIndex);

            auto sourceTimebase = encoderContext.timeBase();
            auto dstTimebase = formatContext.getStreams()[streamIndex]->time_base;
            packet.rescaleTimestamps(sourceTimebase, dstTimebase);

            if ((status = formatContext.writeFrameInterleaved(packet)) < 0) {
                return status;
            }
            packet.unref();
        }
    } while (sendStatus == AVERROR(EAGAIN));

    if (status != AVERROR(EAGAIN)) {
        return status;
    }
    return 0;
}
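(End-of-file handling is not shown above; once readFrame() returns AVERROR_EOF, the decoders and encoders are drained by passing nullptr instead of a packet or frame. The sketch below shows roughly how such draining could be driven with the same wrappers; finishStreams is a hypothetical method and writeTrailer is an assumed wrapper around av_write_trailer(), neither of which appears in the code above.)

// Sketch: drain decoders and encoders at EOF, then finalize the container.
// finishStreams() and writeTrailer() are assumed names, not part of the code above;
// flushing the filter graphs is omitted here for brevity.
int InputProcessor::finishStreams() {
    int status = 0;
    for (size_t i = 0; i < streams.size(); ++i) {
        // nullptr makes decodePacket() flush the decoder's buffered frames ...
        status = decodePacket(nullptr, static_cast<int>(i));
        if (status < 0 && status != AVERROR_EOF && status != AVERROR(EAGAIN)) {
            return status;
        }
        // ... and nullptr here drains the encoder's remaining packets.
        status = encodeAndWriteFrame(nullptr, static_cast<int>(i));
        if (status < 0 && status != AVERROR_EOF && status != AVERROR(EAGAIN)) {
            return status;
        }
    }
    // assumed wrapper around av_write_trailer()
    return formatContext.writeTrailer();
}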
FFprobe output for the original video:
Input #0, matroska,webm, from 'testvideo':
Metadata:
title : TestVideo
encoder : libebml v1.3.0 + libmatroska v1.4.0
creation_time : 2014-12-23T03:38:05.000000Z
Duration: 00:02:29.25, start: 0.000000, bitrate: 79549 kb/s
Stream #0:0(rus): Video: h264 (High 4:4:4 Predictive), yuv444p10le(pc, bt709, progressive), 2048x858 [SAR 1:1 DAR 1024:429], 24 fps, 24 tbr, 1k tbn, 48 tbc (default)
Stream #0:1(rus): Audio: pcm_s24le, 48000 Hz, 6 channels, s32 (24 bit), 6912 kb/s (default)
And for the transcoded one:
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '123.mp4':
Metadata:
major_brand : mp42
minor_version : 512
compatible_brands: isomiso2avc1mp41
encoder : Lavf57.71.100
Duration: 00:02:29.27, start: 0.000000, bitrate: 4282 kb/s
Stream #0:0(rus): Video: h264 (High) (avc1 / 0x31637661), yuv420p, 1280x720 [SAR 192:143 DAR 1024:429], 3940 kb/s, 24.01 fps, 24 tbr, 12288 tbn, 96 tbc (default)
Metadata:
handler_name : VideoHandler
Stream #0:1(rus): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, 5.1, fltp, 336 kb/s (default)
Metadata:
handler_name : SoundHandler
Answer (score: 2)
The problem was the wrong order of the encoder initialization steps. In the transcoding.c example, the CODEC_FLAG_GLOBAL_HEADER flag is assigned to AVCodecContext.flags after avcodec_open2() is called. I assumed that was correct and did the same in my code. As a result, the extradata field was left uninitialized and QuickTime could not parse the resulting stream. Setting the flag before opening the codec solved the problem.
The resulting code:
// should be placed before avcodec_open2
if (formatContext.getRawCtx()->oformat->flags & AVFMT_GLOBALHEADER) {
    encoderContext.getRawCtx()->flags |= CODEC_FLAG_GLOBAL_HEADER;
}

// before using encoder, we should open it and update its parameters
printf("Codec bits per sample %d\n", av_get_bits_per_sample(encoderCodecId));
AVDictionary *options = nullptr;
if ((status = encoderContext.open(encoder, &options)) < 0) {
    return status;
}
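To verify the fix, it helps to check that avcodec_open2() actually populated the global header. A quick debugging sketch right after opening the encoder (using the same wrappers as above) could look like this:

// Debugging sketch: with the global-header flag set before avcodec_open2(),
// the encoder should now expose the SPS/PPS in extradata; for mp4/avc1 the
// muxer copies it into the avcC configuration record that QuickTime parses.
AVCodecContext *raw = encoderContext.getRawCtx();
printf("extradata_size = %d\n", raw->extradata_size);
if (raw->extradata_size <= 0 || raw->extradata == nullptr) {
    fprintf(stderr, "Global header missing: extradata was not filled by the encoder\n");
}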