I'm trying to encode audio and video into a webm file using VP8 and Opus. It almost works. (I'm using FFmpeg 3.3.2.)
I can create a video-only webm file, play it in VLC and FFplay, and upload it to YouTube (and everything works). If I add Opus sound to the file it still works in VLC, but not in FFplay or on YouTube; on YouTube the sound turns into nothing but clicking noises.
I get the same problem if I encode only Opus audio into a webm file; it works only in VLC. However, if I encode just the Opus audio into an ogg container, it works everywhere, and I can even use FFmpeg to merge that ogg file with the video-only webm file (roughly the command shown below) and get a fully working webm file with both audio and video.
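For reference, the merge mentioned above is just a stream copy into webm; the file names here are placeholders, but the command looks roughly like this:
ffmpeg -i video_only.webm -i audio.ogg -c copy -map 0:v:0 -map 1:a:0 merged.webm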
So it seems to me that it's only when I encode Opus into a webm container with my own code that it fails in most players and on YouTube. I need it to work on YouTube.
Here is the code for the Opus-only webm encoding (you can switch between ogg and webm with a define): https://pastebin.com/jyQ4s3tB
#define _USE_MATH_DEFINES // for M_PI on MSVC
#include <math.h>   // sin, M_PI, float_t
#include <stdio.h>  // fflush
#include <stdlib.h> // malloc, free
#include <string.h> // strlen, strcpy, strcat
#include <algorithm>
#include <iterator>
extern "C"
{
//#define OGG
#include "libavcodec/avcodec.h"
#include "libavdevice/avdevice.h"
#include "libavfilter/avfilter.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include "libavutil/imgutils.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
enum InfoCodes
{
ENCODED_VIDEO,
ENCODED_AUDIO,
ENCODED_AUDIO_AND_VIDEO,
NOT_ENOUGH_AUDIO_DATA,
};
enum ErrorCodes
{
RES_NOT_MUL_OF_TWO = -1,
ERROR_FINDING_VID_CODEC = -2,
ERROR_CONTEXT_CREATION = -3,
ERROR_CONTEXT_ALLOCATING = -4,
ERROR_OPENING_VID_CODEC = -5,
ERROR_OPENING_FILE = -6,
ERROR_ALLOCATING_FRAME = -7,
ERROR_ALLOCATING_PIC_BUF = -8,
ERROR_ENCODING_FRAME_SEND = -9,
ERROR_ENCODING_FRAME_RECEIVE = -10,
ERROR_FINDING_AUD_CODEC = -11,
ERROR_OPENING_AUD_CODEC = -12,
ERROR_INIT_RESMPL_CONTEXT = -13,
ERROR_ENCODING_SAMPLES_SEND = -14,
ERROR_ENCODING_SAMPLES_RECEIVE = -15,
ERROR_WRITING_HEADER = -16,
ERROR_INIT_AUDIO_RESPAMLER = -17,
};
AVCodecID aud_codec_comp_id = AV_CODEC_ID_OPUS;
AVSampleFormat sample_fmt_comp = AV_SAMPLE_FMT_FLT;
AVCodecID aud_codec_id;
AVSampleFormat sample_fmt;
#ifndef OGG
const char* compressed_cont = "webm";
#endif
#ifdef OGG
const char* compressed_cont = "ogg";
#endif
AVCodec *aud_codec = NULL;
AVCodecContext *aud_codec_context = NULL;
AVFormatContext *outctx;
AVStream *audio_st;
AVFrame *aud_frame;
SwrContext *audio_swr_ctx;
int vid_frame_counter, aud_frame_counter;
int vid_width, vid_height;
char* concat(const char *s1, const char *s2)
{
char *result = (char*)malloc(strlen(s1) + strlen(s2) + 1);
strcpy(result, s1);
strcat(result, s2);
return result;
}
int setup_audio_codec()
{
aud_codec_id = aud_codec_comp_id;
sample_fmt = sample_fmt_comp;
// Fixup audio codec
if (aud_codec == NULL)
aud_codec = avcodec_find_encoder(aud_codec_id);
// avcodec_register_all() has already registered every codec, and calling
// avcodec_register() with a NULL pointer would crash, so no extra call here.
if (!aud_codec)
return ERROR_FINDING_AUD_CODEC;
return 0;
}
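// Creates and opens the Opus encoder context, adds a matching stream to the muxer, and allocates the reusable input frame.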
int initialize_audio_stream(AVFormatContext *local_outctx, int sample_rate, int per_frame_audio_samples, int audio_bitrate)
{
aud_codec_context = avcodec_alloc_context3(aud_codec);
if (!aud_codec_context)
return ERROR_CONTEXT_CREATION;
aud_codec_context->bit_rate = audio_bitrate;
aud_codec_context->sample_rate = sample_rate;
aud_codec_context->sample_fmt = sample_fmt;
aud_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
aud_codec_context->channels = av_get_channel_layout_nb_channels(aud_codec_context->channel_layout);
//aud_codec_context->profile = FF_PROFILE_AAC_MAIN;
aud_codec_context->codec = aud_codec;
aud_codec_context->codec_id = aud_codec_id;
AVRational time_base;
time_base.num = per_frame_audio_samples;
time_base.den = aud_codec_context->sample_rate;
aud_codec_context->time_base = time_base;
int ret = avcodec_open2(aud_codec_context, aud_codec, NULL);
if (ret < 0)
return ERROR_OPENING_AUD_CODEC;
local_outctx->audio_codec = aud_codec;
local_outctx->audio_codec_id = aud_codec_id;
audio_st = avformat_new_stream(local_outctx, aud_codec);
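// Copy the encoder parameters into the new stream's codecpar by hand, including the extradata (the Opus header data the muxer needs to write).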
audio_st->codecpar->bit_rate = aud_codec_context->bit_rate;
audio_st->codecpar->sample_rate = aud_codec_context->sample_rate;
audio_st->codecpar->channels = aud_codec_context->channels;
audio_st->codecpar->channel_layout = aud_codec_context->channel_layout;
audio_st->codecpar->codec_id = aud_codec_context->codec_id;
audio_st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
audio_st->codecpar->format = aud_codec_context->sample_fmt;
audio_st->codecpar->frame_size = aud_codec_context->frame_size;
audio_st->codecpar->block_align = aud_codec_context->block_align;
audio_st->codecpar->initial_padding = aud_codec_context->initial_padding;
audio_st->codecpar->extradata = aud_codec_context->extradata;
audio_st->codecpar->extradata_size = aud_codec_context->extradata_size;
aud_frame = av_frame_alloc();
aud_frame->nb_samples = aud_codec_context->frame_size;
aud_frame->format = aud_codec_context->sample_fmt;
aud_frame->channel_layout = aud_codec_context->channel_layout;
aud_frame->sample_rate = aud_codec_context->sample_rate;
// If the encoder leaves frame_size at 0 it accepts arbitrary frame sizes, so fall back to the per-call sample count.
if (aud_codec_context->frame_size == 0)
aud_frame->nb_samples = per_frame_audio_samples;
// Note: the second argument of av_frame_get_buffer() is an alignment, not a byte size; 0 lets FFmpeg choose.
ret = av_frame_get_buffer(aud_frame, 0);
if (!aud_frame || ret < 0)
return ERROR_ALLOCATING_FRAME;
aud_frame_counter = 0;
return 0;
}
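// Opens the output container (<filename>.webm, or .ogg when OGG is defined), sets up the Opus encoder and writes the container header.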
int initialize_audio_only_encoding(int sample_rate, int per_frame_audio_samples, int audio_bitrate, const char *filename)
{
int ret;
avcodec_register_all();
av_register_all();
// avformat_alloc_output_context2() below allocates outctx itself, so no separate avformat_alloc_context() call is needed.
char* with_dot = concat(filename, ".");
char* full_filename = concat(with_dot, compressed_cont);
ret = avformat_alloc_output_context2(&outctx, NULL, compressed_cont, full_filename);
free(with_dot);
if (ret < 0)
{
free(full_filename);
return ERROR_CONTEXT_CREATION;
}
ret = setup_audio_codec();
if (ret < 0)
return ret;
// Setup Audio
ret = initialize_audio_stream(outctx, sample_rate, per_frame_audio_samples, audio_bitrate);
if (ret < 0)
return ret;
av_dump_format(outctx, 0, full_filename, 1);
if (!(outctx->oformat->flags & AVFMT_NOFILE))
{
if (avio_open(&outctx->pb, full_filename, AVIO_FLAG_WRITE) < 0)
{
free(full_filename);
return ERROR_OPENING_FILE;
}
}
free(full_filename);
ret = avformat_write_header(outctx, NULL);
if (ret < 0)
return ERROR_WRITING_HEADER;
return 0;
}
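// Encodes one frame of interleaved float samples and writes the resulting packets with av_interleaved_write_frame(); this is the path main() uses.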
int write_interleaved_audio_frame(float_t *aud_sample)
{
int ret;
aud_frame->data[0] = (uint8_t*)aud_sample;
aud_frame->extended_data[0] = (uint8_t*)aud_sample;
aud_frame->pts = aud_frame_counter++;
ret = avcodec_send_frame(aud_codec_context, aud_frame);
if (ret < 0)
return ERROR_ENCODING_SAMPLES_SEND;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
av_packet_rescale_ts(&pkt, aud_codec_context->time_base, audio_st->time_base);
pkt.stream_index = audio_st->index;
av_interleaved_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == AVERROR(EAGAIN))
break;
else if (ret < 0)
return ERROR_ENCODING_SAMPLES_RECEIVE;
else
break;
}
return ENCODED_AUDIO;
}
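// Same idea as above, but rescales pts/dts explicitly and writes with av_write_frame(); not called from main() in this test.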
int write_audio_frame(float_t *aud_sample)
{
int ret;
aud_frame->data[0] = (uint8_t*)aud_sample;
aud_frame->extended_data[0] = (uint8_t*)aud_sample;
aud_frame->pts = aud_frame_counter++;
ret = avcodec_send_frame(aud_codec_context, aud_frame);
if (ret < 0)
return ERROR_ENCODING_FRAME_SEND;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
fflush(stdout);
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
if (pkt.pts != AV_NOPTS_VALUE)
pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
if (pkt.dts != AV_NOPTS_VALUE)
pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);
av_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == AVERROR(EAGAIN))
break;
else if (ret < 0)
return ERROR_ENCODING_FRAME_RECEIVE;
else
break;
}
return ENCODED_AUDIO;
}
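// Flushes the encoder by sending a NULL frame, drains the remaining packets and writes the container trailer.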
int finish_audio_encoding()
{
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
fflush(stdout);
int ret = avcodec_send_frame(aud_codec_context, NULL);
if (ret < 0)
return ERROR_ENCODING_FRAME_SEND;
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
if (pkt.pts != AV_NOPTS_VALUE)
pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
if (pkt.dts != AV_NOPTS_VALUE)
pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);
av_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == AVERROR_EOF)
break;
else if (ret < 0)
return ERROR_ENCODING_FRAME_RECEIVE;
}
av_write_trailer(outctx);
return 0;
}
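// Releases the frame, the output/format context and the encoder context.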
void cleanup()
{
if (aud_frame)
{
av_frame_free(&aud_frame);
}
if (outctx)
{
// Close the output file if one was opened and let avformat free the streams.
if (!(outctx->oformat->flags & AVFMT_NOFILE))
avio_closep(&outctx->pb);
avformat_free_context(outctx);
}
if (aud_codec_context)
{
avcodec_close(aud_codec_context);
av_free(aud_codec_context);
}
}
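// Fills the buffer with an interleaved 440 Hz sine tone, duplicating the first channel into the others.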
void fill_samples(float_t *dst, int nb_samples, int nb_channels, int sample_rate, float_t *t)
{
int i, j;
float_t tincr = 1.0 / sample_rate;
const float_t c = 2 * M_PI * 440.0;
for (i = 0; i < nb_samples; i++) {
*dst = sin(c * *t);
for (j = 1; j < nb_channels; j++)
dst[j] = dst[0];
dst += nb_channels;
*t += tincr;
}
}
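// Test driver: writes frame_rate * sec frames of 960 samples each (960 samples is 20 ms at 48 kHz, so 150 frames is about 3 s of audio).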
int main()
{
int sec = 5;
int frame_rate = 30;
float t = 0;
int src_samples_linesize;
int src_nb_samples = 960;
int src_channels = 2;
int sample_rate = 48000;
uint8_t **src_data = NULL;
int ret;
initialize_audio_only_encoding(48000, src_nb_samples, 192000, "sound_FLT_960");
ret = av_samples_alloc_array_and_samples(&src_data, &src_samples_linesize, src_channels,
src_nb_samples, AV_SAMPLE_FMT_FLT, 0);
if (ret < 0)
return ret;
for (size_t i = 0; i < frame_rate * sec; i++)
{
fill_samples((float *)src_data[0], src_nb_samples, src_channels, sample_rate, &t);
write_interleaved_audio_frame((float *)src_data[0]);
}
finish_audio_encoding();
cleanup();
// Release the temporary sample buffer allocated by av_samples_alloc_array_and_samples().
if (src_data)
{
av_freep(&src_data[0]);
av_freep(&src_data);
}
return 0;
}
}
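In case it matters, the code only needs libavformat, libavcodec and libavutil from the FFmpeg 3.3.2 dev package; a build line along these lines should work (the file name is a placeholder and exact flags depend on the setup):
g++ -std=c++11 opus_webm_test.cpp -lavformat -lavcodec -lavutil -o opus_webm_test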
Here are some files as well:
The webm audio file that doesn't work (VLC only): https://drive.google.com/file/d/0B16rIXjPXJCqcU5HVllIYW1iODg/view?usp=sharing
The ogg audio file that works: https://drive.google.com/file/d/0B16rIXjPXJCqMUZhbW0tTDFjT1E/view?usp=sharing
The video+audio file that only works in VLC: https://drive.google.com/file/d/0B16rIXjPXJCqX3pEN3B0QVlrekU/view?usp=sharing
If I play the ogg file in FFplay it shows "aq=30kb", but if I play the webm audio file I get "aq=0kb". So that doesn't seem right either.
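(In case it helps, one way to compare the two outputs is to dump the packet timestamps, e.g. ffprobe -show_packets sound_FLT_960.webm versus the ogg version; sound_FLT_960 is the file name the test program writes.)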
Any ideas? Thanks in advance!
EDIT: It turns out I can just encode the VP8 and Opus into an ogg container, rename it to .webm and upload it to YouTube. I actually didn't know ogg could even hold video, and I don't really know how that affects the encoding and so on... I can also upload the original ogg file with the video and it works on YouTube too. But the whole reason I went with webm in the first place was its licensing (https://www.webmproject.org/license/)... so now I'm a bit confused.
I need to understand what a "container" means in this context and what changing the extension actually does.
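(My guess is that renaming only changes the extension label, whereas actually changing the container would be a remux, something like ffmpeg -i input.ogg -c copy output.webm with placeholder file names, but I'm not sure whether that distinction matters to YouTube.)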
Any comments on this are appreciated!