提交 340fb9d6 编写于 作者: P pkviet 提交者: jp9000

obs-ffmpeg: Enable multiple audio tracks for FFmpeg output

This allows multiple audio tracks for the FFmpeg output.

Closes obsproject/obs-studio#1351
上级 b8a3ae1b
......@@ -45,6 +45,8 @@ struct ffmpeg_cfg {
int audio_encoder_id;
const char *video_settings;
const char *audio_settings;
int audio_mix_count;
int audio_tracks;
enum AVPixelFormat format;
enum AVColorRange color_range;
enum AVColorSpace color_space;
......@@ -56,7 +58,7 @@ struct ffmpeg_cfg {
struct ffmpeg_data {
AVStream *video;
AVStream *audio;
AVStream **audio_streams;
AVCodec *acodec;
AVCodec *vcodec;
AVFormatContext *output;
......@@ -68,14 +70,18 @@ struct ffmpeg_data {
uint64_t start_timestamp;
int64_t total_samples;
int64_t total_samples[MAX_AUDIO_MIXES];
uint32_t audio_samplerate;
enum audio_format audio_format;
size_t audio_planes;
size_t audio_size;
struct circlebuf excess_frames[MAX_AV_PLANES];
uint8_t *samples[MAX_AV_PLANES];
AVFrame *aframe;
int num_audio_streams;
/* audio_tracks is a bitmask storing the indices of the mixes */
int audio_tracks;
struct circlebuf excess_frames[MAX_AUDIO_MIXES][MAX_AV_PLANES];
uint8_t *samples[MAX_AUDIO_MIXES][MAX_AV_PLANES];
AVFrame *aframe[MAX_AUDIO_MIXES];
struct ffmpeg_cfg config;
......@@ -273,9 +279,9 @@ static bool create_video_stream(struct ffmpeg_data *data)
return true;
}
static bool open_audio_codec(struct ffmpeg_data *data)
static bool open_audio_codec(struct ffmpeg_data *data, int idx)
{
AVCodecContext *context = data->audio->codec;
AVCodecContext *context = data->audio_streams[idx]->codec;
char **opts = strlist_split(data->config.audio_settings, ' ', false);
int ret;
......@@ -284,16 +290,16 @@ static bool open_audio_codec(struct ffmpeg_data *data)
strlist_free(opts);
}
data->aframe = av_frame_alloc();
if (!data->aframe) {
data->aframe[idx] = av_frame_alloc();
if (!data->aframe[idx]) {
blog(LOG_WARNING, "Failed to allocate audio frame");
return false;
}
data->aframe->format = context->sample_fmt;
data->aframe->channels = context->channels;
data->aframe->channel_layout = context->channel_layout;
data->aframe->sample_rate = context->sample_rate;
data->aframe[idx]->format = context->sample_fmt;
data->aframe[idx]->channels = context->channels;
data->aframe[idx]->channel_layout = context->channel_layout;
data->aframe[idx]->sample_rate = context->sample_rate;
context->strict_std_compliance = -2;
......@@ -306,7 +312,7 @@ static bool open_audio_codec(struct ffmpeg_data *data)
data->frame_size = context->frame_size ? context->frame_size : 1024;
ret = av_samples_alloc(data->samples, NULL, context->channels,
ret = av_samples_alloc(data->samples[idx], NULL, context->channels,
data->frame_size, context->sample_fmt, 0);
if (ret < 0) {
blog(LOG_WARNING, "Failed to create audio buffer: %s",
......@@ -317,9 +323,10 @@ static bool open_audio_codec(struct ffmpeg_data *data)
return true;
}
static bool create_audio_stream(struct ffmpeg_data *data)
static bool create_audio_stream(struct ffmpeg_data *data, int idx)
{
AVCodecContext *context;
AVStream *stream;
struct obs_audio_info aoi;
if (!obs_get_audio_info(&aoi)) {
......@@ -327,17 +334,18 @@ static bool create_audio_stream(struct ffmpeg_data *data)
return false;
}
if (!new_stream(data, &data->audio, &data->acodec,
if (!new_stream(data, &stream, &data->acodec,
data->output->oformat->audio_codec,
data->config.audio_encoder))
return false;
context = data->audio->codec;
context->bit_rate = data->config.audio_bitrate * 1000;
context->time_base = (AVRational){ 1, aoi.samples_per_sec };
context->channels = get_audio_channels(aoi.speakers);
context->sample_rate = aoi.samples_per_sec;
context->channel_layout =
data->audio_streams[idx] = stream;
context = data->audio_streams[idx]->codec;
context->bit_rate = data->config.audio_bitrate * 1000;
context->time_base = (AVRational){ 1, aoi.samples_per_sec };
context->channels = get_audio_channels(aoi.speakers);
context->sample_rate = aoi.samples_per_sec;
context->channel_layout =
av_get_default_channel_layout(context->channels);
//AVlib default channel layout for 5 channels is 5.0 ; fix for 4.1
......@@ -347,7 +355,7 @@ static bool create_audio_stream(struct ffmpeg_data *data)
context->sample_fmt = data->acodec->sample_fmts ?
data->acodec->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
data->audio->time_base = context->time_base;
data->audio_streams[idx]->time_base = context->time_base;
data->audio_samplerate = aoi.samples_per_sec;
data->audio_format = convert_ffmpeg_sample_format(context->sample_fmt);
......@@ -357,7 +365,7 @@ static bool create_audio_stream(struct ffmpeg_data *data)
if (data->output->oformat->flags & AVFMT_GLOBALHEADER)
context->flags |= CODEC_FLAG_GLOBAL_H;
return open_audio_codec(data);
return open_audio_codec(data, idx);
}
static inline bool init_streams(struct ffmpeg_data *data)
......@@ -368,9 +376,14 @@ static inline bool init_streams(struct ffmpeg_data *data)
if (!create_video_stream(data))
return false;
if (format->audio_codec != AV_CODEC_ID_NONE)
if (!create_audio_stream(data))
return false;
if (format->audio_codec != AV_CODEC_ID_NONE && data->num_audio_streams) {
data->audio_streams = calloc(1,
data->num_audio_streams * sizeof(void*));
for (int i = 0; i < data->num_audio_streams; i++) {
if (!create_audio_stream(data, i))
return false;
}
}
return true;
}
......@@ -457,12 +470,14 @@ static void close_video(struct ffmpeg_data *data)
static void close_audio(struct ffmpeg_data *data)
{
for (size_t i = 0; i < MAX_AV_PLANES; i++)
circlebuf_free(&data->excess_frames[i]);
for (int idx = 0; idx < data->num_audio_streams; idx++) {
for (size_t i = 0; i < MAX_AV_PLANES; i++)
circlebuf_free(&data->excess_frames[idx][i]);
av_freep(&data->samples[0]);
avcodec_close(data->audio->codec);
av_frame_free(&data->aframe);
av_freep(&data->samples[idx][0]);
avcodec_close(data->audio_streams[idx]->codec);
av_frame_free(&data->aframe[idx]);
}
}
static void ffmpeg_data_free(struct ffmpeg_data *data)
......@@ -472,8 +487,11 @@ static void ffmpeg_data_free(struct ffmpeg_data *data)
if (data->video)
close_video(data);
if (data->audio)
if (data->audio_streams) {
close_audio(data);
free(data->audio_streams);
data->audio_streams = NULL;
}
if (data->output) {
if ((data->output->oformat->flags & AVFMT_NOFILE) == 0)
......@@ -528,7 +546,8 @@ static bool ffmpeg_data_init(struct ffmpeg_data *data,
memset(data, 0, sizeof(struct ffmpeg_data));
data->config = *config;
data->num_audio_streams = config->audio_mix_count;
data->audio_tracks = config->audio_tracks;
if (!config->url || !*config->url)
return false;
......@@ -766,7 +785,7 @@ static void receive_video(void *param, struct video_data *frame)
data->total_frames++;
}
static void encode_audio(struct ffmpeg_output *output,
static void encode_audio(struct ffmpeg_output *output, int idx,
struct AVCodecContext *context, size_t block_size)
{
struct ffmpeg_data *data = &output->ff_data;
......@@ -775,13 +794,13 @@ static void encode_audio(struct ffmpeg_output *output,
int ret, got_packet;
size_t total_size = data->frame_size * block_size * context->channels;
data->aframe->nb_samples = data->frame_size;
data->aframe->pts = av_rescale_q(data->total_samples,
data->aframe[idx]->nb_samples = data->frame_size;
data->aframe[idx]->pts = av_rescale_q(data->total_samples[idx],
(AVRational){1, context->sample_rate},
context->time_base);
ret = avcodec_fill_audio_frame(data->aframe, context->channels,
context->sample_fmt, data->samples[0],
ret = avcodec_fill_audio_frame(data->aframe[idx], context->channels,
context->sample_fmt, data->samples[idx][0],
(int)total_size, 1);
if (ret < 0) {
blog(LOG_WARNING, "encode_audio: avcodec_fill_audio_frame "
......@@ -789,10 +808,10 @@ static void encode_audio(struct ffmpeg_output *output,
return;
}
data->total_samples += data->frame_size;
data->total_samples[idx] += data->frame_size;
#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 40, 101)
ret = avcodec_send_frame(context, data->aframe);
ret = avcodec_send_frame(context, data->aframe[idx]);
if (ret == 0)
ret = avcodec_receive_packet(context, &packet);
......@@ -801,7 +820,7 @@ static void encode_audio(struct ffmpeg_output *output,
if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
ret = 0;
#else
ret = avcodec_encode_audio2(context, &packet, data->aframe,
ret = avcodec_encode_audio2(context, &packet, data->aframe[idx],
&got_packet);
#endif
if (ret < 0) {
......@@ -813,11 +832,13 @@ static void encode_audio(struct ffmpeg_output *output,
if (!got_packet)
return;
packet.pts = rescale_ts(packet.pts, context, data->audio->time_base);
packet.dts = rescale_ts(packet.dts, context, data->audio->time_base);
packet.pts = rescale_ts(packet.pts, context,
data->audio_streams[idx]->time_base);
packet.dts = rescale_ts(packet.dts, context,
data->audio_streams[idx]->time_base);
packet.duration = (int)av_rescale_q(packet.duration, context->time_base,
data->audio->time_base);
packet.stream_index = data->audio->index;
data->audio_streams[idx]->time_base);
packet.stream_index = data->audio_streams[idx]->index;
pthread_mutex_lock(&output->write_mutex);
da_push_back(output->packets, &packet);
......@@ -853,18 +874,34 @@ static bool prepare_audio(struct ffmpeg_data *data,
return true;
}
static void receive_audio(void *param, struct audio_data *frame)
/* Given a bitmask for the selected tracks and the mix index,
* this returns the stream index which will be passed to the muxer. */
static int get_track_order(int track_config, size_t mix_index)
{
int position = 0;
for (size_t i = 0; i < mix_index; i++) {
if (track_config & 1 << i)
position++;
}
return position;
}
static void receive_audio(void *param, size_t mix_idx, struct audio_data *frame)
{
struct ffmpeg_output *output = param;
struct ffmpeg_data *data = &output->ff_data;
size_t frame_size_bytes;
struct audio_data in;
int track_order;
// codec doesn't support audio or none configured
if (!data->audio)
/* check that the track was selected */
if ((data->audio_tracks & (1 << mix_idx)) == 0)
return;
AVCodecContext *context = data->audio->codec;
/* get track order (first selected, etc ...) */
track_order = get_track_order(data->audio_tracks, mix_idx);
AVCodecContext *context = data->audio_streams[track_order]->codec;
if (!data->start_timestamp)
return;
......@@ -877,15 +914,16 @@ static void receive_audio(void *param, struct audio_data *frame)
frame_size_bytes = (size_t)data->frame_size * data->audio_size;
for (size_t i = 0; i < data->audio_planes; i++)
circlebuf_push_back(&data->excess_frames[i], in.data[i],
in.frames * data->audio_size);
circlebuf_push_back(&data->excess_frames[track_order][i],
in.data[i], in.frames * data->audio_size);
while (data->excess_frames[0].size >= frame_size_bytes) {
while (data->excess_frames[track_order][0].size >= frame_size_bytes) {
for (size_t i = 0; i < data->audio_planes; i++)
circlebuf_pop_front(&data->excess_frames[i],
data->samples[i], frame_size_bytes);
circlebuf_pop_front(&data->excess_frames[track_order][i],
data->samples[track_order][i],
frame_size_bytes);
encode_audio(output, context, data->audio_size);
encode_audio(output, track_order, context, data->audio_size);
}
}
......@@ -901,7 +939,7 @@ static uint64_t get_packet_sys_dts(struct ffmpeg_output *output,
time_base = data->video->time_base;
start_ts = output->video_start_ts;
} else {
time_base = data->audio->time_base;
time_base = data->audio_streams[0]->time_base;
start_ts = output->audio_start_ts;
}
......@@ -990,6 +1028,18 @@ static inline const char *get_string_or_null(obs_data_t *settings,
return value;
}
static int get_audio_mix_count(int audio_mix_mask)
{
int mix_count = 0;
for (int i = 0; i < MAX_AUDIO_MIXES; i++) {
if ((audio_mix_mask & (1 << i)) != 0) {
mix_count++;
}
}
return mix_count;
}
static bool try_connect(struct ffmpeg_output *output)
{
video_t *video = obs_output_video(output->output);
......@@ -1025,6 +1075,8 @@ static bool try_connect(struct ffmpeg_output *output)
config.height = (int)obs_output_get_height(output->output);
config.format = obs_to_ffmpeg_video_format(
video_output_get_format(video));
config.audio_tracks = (int)obs_output_get_mixers(output->output);
config.audio_mix_count = get_audio_mix_count(config.audio_tracks);
if (format_is_yuv(voi->format)) {
config.color_range = voi->range == VIDEO_RANGE_FULL ?
......@@ -1157,13 +1209,15 @@ static uint64_t ffmpeg_output_total_bytes(void *data)
struct obs_output_info ffmpeg_output = {
.id = "ffmpeg_output",
.flags = OBS_OUTPUT_AUDIO | OBS_OUTPUT_VIDEO,
.flags = OBS_OUTPUT_AUDIO |
OBS_OUTPUT_VIDEO |
OBS_OUTPUT_MULTI_TRACK,
.get_name = ffmpeg_output_getname,
.create = ffmpeg_output_create,
.destroy = ffmpeg_output_destroy,
.start = ffmpeg_output_start,
.stop = ffmpeg_output_stop,
.raw_video = receive_video,
.raw_audio = receive_audio,
.raw_audio2 = receive_audio,
.get_total_bytes = ffmpeg_output_total_bytes,
};
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册