/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC encoder
 */

/***********************************
 *              TODOs:
 * add sane pulse detection
 * add temporal noise shaping
 ***********************************/

33
#include "libavutil/float_dsp.h"
34
#include "libavutil/opt.h"
K
Kostya Shishkov 已提交
35
#include "avcodec.h"
36
#include "put_bits.h"
J
Justin Ruggles 已提交
37
#include "internal.h"
K
Kostya Shishkov 已提交
38
#include "mpeg4audio.h"
39
#include "kbdwin.h"
40
#include "sinewin.h"
K
Kostya Shishkov 已提交
41 42 43

#include "aac.h"
#include "aactab.h"
44 45 46
#include "aacenc.h"

#include "psymodel.h"
K
Kostya Shishkov 已提交
47

48 49
/* Upper bound on the channel count accepted by aac_encode_init(); also the
 * dimension of aac_chan_maps and of the per-channel scratch arrays. */
#define AAC_MAX_CHANNELS 6

50 51 52 53 54 55
/**
 * Log an error and leave the enclosing function with AVERROR(EINVAL) when
 * cond is true.
 *
 * A variable named avctx must be visible at the call site; it is handed to
 * av_log(). The do { } while (0) wrapper makes the expansion a single
 * statement, so the macro can be used in an unbraced if/else.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)

56 57
/* Filled in aac_encode_init(): entry i is sqrt(x * sqrt(x)), i.e. x^(3/4),
 * with x = ff_aac_pow2sf_tab[i]. */
float ff_aac_pow34sf_tab[428];

K
Kostya Shishkov 已提交
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
/*
 * Band width tables for 1024-coefficient windows. Each table lists the width
 * of every band in order; the widths of one table add up to 1024. The name
 * suffix matches the sample-rate class selected through swb_size_1024[].
 */

/* 41 bands */
static const uint8_t swb_size_1024_96[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,
    12, 12, 12, 12, 12,
    16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

/* 47 bands */
static const uint8_t swb_size_1024_64[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40,
    40, 40, 40, 40, 40, 40, 40, 40, 40
};

/* 49 bands */
static const uint8_t swb_size_1024_48[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

/* 51 bands */
static const uint8_t swb_size_1024_32[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

/* 47 bands */
static const uint8_t swb_size_1024_24[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52,
    64, 64, 64, 64, 64
};

/* 43 bands */
static const uint8_t swb_size_1024_16[] = {
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60,
    64, 64, 64
};

/* 40 bands */
static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16,
    20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

101
static const uint8_t *swb_size_1024[] = {
K
Kostya Shishkov 已提交
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
};

/*
 * Band width tables for 128-coefficient windows. The widths of each table
 * add up to 128.
 */

/* 12 bands */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4,
    8, 8, 8,
    16, 28, 36
};

/* 14 bands */
static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4,
    8, 8, 8,
    12, 12, 12,
    16, 16, 16
};

/* 15 bands */
static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4,
    8, 8, 8,
    12, 12,
    16, 16, 20
};

/* 15 bands */
static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4,
    8, 8,
    12, 12,
    16, 20, 20
};

/* 15 bands */
static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4,
    8, 8, 8, 8,
    12, 16, 20, 20
};

128
static const uint8_t *swb_size_128[] = {
K
Kostya Shishkov 已提交
129 130 131 132 133 134 135 136 137 138
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8
};

/** default channel configurations */
static const uint8_t aac_chan_configs[6][5] = {
139 140 141 142 143 144
 {1, TYPE_SCE},                               // 1 channel  - single channel element
 {1, TYPE_CPE},                               // 2 channels - channel pair
 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
K
Kostya Shishkov 已提交
145 146
};

147
/**
 * Table to remap channels from libavcodec's default order to AAC order.
 *
 * Row (channel count - 1), column k gives the index of the input channel
 * that is copied into encoder channel k (see copy_input_samples()).
 */
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
    /* 1 channel  */ { 0 },
    /* 2 channels */ { 0, 1 },
    /* 3 channels */ { 2, 0, 1 },
    /* 4 channels */ { 2, 0, 1, 3 },
    /* 5 channels */ { 2, 0, 1, 3, 4 },
    /* 6 channels */ { 2, 0, 1, 4, 5, 3 },
};

K
Kostya Shishkov 已提交
159 160 161 162 163 164 165 166 167 168 169 170
/**
 * Make AAC audio config object.
 *
 * Writes the config into avctx->extradata. The caller must have allocated
 * that buffer and set avctx->extradata_size to its usable size in bytes.
 * 33 bits are written in total, so 5 bytes are enough.
 *
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 */
static void put_audio_specific_config(AVCodecContext *avctx)
{
    PutBitContext pb;
    AACEncContext *s = avctx->priv_data;

    /* init_put_bits() expects the buffer size in bytes, not in bits */
    init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
    put_bits(&pb, 5, 2); //object type - AAC-LC
    put_bits(&pb, 4, s->samplerate_index); //sample rate index
    put_bits(&pb, 4, s->channels);
    //GASpecificConfig
    put_bits(&pb, 1, 0); //frame length - 1024 samples
    put_bits(&pb, 1, 0); //does not depend on core coder
    put_bits(&pb, 1, 0); //is not extension

    //Explicitly Mark SBR absent
    put_bits(&pb, 11, 0x2b7); //sync extension
    put_bits(&pb, 5,  AOT_SBR);
    put_bits(&pb, 1,  0);
    flush_put_bits(&pb);
}

N
Nathan Caldwell 已提交
184
/**
 * Expands to the signature of the windowing routine apply_<type>_window().
 * The function body is written directly after the macro invocation.
 */
#define WINDOW_FUNC(type)                                        \
static void apply_ ## type ## _window(AVFloatDSPContext *fdsp,   \
                                      SingleChannelElement *sce, \
                                      const float *audio)
K
Kostya Shishkov 已提交
188

N
Nathan Caldwell 已提交
189 190 191 192
/**
 * Window 2048 input samples into sce->ret_buf using long windows only.
 * The first 1024 samples use the shape selected by use_kb_window[0], the
 * last 1024 use the time-reversed shape selected by use_kb_window[1].
 */
WINDOW_FUNC(only_long)
{
    float *dst = sce->ret_buf;
    const float *first_win;
    const float *second_win;

    if (sce->ics.use_kb_window[0])
        first_win = ff_aac_kbd_long_1024;
    else
        first_win = ff_sine_1024;

    if (sce->ics.use_kb_window[1])
        second_win = ff_aac_kbd_long_1024;
    else
        second_win = ff_sine_1024;

    fdsp->vector_fmul(dst, audio, first_win, 1024);
    fdsp->vector_fmul_reverse(dst + 1024, audio + 1024, second_win, 1024);
}
198

N
Nathan Caldwell 已提交
199 200 201 202
WINDOW_FUNC(long_start)
{
    const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
203
    float *out = sce->ret_buf;
N
Nathan Caldwell 已提交
204

205
    fdsp->vector_fmul(out, audio, lwindow, 1024);
206
    memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
207
    fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
N
Nathan Caldwell 已提交
208 209
    memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
}
210

N
Nathan Caldwell 已提交
211 212 213 214
WINDOW_FUNC(long_stop)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
215
    float *out = sce->ret_buf;
N
Nathan Caldwell 已提交
216 217

    memset(out, 0, sizeof(out[0]) * 448);
218
    fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
N
Nathan Caldwell 已提交
219
    memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
220
    fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
N
Nathan Caldwell 已提交
221
}
222

N
Nathan Caldwell 已提交
223 224 225 226 227
WINDOW_FUNC(eight_short)
{
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *in = audio + 448;
228
    float *out = sce->ret_buf;
229
    int w;
N
Nathan Caldwell 已提交
230

231
    for (w = 0; w < 8; w++) {
232
        fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
N
Nathan Caldwell 已提交
233 234
        out += 128;
        in  += 128;
235
        fdsp->vector_fmul_reverse(out, in, swindow, 128);
N
Nathan Caldwell 已提交
236 237
        out += 128;
    }
K
Kostya Shishkov 已提交
238 239
}

240
static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
241 242
                                     SingleChannelElement *sce,
                                     const float *audio) = {
N
Nathan Caldwell 已提交
243 244 245 246 247 248
    [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
    [LONG_START_SEQUENCE]  = apply_long_start_window,
    [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
    [LONG_STOP_SEQUENCE]   = apply_long_stop_window
};

249 250
/**
 * Window one channel and transform it into sce->coeffs.
 *
 * Uses one 1024-coefficient MDCT, or eight 128-coefficient MDCTs for
 * EIGHT_SHORT_SEQUENCE. Afterwards the second 1024 samples of audio are
 * moved to the front of the buffer.
 */
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
                                  float *audio)
{
    float *windowed = sce->ret_buf;
    int off;

    apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);

    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        /* each short block takes 256 windowed samples and yields 128 coefficients */
        for (off = 0; off < 1024; off += 128)
            s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + off, windowed + off*2);
    } else {
        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, windowed);
    }

    memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
}

K
Kostya Shishkov 已提交
265 266 267 268
/**
 * Encode ics_info element.
 * @see Table 4.6 (syntax of ics_info)
 */
K
Kostya Shishkov 已提交
269
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
K
Kostya Shishkov 已提交
270
{
271
    int w;
K
Kostya Shishkov 已提交
272 273 274 275

    put_bits(&s->pb, 1, 0);                // ics_reserved bit
    put_bits(&s->pb, 2, info->window_sequence[0]);
    put_bits(&s->pb, 1, info->use_kb_window[0]);
276
    if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
K
Kostya Shishkov 已提交
277 278
        put_bits(&s->pb, 6, info->max_sfb);
        put_bits(&s->pb, 1, 0);            // no prediction
279
    } else {
K
Kostya Shishkov 已提交
280
        put_bits(&s->pb, 4, info->max_sfb);
281
        for (w = 1; w < 8; w++)
282
            put_bits(&s->pb, 1, !info->group_len[w]);
K
Kostya Shishkov 已提交
283 284 285
    }
}

K
Kostya Shishkov 已提交
286
/**
287 288
 * Encode MS data.
 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
K
Kostya Shishkov 已提交
289
 */
290
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
K
Kostya Shishkov 已提交
291 292
{
    int i, w;
293 294

    put_bits(pb, 2, cpe->ms_mode);
295 296
    if (cpe->ms_mode == 1)
        for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
297
            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
298 299 300 301 302 303
                put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
}

/**
 * Produce integer coefficients from scalefactors provided by the model.
 */
304
static void adjust_frame_information(ChannelElement *cpe, int chans)
305 306
{
    int i, w, w2, g, ch;
307
    int start, maxsfb, cmaxsfb;
308

309
    for (ch = 0; ch < chans; ch++) {
310 311 312 313
        IndividualChannelStream *ics = &cpe->ch[ch].ics;
        start = 0;
        maxsfb = 0;
        cpe->ch[ch].pulse.num_pulse = 0;
314 315
        for (w = 0; w < ics->num_windows*16; w += 16) {
            for (g = 0; g < ics->num_swb; g++) {
316
                //apply M/S
317
                if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
318
                    for (i = 0; i < ics->swb_sizes[g]; i++) {
319 320 321 322 323 324
                        cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
                        cpe->ch[1].coeffs[start+i] =  cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
                    }
                }
                start += ics->swb_sizes[g];
            }
325 326
            for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--)
                ;
327 328 329 330 331
            maxsfb = FFMAX(maxsfb, cmaxsfb);
        }
        ics->max_sfb = maxsfb;

        //adjust zero bands for window groups
332 333
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (g = 0; g < ics->max_sfb; g++) {
334
                i = 1;
335 336
                for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
                    if (!cpe->ch[ch].zeroes[w2*16 + g]) {
337 338 339 340 341 342 343 344 345
                        i = 0;
                        break;
                    }
                }
                cpe->ch[ch].zeroes[w*16 + g] = i;
            }
        }
    }

346
    if (chans > 1 && cpe->common_window) {
347 348 349 350 351
        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
        int msc = 0;
        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
        ics1->max_sfb = ics0->max_sfb;
352 353
        for (w = 0; w < ics0->num_windows*16; w += 16)
            for (i = 0; i < ics0->max_sfb; i++)
354 355
                if (cpe->ms_mask[w+i])
                    msc++;
356 357 358
        if (msc == 0 || ics0->max_sfb == 0)
            cpe->ms_mode = 0;
        else
359
            cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
360 361 362 363 364 365 366 367 368 369
    }
}

/**
 * Encode scalefactor band coding type.
 *
 * Calls the selected coder's encode_window_bands_info() once per window
 * group.
 */
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
{
    int first = 0;

    while (first < sce->ics.num_windows) {
        s->coder->encode_window_bands_info(s, sce, first,
                                           sce->ics.group_len[first],
                                           s->lambda);
        first += sce->ics.group_len[first];
    }
}

/**
 * Encode scalefactors.
 */
377 378
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                                 SingleChannelElement *sce)
379 380 381 382
{
    int off = sce->sf_idx[0], diff;
    int i, w;

383 384 385
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (!sce->zeroes[w*16 + i]) {
386
                diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
387
                av_assert0(diff >= 0 && diff <= 120);
388 389 390
                off = sce->sf_idx[w*16 + i];
                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
            }
K
Kostya Shishkov 已提交
391 392 393 394
        }
    }
}

395 396 397
/**
 * Encode pulse data.
 */
398
static void encode_pulses(AACEncContext *s, Pulse *pulse)
399 400 401 402
{
    int i;

    put_bits(&s->pb, 1, !!pulse->num_pulse);
403 404
    if (!pulse->num_pulse)
        return;
405 406 407

    put_bits(&s->pb, 2, pulse->num_pulse - 1);
    put_bits(&s->pb, 6, pulse->start);
408
    for (i = 0; i < pulse->num_pulse; i++) {
409
        put_bits(&s->pb, 5, pulse->pos[i]);
410 411 412 413 414 415 416
        put_bits(&s->pb, 4, pulse->amp[i]);
    }
}

/**
 * Encode spectral coefficients processed by psychoacoustic model.
 */
417
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
418
{
419
    int start, i, w, w2;
420

421
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
422
        start = 0;
423 424
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (sce->zeroes[w*16 + i]) {
425
                start += sce->ics.swb_sizes[i];
426 427
                continue;
            }
428
            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
429
                s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
430 431 432 433
                                                   sce->ics.swb_sizes[i],
                                                   sce->sf_idx[w*16 + i],
                                                   sce->band_type[w*16 + i],
                                                   s->lambda);
434
            start += sce->ics.swb_sizes[i];
435 436 437 438
        }
    }
}

439 440 441
/**
 * Encode one channel of audio data.
 */
442 443 444
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
                                     SingleChannelElement *sce,
                                     int common_window)
445 446
{
    put_bits(&s->pb, 8, sce->sf_idx[0]);
447 448
    if (!common_window)
        put_ics_info(s, &sce->ics);
449 450 451 452 453 454 455 456 457
    encode_band_info(s, sce);
    encode_scale_factors(avctx, s, sce);
    encode_pulses(s, &sce->pulse);
    put_bits(&s->pb, 1, 0); //tns
    put_bits(&s->pb, 1, 0); //ssr
    encode_spectral_coeffs(s, sce);
    return 0;
}

K
Kostya Shishkov 已提交
458 459 460
/**
 * Write some auxiliary information about the created AAC file.
 */
461
static void put_bitstream_info(AACEncContext *s, const char *name)
K
Kostya Shishkov 已提交
462 463 464 465
{
    int i, namelen, padbits;

    namelen = strlen(name) + 2;
466
    put_bits(&s->pb, 3, TYPE_FIL);
K
Kostya Shishkov 已提交
467
    put_bits(&s->pb, 4, FFMIN(namelen, 15));
468
    if (namelen >= 15)
469
        put_bits(&s->pb, 8, namelen - 14);
K
Kostya Shishkov 已提交
470
    put_bits(&s->pb, 4, 0); //extension type - filler
471
    padbits = -put_bits_count(&s->pb) & 7;
472
    avpriv_align_put_bits(&s->pb);
473
    for (i = 0; i < namelen - 2; i++)
K
Kostya Shishkov 已提交
474 475 476 477
        put_bits(&s->pb, 8, name[i]);
    put_bits(&s->pb, 12 - padbits, 0);
}

478
/*
J
Justin Ruggles 已提交
479
 * Copy input samples.
480
 * Channels are reordered from libavcodec's default order to AAC order.
481
 */
J
Justin Ruggles 已提交
482
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
483
{
J
Justin Ruggles 已提交
484 485 486
    int ch;
    int end = 2048 + (frame ? frame->nb_samples : 0);
    const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
487

J
Justin Ruggles 已提交
488 489
    /* copy and remap input samples */
    for (ch = 0; ch < s->channels; ch++) {
490
        /* copy last 1024 samples of previous frame to the start of the current frame */
491
        memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
492

J
Justin Ruggles 已提交
493
        /* copy new samples and zero any remaining samples */
J
Justin Ruggles 已提交
494
        if (frame) {
J
Justin Ruggles 已提交
495 496 497
            memcpy(&s->planar_samples[ch][2048],
                   frame->extended_data[channel_map[ch]],
                   frame->nb_samples * sizeof(s->planar_samples[0][0]));
498
        }
J
Justin Ruggles 已提交
499 500
        memset(&s->planar_samples[ch][end], 0,
               (3072 - end) * sizeof(s->planar_samples[0][0]));
501 502 503
    }
}

J
Justin Ruggles 已提交
504 505
/**
 * Encode one frame of audio.
 *
 * The input is buffered, so the first call only stores samples and produces
 * no packet. A NULL frame flushes the buffered data; once two flush calls
 * have produced output, further calls return without a packet.
 *
 * @param avctx          codec context
 * @param avpkt          output packet, allocated here via ff_alloc_packet2()
 * @param frame          input samples, or NULL to flush
 * @param got_packet_ptr set to 1 when avpkt holds an encoded frame
 * @return 0 on success, a negative error code on failure (AVERROR(EINVAL)
 *         if the transformed input of any channel contains NaN)
 */
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                            const AVFrame *frame, int *got_packet_ptr)
{
    AACEncContext *s = avctx->priv_data;
    float **samples = s->planar_samples, *samples2, *la, *overlap;
    ChannelElement *cpe;
    int i, ch, w, g, chans, tag, start_ch, ret;
    int chan_el_counter[4];
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];

    if (s->last_frame == 2)
        return 0;

    /* add current frame to queue */
    if (frame) {
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
            return ret;
    }

    copy_input_samples(s, frame);
    if (s->psypp)
        ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);

    /* nothing to emit on the very first call */
    if (!avctx->frame_number)
        return 0;

    /* pass 1: choose windows, then window and transform every channel */
    start_ch = 0;
    for (i = 0; i < s->chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
        tag      = s->chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        for (ch = 0; ch < chans; ch++) {
            IndividualChannelStream *ics = &cpe->ch[ch].ics;
            int cur_channel = start_ch + ch;
            overlap  = &samples[cur_channel][0];
            samples2 = overlap + 1024;
            la       = samples2 + (448+64);
            if (!frame)
                la = NULL;
            if (tag == TYPE_LFE) {
                wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
                wi[ch].window_shape   = 0;
                wi[ch].num_windows    = 1;
                wi[ch].grouping[0]    = 1;

                /* Only the lowest 12 coefficients are used in a LFE channel.
                 * The expression below results in only the bottom 8 coefficients
                 * being used for 11.025kHz to 16kHz sample rates.
                 */
                ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
            } else {
                wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
                                              ics->window_sequence[0]);
            }
            /* slot [1] keeps the previous frame's choice, slot [0] the new one */
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[ch].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[ch].window_shape;
            ics->num_windows        = wi[ch].num_windows;
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
            ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
            for (w = 0; w < ics->num_windows; w++)
                ics->group_len[w] = wi[ch].grouping[w];

            apply_window_and_mdct(s, &cpe->ch[ch], overlap);
            /* check the channel that was just transformed, not always the
             * first channel of the element */
            if (isnan(cpe->ch[ch].coeffs[0])) {
                av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
                return AVERROR(EINVAL);
            }
        }
        start_ch += chans;
    }
    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
        return ret;

    /* pass 2: quantize and write; repeated with a rescaled lambda until the
     * frame fits into the bit limit */
    do {
        int frame_bits;

        init_put_bits(&s->pb, avpkt->data, avpkt->size);

        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
            put_bitstream_info(s, LIBAVCODEC_IDENT);
        start_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (i = 0; i < s->chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
            const float *coeffs[2];
            tag      = s->chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);
            for (ch = 0; ch < chans; ch++)
                coeffs[ch] = cpe->ch[ch].coeffs;
            s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
            }
            /* a pair shares one ics_info only if both channels chose the
             * same window type, shape and grouping */
            cpe->common_window = 0;
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (w = 0; w < wi[0].num_windows; w++) {
                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }
            s->cur_channel = start_ch;
            if (s->options.stereo_mode && cpe->common_window) {
                if (s->options.stereo_mode > 0) {
                    /* forced M/S: set the mask for every band of every group */
                    IndividualChannelStream *ics = &cpe->ch[0].ics;
                    for (w = 0; w < ics->num_windows; w += ics->group_len[w])
                        for (g = 0;  g < ics->num_swb; g++)
                            cpe->ms_mask[w*16+g] = 1;
                } else if (s->coder->search_for_ms) {
                    s->coder->search_for_ms(s, cpe, s->lambda);
                }
            }
            adjust_frame_information(cpe, chans);
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    encode_ms_info(&s->pb, cpe);
                }
            }
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
            }
            start_ch += chans;
        }

        frame_bits = put_bits_count(&s->pb);
        /* the 3 bits held back match the TYPE_END tag written after the loop */
        if (frame_bits <= 6144 * s->channels - 3) {
            s->psy.bitres.bits = frame_bits / s->channels;
            break;
        }

        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;

    } while (1);

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    if (!frame)
        s->last_frame++;

    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
                       &avpkt->duration);

    avpkt->size = put_bits_count(&s->pb) >> 3;
    *got_packet_ptr = 1;
    return 0;
}

K
Kostya Shishkov 已提交
674 675 676 677 678 679
/**
 * Release everything owned by the encoder context.
 * Also used as the failure path of aac_encode_init().
 *
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;

    /* transforms */
    ff_mdct_end(&enc->mdct1024);
    ff_mdct_end(&enc->mdct128);

    /* psychoacoustic model and optional preprocessor */
    ff_psy_end(&enc->psy);
    if (enc->psypp) {
        ff_psy_preprocess_end(enc->psypp);
    }

    /* buffers */
    av_freep(&enc->buffer.samples);
    av_freep(&enc->cpe);
    av_freep(&enc->fdsp);

    ff_af_queue_close(&enc->afq);

    return 0;
}

690 691 692 693
/**
 * Allocate the float DSP context, build the window tables and set up both
 * MDCT contexts.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the DSP context cannot be
 *         allocated, otherwise the non-zero value returned by ff_mdct_init()
 */
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
{
    int err;

    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
    if (!s->fdsp)
        return AVERROR(ENOMEM);

    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    err = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0);
    if (err)
        return err;

    err = ff_mdct_init(&s->mdct128, 8, 0, 32768.0);
    if (err)
        return err;

    return 0;
}

/**
 * Allocate the sample buffer, the channel elements and the extradata, and
 * point each planar_samples[] entry at its 3 * 1024 sample slice of the
 * sample buffer.
 *
 * @return 0 on success, AVERROR(ENOMEM) if any allocation fails
 */
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
{
    int chan;

    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);

    for (chan = 0; chan < s->channels; chan++) {
        s->planar_samples[chan] = s->buffer.samples + 3 * 1024 * chan;
    }

    return 0;

alloc_fail:
    return AVERROR(ENOMEM);
}

/**
 * Validate the requested configuration and set up the encoder.
 *
 * Rejects unsupported sample rates, channel counts outside
 * 1..AAC_MAX_CHANNELS, profiles other than unknown/AAC-LC, and bit rates
 * above 6144 bits per channel per frame. These checks run before anything
 * is allocated. Later failures release partial state via aac_encode_end().
 *
 * @return 0 on success, a negative error code on failure
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i, ret = 0;
    const uint8_t *sizes[2];
    uint8_t grouping[AAC_MAX_CHANNELS];
    int lengths[2];

    avctx->frame_size = 1024;

    /* look up the sample rate index; only the first 12 are supported */
    for (i = 0; i < 16; i++)
        if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
            break;

    s->channels = avctx->channels;

    ERROR_IF(i >= 12,
             "Unsupported sample rate %d\n", avctx->sample_rate);
    /* the lower bound keeps the aac_chan_configs lookup below in range */
    ERROR_IF(s->channels < 1 || s->channels > AAC_MAX_CHANNELS,
             "Unsupported number of channels: %d\n", s->channels);
    ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
             "Unsupported profile %d\n", avctx->profile);
    ERROR_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
             "Too many bits per frame requested\n");

    s->samplerate_index = i;

    s->chan_map = aac_chan_configs[s->channels-1];

    if ((ret = dsp_init(avctx, s)) < 0)
        goto fail;

    if ((ret = alloc_buffers(avctx, s)) < 0)
        goto fail;

    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);

    /* band layouts for long ([0]) and short ([1]) windows */
    sizes[0]   = swb_size_1024[i];
    sizes[1]   = swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    /* from here on i counts elements, not the sample rate index */
    for (i = 0; i < s->chan_map[0]; i++)
        grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
    if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
                           s->chan_map[0], grouping)) < 0)
        goto fail;
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[s->options.aac_coder];

    if (HAVE_MIPSDSPR1)
        ff_aac_coder_init_mips(s);

    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;

    ff_aac_tableinit();

    /* x^(3/4) computed as sqrt(x * sqrt(x)) */
    for (i = 0; i < 428; i++)
        ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));

    avctx->initial_padding = 1024;
    ff_af_queue_init(avctx, &s->afq);

    return 0;
fail:
    aac_encode_end(avctx);
    return ret;
}

796 797
/* Flags shared by all encoder options. Parenthesized so the OR stays one
 * unit wherever the macro is expanded. */
#define AACENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
static const AVOption aacenc_options[] = {
798
    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
799 800 801
        {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
802 803 804 805 806
    {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
        {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
        {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
        {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
        {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
807 808 809 810 811 812 813 814 815 816
    {NULL}
};

static const AVClass aacenc_class = {
    "AAC encoder",
    av_default_item_name,
    aacenc_options,
    LIBAVUTIL_VERSION_INT,
};

817 818 819 820 821 822 823
/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
 * failures; the three entries without an initializer are zero */
static const int mpeg4audio_sample_rates[16] = {
    96000, 88200, 64000,
    48000, 44100, 32000,
    24000, 22050, 16000,
    12000, 11025,  8000,
     7350
};

824
AVCodec ff_aac_encoder = {
825
    .name           = "aac",
826
    .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
827
    .type           = AVMEDIA_TYPE_AUDIO,
828
    .id             = AV_CODEC_ID_AAC,
829 830
    .priv_data_size = sizeof(AACEncContext),
    .init           = aac_encode_init,
J
Justin Ruggles 已提交
831
    .encode2        = aac_encode_frame,
832
    .close          = aac_encode_end,
833
    .supported_samplerates = mpeg4audio_sample_rates,
834 835
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
                      CODEC_CAP_EXPERIMENTAL,
J
Justin Ruggles 已提交
836
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
837 838
                                                     AV_SAMPLE_FMT_NONE },
    .priv_class     = &aacenc_class,
K
Kostya Shishkov 已提交
839
};