/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC encoder
 */

/***********************************
 *              TODOs:
 * add sane pulse detection
 * add temporal noise shaping
 ***********************************/

#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "put_bits.h"
#include "dsputil.h"
#include "internal.h"
#include "mpeg4audio.h"
#include "kbdwin.h"
#include "sinewin.h"

#include "aac.h"
#include "aactab.h"
#include "aacenc.h"

#include "psymodel.h"

/** Largest channel count accepted by aac_encode_init(). */
#define AAC_MAX_CHANNELS 6

/**
 * Log an error and return AVERROR(EINVAL) from the enclosing function when
 * cond is true.
 *
 * The expansion refers to a variable named avctx, which must be in scope at
 * the call site. The body is wrapped in do { } while (0) so the macro acts as
 * a single statement and can be used safely in an unbraced if/else.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)

/**
 * Lookup table filled in aac_encode_init() with
 * sqrt(x * sqrt(x)), i.e. x^(3/4), for each entry x of ff_aac_pow2sf_tab.
 */
float ff_aac_pow34sf_tab[428];

/*
 * Per-band coefficient counts used for long (1024-coefficient) windows.
 * The numeric suffix presumably names the sample rate in kHz the table
 * applies to -- TODO confirm against the AAC specification.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/**
 * Long-window band size tables, indexed by the sample rate index that
 * aac_encode_init() derives from avpriv_mpeg4audio_sample_rates.
 * Only 12 entries exist, so larger indices must not be used.
 */
static const uint8_t *const swb_size_1024[] = {
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
};

/*
 * Per-band coefficient counts used for short (128-coefficient) windows.
 * The numeric suffix presumably names the sample rate in kHz the table
 * applies to -- TODO confirm against the AAC specification.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};

/**
 * Short-window band size tables, indexed by the sample rate index that
 * aac_encode_init() derives from avpriv_mpeg4audio_sample_rates.
 * Only 12 entries exist, so larger indices must not be used.
 */
static const uint8_t *const swb_size_128[] = {
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8
};

/** default channel configurations */
static const uint8_t aac_chan_configs[6][5] = {
140 141 142 143 144 145
 {1, TYPE_SCE},                               // 1 channel  - single channel element
 {1, TYPE_CPE},                               // 2 channels - channel pair
 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
K
Kostya Shishkov 已提交
146 147
};

148 149 150 151 152 153 154 155 156 157 158 159
/**
 * Table to remap channels from Libav's default order to AAC order.
 */
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
    { 0 },
    { 0, 1 },
    { 2, 0, 1 },
    { 2, 0, 1, 3 },
    { 2, 0, 1, 3, 4 },
    { 2, 0, 1, 4, 5, 3 },
};

K
Kostya Shishkov 已提交
160 161 162 163 164 165 166 167 168 169 170 171
/**
 * Make AAC audio config object.
 * Writes the configuration into avctx->extradata, using
 * avctx->extradata_size as the buffer size; both must be set by the caller.
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 */
static void put_audio_specific_config(AVCodecContext *avctx)
{
    PutBitContext pb;
    AACEncContext *s = avctx->priv_data;

    init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8);
    put_bits(&pb, 5, 2); //object type - AAC-LC
    put_bits(&pb, 4, s->samplerate_index); //sample rate index
    put_bits(&pb, 4, s->channels);
    //GASpecificConfig
    put_bits(&pb, 1, 0); //frame length - 1024 samples
    put_bits(&pb, 1, 0); //does not depend on core coder
    put_bits(&pb, 1, 0); //is not extension

    //Explicitly Mark SBR absent
    put_bits(&pb, 11, 0x2b7); //sync extension
    put_bits(&pb, 5,  AOT_SBR);
    put_bits(&pb, 1,  0);
    flush_put_bits(&pb);
}

/**
 * Emit the function header shared by all apply_<type>_window() functions,
 * so that each one matches the signature stored in apply_window[].
 */
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(DSPContext *dsp, AVFloatDSPContext *fdsp, \
                                    SingleChannelElement *sce, \
                                    const float *audio)

WINDOW_FUNC(only_long)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
194
    float *out = sce->ret_buf;
N
Nathan Caldwell 已提交
195

196
    fdsp->vector_fmul       (out,        audio,        lwindow, 1024);
N
Nathan Caldwell 已提交
197 198 199 200 201 202 203
    dsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
}

WINDOW_FUNC(long_start)
{
    const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
204
    float *out = sce->ret_buf;
N
Nathan Caldwell 已提交
205

206
    fdsp->vector_fmul(out, audio, lwindow, 1024);
207 208
    memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
    dsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
N
Nathan Caldwell 已提交
209 210 211 212 213 214 215
    memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
}

WINDOW_FUNC(long_stop)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
216
    float *out = sce->ret_buf;
N
Nathan Caldwell 已提交
217 218

    memset(out, 0, sizeof(out[0]) * 448);
219
    fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
N
Nathan Caldwell 已提交
220 221 222 223 224 225 226 227 228
    memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
    dsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
}

WINDOW_FUNC(eight_short)
{
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *in = audio + 448;
229
    float *out = sce->ret_buf;
230
    int w;
N
Nathan Caldwell 已提交
231

232
    for (w = 0; w < 8; w++) {
233
        fdsp->vector_fmul       (out, in, w ? pwindow : swindow, 128);
N
Nathan Caldwell 已提交
234 235 236 237 238 239 240
        out += 128;
        in  += 128;
        dsp->vector_fmul_reverse(out, in, swindow, 128);
        out += 128;
    }
}

241 242 243
/** Windowing function for each window sequence type, indexed by sequence. */
static void (*const apply_window[4])(DSPContext *dsp, AVFloatDSPContext *fdsp,
                                     SingleChannelElement *sce,
                                     const float *audio) = {
    [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
    [LONG_START_SEQUENCE]  = apply_long_start_window,
    [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
    [LONG_STOP_SEQUENCE]   = apply_long_stop_window
};

/**
 * Window one channel's samples and transform them into sce->coeffs.
 *
 * @param s     encoder context providing the DSP and MDCT contexts
 * @param sce   channel element; its ret_buf receives the windowed samples
 *              and its coeffs receive the MDCT output
 * @param audio 2048 samples for this channel; on return the second 1024
 *              samples have been copied over the first 1024
 */
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
                                  float *audio)
{
    int i;
    float *output = sce->ret_buf;

    apply_window[sce->ics.window_sequence[0]](&s->dsp, &s->fdsp, sce, audio);

    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
    else
        /* eight transforms: 128 coefficients each from 256 windowed samples */
        for (i = 0; i < 1024; i += 128)
            s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
    memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
}

/**
 * Encode ics_info element.
 * @see Table 4.6 (syntax of ics_info)
 */
K
Kostya Shishkov 已提交
270
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
K
Kostya Shishkov 已提交
271
{
272
    int w;
K
Kostya Shishkov 已提交
273 274 275 276

    put_bits(&s->pb, 1, 0);                // ics_reserved bit
    put_bits(&s->pb, 2, info->window_sequence[0]);
    put_bits(&s->pb, 1, info->use_kb_window[0]);
277
    if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
K
Kostya Shishkov 已提交
278 279
        put_bits(&s->pb, 6, info->max_sfb);
        put_bits(&s->pb, 1, 0);            // no prediction
280
    } else {
K
Kostya Shishkov 已提交
281
        put_bits(&s->pb, 4, info->max_sfb);
282
        for (w = 1; w < 8; w++)
283
            put_bits(&s->pb, 1, !info->group_len[w]);
K
Kostya Shishkov 已提交
284 285 286
    }
}

K
Kostya Shishkov 已提交
287
/**
288 289
 * Encode MS data.
 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
K
Kostya Shishkov 已提交
290
 */
291
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
K
Kostya Shishkov 已提交
292 293
{
    int i, w;
294 295

    put_bits(pb, 2, cpe->ms_mode);
296 297
    if (cpe->ms_mode == 1)
        for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
298
            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
299 300 301 302 303 304
                put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
}

/**
 * Produce integer coefficients from scalefactors provided by the model.
 */
305
static void adjust_frame_information(ChannelElement *cpe, int chans)
306 307
{
    int i, w, w2, g, ch;
308
    int start, maxsfb, cmaxsfb;
309

310
    for (ch = 0; ch < chans; ch++) {
311 312 313 314
        IndividualChannelStream *ics = &cpe->ch[ch].ics;
        start = 0;
        maxsfb = 0;
        cpe->ch[ch].pulse.num_pulse = 0;
315 316
        for (w = 0; w < ics->num_windows*16; w += 16) {
            for (g = 0; g < ics->num_swb; g++) {
317
                //apply M/S
318
                if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
319
                    for (i = 0; i < ics->swb_sizes[g]; i++) {
320 321 322 323 324 325
                        cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
                        cpe->ch[1].coeffs[start+i] =  cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
                    }
                }
                start += ics->swb_sizes[g];
            }
326 327
            for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--)
                ;
328 329 330 331 332
            maxsfb = FFMAX(maxsfb, cmaxsfb);
        }
        ics->max_sfb = maxsfb;

        //adjust zero bands for window groups
333 334
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (g = 0; g < ics->max_sfb; g++) {
335
                i = 1;
336 337
                for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
                    if (!cpe->ch[ch].zeroes[w2*16 + g]) {
338 339 340 341 342 343 344 345 346
                        i = 0;
                        break;
                    }
                }
                cpe->ch[ch].zeroes[w*16 + g] = i;
            }
        }
    }

347
    if (chans > 1 && cpe->common_window) {
348 349 350 351 352
        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
        int msc = 0;
        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
        ics1->max_sfb = ics0->max_sfb;
353 354
        for (w = 0; w < ics0->num_windows*16; w += 16)
            for (i = 0; i < ics0->max_sfb; i++)
355 356
                if (cpe->ms_mask[w+i])
                    msc++;
357 358 359
        if (msc == 0 || ics0->max_sfb == 0)
            cpe->ms_mode = 0;
        else
360
            cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
361 362 363 364 365 366 367 368 369 370
    }
}

/**
 * Encode scalefactor band coding type.
 * Calls the active coder's encode_window_bands_info() once per window group.
 */
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
{
    int w;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
        s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
}

/**
 * Encode scalefactors.
 */
378 379
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                                 SingleChannelElement *sce)
380 381 382 383
{
    int off = sce->sf_idx[0], diff;
    int i, w;

384 385 386
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (!sce->zeroes[w*16 + i]) {
387
                diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
388 389
                if (diff < 0 || diff > 120)
                    av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
390 391 392
                off = sce->sf_idx[w*16 + i];
                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
            }
K
Kostya Shishkov 已提交
393 394 395 396
        }
    }
}

397 398 399
/**
 * Encode pulse data.
 */
400
static void encode_pulses(AACEncContext *s, Pulse *pulse)
401 402 403 404
{
    int i;

    put_bits(&s->pb, 1, !!pulse->num_pulse);
405 406
    if (!pulse->num_pulse)
        return;
407 408 409

    put_bits(&s->pb, 2, pulse->num_pulse - 1);
    put_bits(&s->pb, 6, pulse->start);
410
    for (i = 0; i < pulse->num_pulse; i++) {
411
        put_bits(&s->pb, 5, pulse->pos[i]);
412 413 414 415 416 417 418
        put_bits(&s->pb, 4, pulse->amp[i]);
    }
}

/**
 * Encode spectral coefficients processed by psychoacoustic model.
 */
419
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
420
{
421
    int start, i, w, w2;
422

423
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
424
        start = 0;
425 426
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (sce->zeroes[w*16 + i]) {
427
                start += sce->ics.swb_sizes[i];
428 429
                continue;
            }
430
            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
431
                s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
432 433 434 435
                                                   sce->ics.swb_sizes[i],
                                                   sce->sf_idx[w*16 + i],
                                                   sce->band_type[w*16 + i],
                                                   s->lambda);
436
            start += sce->ics.swb_sizes[i];
437 438 439 440
        }
    }
}

441 442 443
/**
 * Encode one channel of audio data.
 */
444 445 446
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
                                     SingleChannelElement *sce,
                                     int common_window)
447 448
{
    put_bits(&s->pb, 8, sce->sf_idx[0]);
449 450
    if (!common_window)
        put_ics_info(s, &sce->ics);
451 452 453 454 455 456 457 458 459
    encode_band_info(s, sce);
    encode_scale_factors(avctx, s, sce);
    encode_pulses(s, &sce->pulse);
    put_bits(&s->pb, 1, 0); //tns
    put_bits(&s->pb, 1, 0); //ssr
    encode_spectral_coeffs(s, sce);
    return 0;
}

K
Kostya Shishkov 已提交
460 461 462
/**
 * Write some auxiliary information about the created AAC file.
 */
463
static void put_bitstream_info(AACEncContext *s, const char *name)
K
Kostya Shishkov 已提交
464 465 466 467
{
    int i, namelen, padbits;

    namelen = strlen(name) + 2;
468
    put_bits(&s->pb, 3, TYPE_FIL);
K
Kostya Shishkov 已提交
469
    put_bits(&s->pb, 4, FFMIN(namelen, 15));
470
    if (namelen >= 15)
471
        put_bits(&s->pb, 8, namelen - 14);
K
Kostya Shishkov 已提交
472
    put_bits(&s->pb, 4, 0); //extension type - filler
473
    padbits = -put_bits_count(&s->pb) & 7;
474
    avpriv_align_put_bits(&s->pb);
475
    for (i = 0; i < namelen - 2; i++)
K
Kostya Shishkov 已提交
476 477 478 479
        put_bits(&s->pb, 8, name[i]);
    put_bits(&s->pb, 12 - padbits, 0);
}

480
/*
J
Justin Ruggles 已提交
481
 * Copy input samples.
482 483
 * Channels are reordered from Libav's default order to AAC order.
 */
J
Justin Ruggles 已提交
484
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
485
{
J
Justin Ruggles 已提交
486 487 488
    int ch;
    int end = 2048 + (frame ? frame->nb_samples : 0);
    const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
489

J
Justin Ruggles 已提交
490 491
    /* copy and remap input samples */
    for (ch = 0; ch < s->channels; ch++) {
492
        /* copy last 1024 samples of previous frame to the start of the current frame */
493
        memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
494

J
Justin Ruggles 已提交
495
        /* copy new samples and zero any remaining samples */
J
Justin Ruggles 已提交
496
        if (frame) {
J
Justin Ruggles 已提交
497 498 499
            memcpy(&s->planar_samples[ch][2048],
                   frame->extended_data[channel_map[ch]],
                   frame->nb_samples * sizeof(s->planar_samples[0][0]));
500
        }
J
Justin Ruggles 已提交
501 502
        memset(&s->planar_samples[ch][end], 0,
               (3072 - end) * sizeof(s->planar_samples[0][0]));
503 504 505
    }
}

J
Justin Ruggles 已提交
506 507
/**
 * Encode one frame of audio.
 *
 * The input frame is queued and copied into the per-channel buffers. No
 * packet is produced while avctx->frame_number is zero, or once last_frame
 * has reached 2. Otherwise every channel is windowed and transformed, and
 * the frame is written repeatedly, rescaling lambda after each attempt,
 * until it fits in 6144 * channels - 3 bits.
 *
 * @param avctx          codec context
 * @param avpkt          output packet, allocated here
 * @param frame          input samples, or NULL to flush
 * @param got_packet_ptr set to 1 when a packet was written
 * @return 0 on success, a negative error code on failure
 */
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                            const AVFrame *frame, int *got_packet_ptr)
{
    AACEncContext *s = avctx->priv_data;
    float **samples = s->planar_samples, *samples2, *la, *overlap;
    ChannelElement *cpe;
    int i, ch, w, g, chans, tag, start_ch, ret;
    int chan_el_counter[4];
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];

    if (s->last_frame == 2)
        return 0;

    /* add current frame to queue */
    if (frame) {
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
            return ret;
    }

    copy_input_samples(s, frame);
    if (s->psypp)
        ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);

    if (!avctx->frame_number)
        return 0;

    /* first pass: choose window sequences and compute the MDCT per channel */
    start_ch = 0;
    for (i = 0; i < s->chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
        tag      = s->chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        for (ch = 0; ch < chans; ch++) {
            IndividualChannelStream *ics = &cpe->ch[ch].ics;
            int cur_channel = start_ch + ch;
            overlap  = &samples[cur_channel][0];
            samples2 = overlap + 1024;
            la       = samples2 + (448+64);
            if (!frame)
                la = NULL;
            if (tag == TYPE_LFE) {
                wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
                wi[ch].window_shape   = 0;
                wi[ch].num_windows    = 1;
                wi[ch].grouping[0]    = 1;

                /* Only the lowest 12 coefficients are used in a LFE channel.
                 * The expression below results in only the bottom 8 coefficients
                 * being used for 11.025kHz to 16kHz sample rates.
                 */
                ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
            } else {
                wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
                                              ics->window_sequence[0]);
            }
            /* shift current window state into the "previous" slot */
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[ch].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[ch].window_shape;
            ics->num_windows        = wi[ch].num_windows;
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
            ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
            for (w = 0; w < ics->num_windows; w++)
                ics->group_len[w] = wi[ch].grouping[w];

            apply_window_and_mdct(s, &cpe->ch[ch], overlap);
        }
        start_ch += chans;
    }
    if ((ret = ff_alloc_packet(avpkt, 768 * s->channels))) {
        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
        return ret;
    }

    /* second pass: write the frame, retrying with a rescaled lambda while
     * it does not fit */
    do {
        int frame_bits;

        init_put_bits(&s->pb, avpkt->data, avpkt->size);

        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
            put_bitstream_info(s, LIBAVCODEC_IDENT);
        start_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (i = 0; i < s->chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
            const float *coeffs[2];
            tag      = s->chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);
            for (ch = 0; ch < chans; ch++)
                coeffs[ch] = cpe->ch[ch].coeffs;
            s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch * 2 + ch;
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
            }
            /* a pair shares ics_info only when window type, shape and
             * grouping agree in both channels */
            cpe->common_window = 0;
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (w = 0; w < wi[0].num_windows; w++) {
                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }
            s->cur_channel = start_ch * 2;
            if (s->options.stereo_mode && cpe->common_window) {
                if (s->options.stereo_mode > 0) {
                    /* forced M/S: mark every band of every window group */
                    IndividualChannelStream *ics = &cpe->ch[0].ics;
                    for (w = 0; w < ics->num_windows; w += ics->group_len[w])
                        for (g = 0;  g < ics->num_swb; g++)
                            cpe->ms_mask[w*16+g] = 1;
                } else if (s->coder->search_for_ms) {
                    s->coder->search_for_ms(s, cpe, s->lambda);
                }
            }
            adjust_frame_information(cpe, chans);
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    encode_ms_info(&s->pb, cpe);
                }
            }
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
            }
            start_ch += chans;
        }

        frame_bits = put_bits_count(&s->pb);
        /* 3 bits are kept free for the TYPE_END tag written below */
        if (frame_bits <= 6144 * s->channels - 3) {
            s->psy.bitres.bits = frame_bits / s->channels;
            break;
        }

        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;

    } while (1);

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    if (!frame)
        s->last_frame++;

    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
                       &avpkt->duration);

    avpkt->size = put_bits_count(&s->pb) >> 3;
    *got_packet_ptr = 1;
    return 0;
}

/**
 * Release everything owned by the encoder context.
 * Also used as the failure path of aac_encode_init().
 *
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;

    ff_mdct_end(&s->mdct1024);
    ff_mdct_end(&s->mdct128);
    ff_psy_end(&s->psy);
    if (s->psypp)
        ff_psy_preprocess_end(s->psypp);
    av_freep(&s->buffer.samples);
    av_freep(&s->cpe);
    ff_af_queue_close(&s->afq);
#if FF_API_OLD_ENCODE_AUDIO
    av_freep(&avctx->coded_frame);
#endif
    return 0;
}

/**
 * Initialise the DSP helpers, the KBD and sine windows, and both MDCT
 * contexts (sizes 2^11 and 2^8).
 *
 * @return 0 on success, otherwise the non-zero value returned by
 *         ff_mdct_init()
 */
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
{
    int ret = 0;

    ff_dsputil_init(&s->dsp, avctx);
    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);

    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0);
    if (ret)
        return ret;
    ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0);
    if (ret)
        return ret;

    return 0;
}

/**
 * Allocate the sample buffer (3 * 1024 samples per channel), the channel
 * elements and the extradata, and point planar_samples[] at each channel's
 * slice of the sample buffer.
 *
 * Nothing is freed here on failure; the caller is expected to clean up.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
{
    int ch;
    FF_ALLOCZ_OR_GOTO(avctx, s->buffer.samples, 3 * 1024 * s->channels * sizeof(s->buffer.samples[0]), alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, s->cpe, sizeof(ChannelElement) * s->chan_map[0], alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);

    for(ch = 0; ch < s->channels; ch++)
        s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;

#if FF_API_OLD_ENCODE_AUDIO
    if (!(avctx->coded_frame = avcodec_alloc_frame()))
        goto alloc_fail;
#endif

    return 0;
alloc_fail:
    return AVERROR(ENOMEM);
}

/**
 * Validate the requested parameters and set up the encoder.
 *
 * The sample rate must map to an index for which band size tables exist,
 * and the channel count must be between 1 and AAC_MAX_CHANNELS; both values
 * are used as table indices below.
 *
 * @return 0 on success, AVERROR(EINVAL) for unsupported parameters, or the
 *         error returned by a failing initialisation step
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i, ret = 0;
    const uint8_t *sizes[2];
    uint8_t grouping[AAC_MAX_CHANNELS];
    int lengths[2];

    avctx->frame_size = 1024;

    for (i = 0; i < 16; i++)
        if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
            break;

    s->channels = avctx->channels;

    /* the search above may stop at an index beyond the band size tables */
    ERROR_IF(i == 16 ||
             i >= (int)FF_ARRAY_ELEMS(swb_size_1024) ||
             i >= (int)FF_ARRAY_ELEMS(swb_size_128),
             "Unsupported sample rate %d\n", avctx->sample_rate);
    /* channels - 1 indexes aac_chan_configs and aac_chan_maps */
    ERROR_IF(s->channels < 1 || s->channels > AAC_MAX_CHANNELS,
             "Unsupported number of channels: %d\n", s->channels);
    ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
             "Unsupported profile %d\n", avctx->profile);
    ERROR_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
             "Too many bits per frame requested\n");

    s->samplerate_index = i;

    s->chan_map = aac_chan_configs[s->channels-1];

    if ((ret = dsp_init(avctx, s)))
        goto fail;

    if ((ret = alloc_buffers(avctx, s)))
        goto fail;

    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);

    sizes[0]   = swb_size_1024[i];
    sizes[1]   = swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    /* i is reused from here on as the syntax element index */
    for (i = 0; i < s->chan_map[0]; i++)
        grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
    if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], grouping)))
        goto fail;
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[2];

    s->lambda = avctx->global_quality ? avctx->global_quality : 120;

    ff_aac_tableinit();

    /* x^(3/4) for every entry of ff_aac_pow2sf_tab */
    for (i = 0; i < 428; i++)
        ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));

    avctx->delay = 1024;
    ff_af_queue_init(avctx, &s->afq);

    return 0;
fail:
    aac_encode_end(avctx);
    return ret;
}

#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
static const AVOption aacenc_options[] = {
801
    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
802 803 804
        {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
805 806 807 808 809 810 811 812 813 814
    {NULL}
};

static const AVClass aacenc_class = {
    "AAC encoder",
    av_default_item_name,
    aacenc_options,
    LIBAVUTIL_VERSION_INT,
};

815
AVCodec ff_aac_encoder = {
816 817
    .name           = "aac",
    .type           = AVMEDIA_TYPE_AUDIO,
818
    .id             = AV_CODEC_ID_AAC,
819 820
    .priv_data_size = sizeof(AACEncContext),
    .init           = aac_encode_init,
J
Justin Ruggles 已提交
821
    .encode2        = aac_encode_frame,
822
    .close          = aac_encode_end,
823 824
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
                      CODEC_CAP_EXPERIMENTAL,
J
Justin Ruggles 已提交
825
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
826
                                                     AV_SAMPLE_FMT_NONE },
827
    .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
828
    .priv_class     = &aacenc_class,
K
Kostya Shishkov 已提交
829
};