/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC encoder
 */

/***********************************
 *              TODOs:
 * add sane pulse detection
 * add temporal noise shaping
 ***********************************/

33
#include "libavutil/opt.h"
K
Kostya Shishkov 已提交
34
#include "avcodec.h"
35
#include "put_bits.h"
K
Kostya Shishkov 已提交
36
#include "dsputil.h"
J
Justin Ruggles 已提交
37
#include "internal.h"
K
Kostya Shishkov 已提交
38
#include "mpeg4audio.h"
39
#include "kbdwin.h"
40
#include "sinewin.h"
K
Kostya Shishkov 已提交
41 42 43

#include "aac.h"
#include "aactab.h"
44 45 46
#include "aacenc.h"

#include "psymodel.h"
K
Kostya Shishkov 已提交
47

48 49
/* Largest channel count the encoder accepts; aac_encode_init() rejects more. */
#define AAC_MAX_CHANNELS 6

50 51 52 53 54 55
/**
 * Log an error and make the enclosing function return AVERROR(EINVAL)
 * when cond is true.
 *
 * The message is logged against a variable named avctx, which must exist
 * in the scope where the macro is used. The remaining arguments are passed
 * to av_log() as format string and parameters.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as one
 * statement: it can be followed by a semicolon inside an unbraced
 * if/else without stealing the else branch or breaking compilation.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)

56 57
/* Filled by aac_encode_init(): entry i is ff_aac_pow2sf_tab[i] raised to the
 * power 3/4, computed as sqrt(x * sqrt(x)). */
float ff_aac_pow34sf_tab[428];

K
Kostya Shishkov 已提交
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
/*
 * Band width tables for the 1024-coefficient (long window) case.
 * Every table below adds up to exactly 1024. One of them is selected through
 * swb_size_1024[] in aac_encode_init() and handed to ff_psy_init().
 * NOTE(review): the numeric suffix presumably names the sample rate in kHz
 * the table belongs to -- confirm against avpriv_mpeg4audio_sample_rates.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

/* The only long-window table with a final band wider than its neighbours (96). */
static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

/* Same leading bands as swb_size_1024_48, but ends in uniform 32-wide bands. */
static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

101
static const uint8_t *swb_size_1024[] = {
K
Kostya Shishkov 已提交
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
};

/*
 * Band width tables for the 128-coefficient (short window) case.
 * Every table below adds up to exactly 128. One of them is selected through
 * swb_size_128[] in aac_encode_init() and handed to ff_psy_init().
 * NOTE(review): the numeric suffix presumably names the sample rate in kHz
 * the table belongs to -- confirm against avpriv_mpeg4audio_sample_rates.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};

128
static const uint8_t *swb_size_128[] = {
K
Kostya Shishkov 已提交
129 130 131 132 133 134 135 136 137 138
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8
};

/** default channel configurations */
static const uint8_t aac_chan_configs[6][5] = {
139 140 141 142 143 144
 {1, TYPE_SCE},                               // 1 channel  - single channel element
 {1, TYPE_CPE},                               // 2 channels - channel pair
 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
K
Kostya Shishkov 已提交
145 146
};

147 148 149 150 151 152 153 154 155 156 157 158
/**
 * Table to remap channels from Libav's default order to AAC order.
 *
 * The row is selected by (channel count - 1). Entry ch of a row is the
 * position, inside one interleaved input sample group, that is copied into
 * planar channel ch by deinterleave_input_samples().
 */
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
    { 0 },
    { 0, 1 },
    { 2, 0, 1 },
    { 2, 0, 1, 3 },
    { 2, 0, 1, 3, 4 },
    { 2, 0, 1, 4, 5, 3 },
};

K
Kostya Shishkov 已提交
159 160 161 162 163 164 165 166 167 168 169 170
/**
 * Make AAC audio config object.
 *
 * Writes the configuration into avctx->extradata, which must already be
 * allocated, with avctx->extradata_size set to its usable size in bytes.
 * 33 bits are written, so at least 5 bytes are required.
 *
 * @param avctx codec context; priv_data must point to the AACEncContext
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 */
static void put_audio_specific_config(AVCodecContext *avctx)
{
    PutBitContext pb;
    AACEncContext *s = avctx->priv_data;

    /* init_put_bits() takes the buffer size in bytes, not in bits */
    init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
    put_bits(&pb, 5, 2); //object type - AAC-LC
    put_bits(&pb, 4, s->samplerate_index); //sample rate index
    put_bits(&pb, 4, s->channels);
    //GASpecificConfig
    put_bits(&pb, 1, 0); //frame length - 1024 samples
    put_bits(&pb, 1, 0); //does not depend on core coder
    put_bits(&pb, 1, 0); //is not extension

    //Explicitly Mark SBR absent
    put_bits(&pb, 11, 0x2b7); //sync extension
    put_bits(&pb, 5,  AOT_SBR);
    put_bits(&pb, 1,  0);
    flush_put_bits(&pb);
}

N
Nathan Caldwell 已提交
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
/* Emits the signature of a windowing function named apply_<type>_window;
 * the function body follows the macro invocation. */
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(DSPContext *dsp, SingleChannelElement *sce, const float *audio)

WINDOW_FUNC(only_long)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    float *out = sce->ret;

    dsp->vector_fmul        (out,        audio,        lwindow, 1024);
    dsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
}

WINDOW_FUNC(long_start)
{
    const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *out = sce->ret;

    dsp->vector_fmul(out, audio, lwindow, 1024);
204 205
    memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
    dsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
N
Nathan Caldwell 已提交
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
    memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
}

WINDOW_FUNC(long_stop)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *out = sce->ret;

    memset(out, 0, sizeof(out[0]) * 448);
    dsp->vector_fmul(out + 448, audio + 448, swindow, 128);
    memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
    dsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
}

WINDOW_FUNC(eight_short)
{
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *in = audio + 448;
    float *out = sce->ret;
227
    int w;
N
Nathan Caldwell 已提交
228

229
    for (w = 0; w < 8; w++) {
N
Nathan Caldwell 已提交
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
        dsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
        out += 128;
        in  += 128;
        dsp->vector_fmul_reverse(out, in, swindow, 128);
        out += 128;
    }
}

static void (*const apply_window[4])(DSPContext *dsp, SingleChannelElement *sce, const float *audio) = {
    [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
    [LONG_START_SEQUENCE]  = apply_long_start_window,
    [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
    [LONG_STOP_SEQUENCE]   = apply_long_stop_window
};

245 246
/**
 * Window one channel's input and transform it into sce->coeffs.
 *
 * The windowed samples are written to sce->ret, then transformed with one
 * 1024-coefficient MDCT, or with eight 128-coefficient MDCTs for an
 * eight-short sequence. Afterwards the second 1024 samples of audio are
 * moved to its start.
 *
 * @param s     encoder context providing the DSP and MDCT contexts
 * @param sce   channel whose ics.window_sequence[0] selects the window
 * @param audio input samples; modified as described above
 */
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
                                  float *audio)
{
    float *windowed = sce->ret;
    int off;

    apply_window[sce->ics.window_sequence[0]](&s->dsp, sce, audio);

    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        /* each short transform consumes 256 windowed samples */
        for (off = 0; off < 1024; off += 128)
            s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + off, windowed + off*2);
    } else {
        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, windowed);
    }
    memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
}

K
Kostya Shishkov 已提交
261 262 263 264
/**
 * Encode ics_info element.
 * @see Table 4.6 (syntax of ics_info)
 */
K
Kostya Shishkov 已提交
265
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
K
Kostya Shishkov 已提交
266
{
267
    int w;
K
Kostya Shishkov 已提交
268 269 270 271

    put_bits(&s->pb, 1, 0);                // ics_reserved bit
    put_bits(&s->pb, 2, info->window_sequence[0]);
    put_bits(&s->pb, 1, info->use_kb_window[0]);
272
    if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
K
Kostya Shishkov 已提交
273 274
        put_bits(&s->pb, 6, info->max_sfb);
        put_bits(&s->pb, 1, 0);            // no prediction
275
    } else {
K
Kostya Shishkov 已提交
276
        put_bits(&s->pb, 4, info->max_sfb);
277
        for (w = 1; w < 8; w++)
278
            put_bits(&s->pb, 1, !info->group_len[w]);
K
Kostya Shishkov 已提交
279 280 281
    }
}

K
Kostya Shishkov 已提交
282
/**
283 284
 * Encode MS data.
 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
K
Kostya Shishkov 已提交
285
 */
286
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
K
Kostya Shishkov 已提交
287 288
{
    int i, w;
289 290

    put_bits(pb, 2, cpe->ms_mode);
291 292
    if (cpe->ms_mode == 1)
        for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
293
            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
294 295 296 297 298 299 300 301 302
                put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
}

/**
 * Finalize the per-frame band information of a channel element.
 *
 * For every channel this resets the pulse count, derives max_sfb from the
 * highest band not flagged in zeroes[], and merges the zero flags of each
 * window group into the group's first window. While processing channel 0
 * of an element with a common window, bands flagged in ms_mask have their
 * coefficients replaced by (ch0 + ch1) / 2 in channel 0 and by the new
 * channel 0 value minus ch1 in channel 1.
 * For a pair with a common window, both channels then get the larger
 * max_sfb and ms_mode is chosen from the number of set mask bits.
 *
 * @param apc   encoder context (not used here)
 * @param cpe   channel element to update
 * @param chans number of channels in the element
 */
static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans)
{
    IndividualChannelStream *ics0, *ics1;
    int ch, w, w2, g, k;
    int msc;

    for (ch = 0; ch < chans; ch++) {
        SingleChannelElement *sce = &cpe->ch[ch];
        IndividualChannelStream *ics = &sce->ics;
        int start = 0, maxsfb = 0;

        sce->pulse.num_pulse = 0;
        for (w = 0; w < ics->num_windows*16; w += 16) {
            int cmaxsfb;

            for (g = 0; g < ics->num_swb; g++) {
                //apply M/S
                if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
                    for (k = 0; k < ics->swb_sizes[g]; k++) {
                        cpe->ch[0].coeffs[start+k] = (cpe->ch[0].coeffs[start+k] + cpe->ch[1].coeffs[start+k]) / 2.0;
                        cpe->ch[1].coeffs[start+k] =  cpe->ch[0].coeffs[start+k] - cpe->ch[1].coeffs[start+k];
                    }
                }
                start += ics->swb_sizes[g];
            }

            /* drop trailing bands that are flagged as zero */
            cmaxsfb = ics->num_swb;
            while (cmaxsfb > 0 && sce->zeroes[w+cmaxsfb-1])
                cmaxsfb--;
            maxsfb = FFMAX(maxsfb, cmaxsfb);
        }
        ics->max_sfb = maxsfb;

        //adjust zero bands for window groups
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (g = 0; g < ics->max_sfb; g++) {
                int all_zero = 1;

                for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
                    if (!sce->zeroes[w2*16 + g]) {
                        all_zero = 0;
                        break;
                    }
                }
                sce->zeroes[w*16 + g] = all_zero;
            }
        }
    }

    if (chans <= 1 || !cpe->common_window)
        return;

    ics0 = &cpe->ch[0].ics;
    ics1 = &cpe->ch[1].ics;
    ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
    ics1->max_sfb = ics0->max_sfb;

    /* count the mask bits set below max_sfb */
    msc = 0;
    for (w = 0; w < ics0->num_windows*16; w += 16)
        for (k = 0; k < ics0->max_sfb; k++)
            if (cpe->ms_mask[w+k])
                msc++;

    if (msc == 0 || ics0->max_sfb == 0)
        cpe->ms_mode = 0;
    else if (msc < ics0->max_sfb * ics0->num_windows)
        cpe->ms_mode = 1;
    else
        cpe->ms_mode = 2;
}

/**
 * Encode scalefactor band coding type.
 *
 * Hands each window group of the channel to the active coder's
 * encode_window_bands_info callback.
 */
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
{
    int win = 0;

    while (win < sce->ics.num_windows) {
        s->coder->encode_window_bands_info(s, sce, win, sce->ics.group_len[win], s->lambda);
        win += sce->ics.group_len[win];
    }
}

/**
 * Encode scalefactors.
 */
373 374
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                                 SingleChannelElement *sce)
375 376 377 378
{
    int off = sce->sf_idx[0], diff;
    int i, w;

379 380 381
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (!sce->zeroes[w*16 + i]) {
382
                diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
383 384
                if (diff < 0 || diff > 120)
                    av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
385 386 387
                off = sce->sf_idx[w*16 + i];
                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
            }
K
Kostya Shishkov 已提交
388 389 390 391
        }
    }
}

392 393 394
/**
 * Encode pulse data.
 */
395
static void encode_pulses(AACEncContext *s, Pulse *pulse)
396 397 398 399
{
    int i;

    put_bits(&s->pb, 1, !!pulse->num_pulse);
400 401
    if (!pulse->num_pulse)
        return;
402 403 404

    put_bits(&s->pb, 2, pulse->num_pulse - 1);
    put_bits(&s->pb, 6, pulse->start);
405
    for (i = 0; i < pulse->num_pulse; i++) {
406
        put_bits(&s->pb, 5, pulse->pos[i]);
407 408 409 410 411 412 413
        put_bits(&s->pb, 4, pulse->amp[i]);
    }
}

/**
 * Encode spectral coefficients processed by psychoacoustic model.
 */
414
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
415
{
416
    int start, i, w, w2;
417

418
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
419
        start = 0;
420 421
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (sce->zeroes[w*16 + i]) {
422
                start += sce->ics.swb_sizes[i];
423 424
                continue;
            }
425
            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
426
                s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
427 428 429 430
                                                   sce->ics.swb_sizes[i],
                                                   sce->sf_idx[w*16 + i],
                                                   sce->band_type[w*16 + i],
                                                   s->lambda);
431
            start += sce->ics.swb_sizes[i];
432 433 434 435
        }
    }
}

436 437 438
/**
 * Encode one channel of audio data.
 */
439 440 441
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
                                     SingleChannelElement *sce,
                                     int common_window)
442 443
{
    put_bits(&s->pb, 8, sce->sf_idx[0]);
444 445
    if (!common_window)
        put_ics_info(s, &sce->ics);
446 447 448 449 450 451 452 453 454
    encode_band_info(s, sce);
    encode_scale_factors(avctx, s, sce);
    encode_pulses(s, &sce->pulse);
    put_bits(&s->pb, 1, 0); //tns
    put_bits(&s->pb, 1, 0); //ssr
    encode_spectral_coeffs(s, sce);
    return 0;
}

K
Kostya Shishkov 已提交
455 456 457
/**
 * Write some auxiliary information about the created AAC file.
 */
458 459
static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s,
                               const char *name)
K
Kostya Shishkov 已提交
460 461 462 463
{
    int i, namelen, padbits;

    namelen = strlen(name) + 2;
464
    put_bits(&s->pb, 3, TYPE_FIL);
K
Kostya Shishkov 已提交
465
    put_bits(&s->pb, 4, FFMIN(namelen, 15));
466
    if (namelen >= 15)
467
        put_bits(&s->pb, 8, namelen - 14);
K
Kostya Shishkov 已提交
468
    put_bits(&s->pb, 4, 0); //extension type - filler
469
    padbits = -put_bits_count(&s->pb) & 7;
470
    avpriv_align_put_bits(&s->pb);
471
    for (i = 0; i < namelen - 2; i++)
K
Kostya Shishkov 已提交
472 473 474 475
        put_bits(&s->pb, 8, name[i]);
    put_bits(&s->pb, 12 - padbits, 0);
}

476 477 478 479
/*
 * Deinterleave input samples.
 * Channels are reordered from Libav's default order to AAC order.
 */
480
static void deinterleave_input_samples(AACEncContext *s, const AVFrame *frame)
481 482 483 484 485 486 487 488
{
    int ch, i;
    const int sinc = s->channels;
    const uint8_t *channel_map = aac_chan_maps[sinc - 1];

    /* deinterleave and remap input samples */
    for (ch = 0; ch < sinc; ch++) {
        /* copy last 1024 samples of previous frame to the start of the current frame */
489
        memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
490 491

        /* deinterleave */
J
Justin Ruggles 已提交
492 493 494 495 496 497 498
        i = 2048;
        if (frame) {
            const float *sptr = ((const float *)frame->data[0]) + channel_map[ch];
            for (; i < 2048 + frame->nb_samples; i++) {
                s->planar_samples[ch][i] = *sptr;
                sptr += sinc;
            }
499
        }
500 501
        memset(&s->planar_samples[ch][i], 0,
               (3072 - i) * sizeof(s->planar_samples[0][0]));
502 503 504
    }
}

J
Justin Ruggles 已提交
505 506
/**
 * Encode one frame of audio.
 *
 * The input frame is queued and deinterleaved, then every channel is
 * windowed and transformed. The coding stage is repeated with a rescaled
 * lambda until the frame fits into 6144 bits per channel.
 *
 * @param avctx          codec context
 * @param avpkt          output packet; allocated here via ff_alloc_packet()
 * @param frame          input samples, or NULL to flush
 * @param got_packet_ptr set to 1 when a packet was produced
 * @return 0 on success (including calls that produce no packet), otherwise
 *         the error code returned by ff_af_queue_add() or ff_alloc_packet()
 */
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                            const AVFrame *frame, int *got_packet_ptr)
{
    AACEncContext *s = avctx->priv_data;
    float **samples = s->planar_samples, *samples2, *la, *overlap;
    ChannelElement *cpe;
    int i, ch, w, g, chans, tag, start_ch, ret;
    int chan_el_counter[4];
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];

    /* two flush calls have already been answered: nothing left to output */
    if (s->last_frame == 2)
        return 0;

    /* add current frame to queue */
    if (frame) {
        /* the comparison must apply to the assigned value, so that the
         * real error code (not the boolean result) is returned */
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
            return ret;
    }

    deinterleave_input_samples(s, frame);
    if (s->psypp)
        ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);

    /* the very first call only primes the sample buffers */
    if (!avctx->frame_number)
        return 0;

    start_ch = 0;
    for (i = 0; i < s->chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
        tag      = s->chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        for (ch = 0; ch < chans; ch++) {
            IndividualChannelStream *ics = &cpe->ch[ch].ics;
            int cur_channel = start_ch + ch;
            overlap  = &samples[cur_channel][0];
            samples2 = overlap + 1024;
            la       = samples2 + (448+64);
            if (!frame)
                la = NULL;
            if (tag == TYPE_LFE) {
                wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
                wi[ch].window_shape   = 0;
                wi[ch].num_windows    = 1;
                wi[ch].grouping[0]    = 1;

                /* Only the lowest 12 coefficients are used in a LFE channel.
                 * The expression below results in only the bottom 8 coefficients
                 * being used for 11.025kHz to 16kHz sample rates.
                 */
                ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
            } else {
                wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
                                              ics->window_sequence[0]);
            }
            /* slot [1] keeps the previous frame's choice, slot [0] the new one */
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[ch].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[ch].window_shape;
            ics->num_windows        = wi[ch].num_windows;
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
            ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
            for (w = 0; w < ics->num_windows; w++)
                ics->group_len[w] = wi[ch].grouping[w];

            apply_window_and_mdct(s, &cpe->ch[ch], overlap);
        }
        start_ch += chans;
    }
    do {
        int frame_bits;

        if ((ret = ff_alloc_packet(avpkt, 768 * s->channels))) {
            av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
            return ret;
        }
        init_put_bits(&s->pb, avpkt->data, avpkt->size);

        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
            put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
        start_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (i = 0; i < s->chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
            const float *coeffs[2];
            tag      = s->chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);
            for (ch = 0; ch < chans; ch++)
                coeffs[ch] = cpe->ch[ch].coeffs;
            s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch * 2 + ch;
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
            }
            /* a pair shares its window info only if type, shape and
             * grouping of both channels agree */
            cpe->common_window = 0;
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (w = 0; w < wi[0].num_windows; w++) {
                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }
            s->cur_channel = start_ch * 2;
            if (s->options.stereo_mode && cpe->common_window) {
                if (s->options.stereo_mode > 0) {
                    /* forced M/S: flag every band of every window group */
                    IndividualChannelStream *ics = &cpe->ch[0].ics;
                    for (w = 0; w < ics->num_windows; w += ics->group_len[w])
                        for (g = 0;  g < ics->num_swb; g++)
                            cpe->ms_mask[w*16+g] = 1;
                } else if (s->coder->search_for_ms) {
                    s->coder->search_for_ms(s, cpe, s->lambda);
                }
            }
            adjust_frame_information(s, cpe, chans);
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    encode_ms_info(&s->pb, cpe);
                }
            }
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
            }
            start_ch += chans;
        }

        frame_bits = put_bits_count(&s->pb);
        /* accept the frame if it leaves room for the 3-bit end marker */
        if (frame_bits <= 6144 * s->channels - 3) {
            s->psy.bitres.bits = frame_bits / s->channels;
            break;
        }

        /* too large: rescale lambda and code the frame again */
        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;

    } while (1);

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    if (!frame)
        s->last_frame++;

    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
                       &avpkt->duration);

    avpkt->size = put_bits_count(&s->pb) >> 3;
    *got_packet_ptr = 1;
    return 0;
}

673 674 675 676 677 678 679 680 681
/**
 * Release everything owned by the encoder context.
 *
 * Also used by aac_encode_init() as its failure path.
 *
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;

    /* transforms and psychoacoustic state */
    ff_mdct_end(&enc->mdct1024);
    ff_mdct_end(&enc->mdct128);
    ff_psy_end(&enc->psy);
    if (enc->psypp)
        ff_psy_preprocess_end(enc->psypp);

    /* buffers and frame queue */
    av_freep(&enc->buffer.samples);
    av_freep(&enc->cpe);
    ff_af_queue_close(&enc->afq);
#if FF_API_OLD_ENCODE_AUDIO
    av_freep(&avctx->coded_frame);
#endif
    return 0;
}

/**
 * Set up the DSP helpers, window tables and both MDCT contexts.
 *
 * @return 0 on success, otherwise the value returned by the failing
 *         ff_mdct_init() call
 */
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
{
    int ret;

    ff_dsputil_init(&s->dsp, avctx);

    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0);
    if (ret)
        return ret;

    /* the last step decides the result */
    ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0);
    return ret;
}

/**
 * Allocate the sample buffer, the channel elements and the extradata.
 *
 * The sample buffer holds 3 * 1024 samples per channel; planar_samples[]
 * is pointed at each channel's slice. Nothing is released here on failure.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation failed
 */
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
{
    int c;

    FF_ALLOCZ_OR_GOTO(avctx, s->buffer.samples, 3 * 1024 * s->channels * sizeof(s->buffer.samples[0]), alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, s->cpe, sizeof(ChannelElement) * s->chan_map[0], alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);

    for (c = 0; c < s->channels; c++)
        s->planar_samples[c] = &s->buffer.samples[3 * 1024 * c];

#if FF_API_OLD_ENCODE_AUDIO
    avctx->coded_frame = avcodec_alloc_frame();
    if (!avctx->coded_frame)
        goto alloc_fail;
#endif

    return 0;

alloc_fail:
    return AVERROR(ENOMEM);
}

/**
 * Initialize the encoder.
 *
 * Validates sample rate, channel count, profile and bit rate, then sets up
 * DSP state, buffers, the extradata, the psychoacoustic model and the
 * tables used during encoding.
 *
 * @return 0 on success, AVERROR(EINVAL) for unsupported parameters, or the
 *         error code of the failing initialization step. In the latter case
 *         aac_encode_end() has already been called.
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i, ret = 0;
    const uint8_t *sizes[2];
    uint8_t grouping[AAC_MAX_CHANNELS];
    int lengths[2];

    avctx->frame_size = 1024;

    for (i = 0; i < 16; i++)
        if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
            break;

    s->channels = avctx->channels;

    /* The rate table is searched over 16 entries, but band tables exist
     * only for the indices covered by swb_size_1024[] and swb_size_128[];
     * anything beyond them must be rejected before it is used as an index. */
    ERROR_IF(i == 16
             || i >= (int)(sizeof(swb_size_1024) / sizeof(swb_size_1024[0]))
             || i >= (int)(sizeof(swb_size_128)  / sizeof(swb_size_128[0])),
             "Unsupported sample rate %d\n", avctx->sample_rate);
    /* the channel count is used (minus one) as a table index below */
    ERROR_IF(s->channels < 1 || s->channels > AAC_MAX_CHANNELS,
             "Unsupported number of channels: %d\n", s->channels);
    ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
             "Unsupported profile %d\n", avctx->profile);
    ERROR_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
             "Too many bits per frame requested\n");

    s->samplerate_index = i;

    s->chan_map = aac_chan_configs[s->channels-1];

    if ((ret = dsp_init(avctx, s)))
        goto fail;

    if ((ret = alloc_buffers(avctx, s)))
        goto fail;

    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);

    sizes[0]   = swb_size_1024[i];
    sizes[1]   = swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    /* one flag per syntactic element: set when the element is a pair */
    for (i = 0; i < s->chan_map[0]; i++)
        grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
    if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], grouping)))
        goto fail;
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[2];

    s->lambda = avctx->global_quality ? avctx->global_quality : 120;

    ff_aac_tableinit();

    /* x^(3/4) computed as sqrt(x * sqrt(x)) */
    for (i = 0; i < 428; i++)
        ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));

    avctx->delay = 1024;
    ff_af_queue_init(avctx, &s->afq);

    return 0;
fail:
    aac_encode_end(avctx);
    return ret;
}

796 797
#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
static const AVOption aacenc_options[] = {
798 799 800 801
    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.dbl = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
        {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.dbl =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.dbl =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
802 803 804 805 806 807 808 809 810 811
    {NULL}
};

static const AVClass aacenc_class = {
    "AAC encoder",
    av_default_item_name,
    aacenc_options,
    LIBAVUTIL_VERSION_INT,
};

812
AVCodec ff_aac_encoder = {
813 814 815 816 817
    .name           = "aac",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = CODEC_ID_AAC,
    .priv_data_size = sizeof(AACEncContext),
    .init           = aac_encode_init,
J
Justin Ruggles 已提交
818
    .encode2        = aac_encode_frame,
819
    .close          = aac_encode_end,
820
    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL,
821
    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
K
Kostya Shishkov 已提交
822
    .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
823
    .priv_class = &aacenc_class,
K
Kostya Shishkov 已提交
824
};