aacenc.c 27.9 KB
Newer Older
K
Kostya Shishkov 已提交
1 2 3 4
/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
5
 * This file is part of Libav.
K
Kostya Shishkov 已提交
6
 *
7
 * Libav is free software; you can redistribute it and/or
K
Kostya Shishkov 已提交
8 9 10 11
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
12
 * Libav is distributed in the hope that it will be useful,
K
Kostya Shishkov 已提交
13 14 15 16 17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with Libav; if not, write to the Free Software
K
Kostya Shishkov 已提交
19 20 21 22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC encoder
 */

/***********************************
 *              TODOs:
 * add sane pulse detection
 * add temporal noise shaping
 ***********************************/

33
#include "libavutil/float_dsp.h"
34
#include "libavutil/opt.h"
K
Kostya Shishkov 已提交
35
#include "avcodec.h"
36
#include "put_bits.h"
K
Kostya Shishkov 已提交
37
#include "dsputil.h"
J
Justin Ruggles 已提交
38
#include "internal.h"
K
Kostya Shishkov 已提交
39
#include "mpeg4audio.h"
40
#include "kbdwin.h"
41
#include "sinewin.h"
K
Kostya Shishkov 已提交
42 43 44

#include "aac.h"
#include "aactab.h"
45 46 47
#include "aacenc.h"

#include "psymodel.h"
K
Kostya Shishkov 已提交
48

49 50
#define AAC_MAX_CHANNELS 6

51 52 53 54 55 56
/**
 * Log an error with av_log() and return AVERROR(EINVAL) from the calling
 * function when cond is true.
 *
 * Requires a variable named avctx in the caller's scope and may only be used
 * in functions returning int. The do { } while (0) wrapper makes the macro
 * expand to exactly one statement, so it is safe in an unbraced if/else.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)

57 58
/** Table filled by aac_encode_init() with ff_aac_pow2sf_tab[i] raised to the power 3/4. */
float ff_aac_pow34sf_tab[428];

K
Kostya Shishkov 已提交
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
/*
 * Band-size tables for the long (1024) window configuration.
 * Each entry is the size of one band; the tables are selected through
 * swb_size_1024[] by sample-rate index and handed to the psychoacoustic
 * model in aac_encode_init().
 * NOTE(review): the numeric suffix presumably names the sample rate in kHz
 * the table was designed for -- confirm against the AAC specification.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

/* Long-window band sizes, "64" variant. */
static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

/* Long-window band sizes, "48" variant. */
static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

/* Long-window band sizes, "32" variant. */
static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

/* Long-window band sizes, "24" variant. */
static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

/* Long-window band sizes, "16" variant. */
static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

/* Long-window band sizes, "8" variant. */
static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

102
static const uint8_t *swb_size_1024[] = {
K
Kostya Shishkov 已提交
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
};

/*
 * Band-size tables for the short (128) window configuration.
 * Selected through swb_size_128[] by sample-rate index and handed to the
 * psychoacoustic model in aac_encode_init().
 * NOTE(review): the numeric suffix presumably names the sample rate in kHz
 * the table was designed for -- confirm against the AAC specification.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

/* Short-window band sizes, "48" variant. */
static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

/* Short-window band sizes, "24" variant. */
static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

/* Short-window band sizes, "16" variant. */
static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

/* Short-window band sizes, "8" variant. */
static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};

129
static const uint8_t *swb_size_128[] = {
K
Kostya Shishkov 已提交
130 131 132 133 134 135 136 137 138 139
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8
};

/** default channel configurations */
static const uint8_t aac_chan_configs[6][5] = {
140 141 142 143 144 145
 {1, TYPE_SCE},                               // 1 channel  - single channel element
 {1, TYPE_CPE},                               // 2 channels - channel pair
 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
K
Kostya Shishkov 已提交
146 147
};

148 149 150 151 152 153 154 155 156 157 158 159
/**
 * Table to remap channels from Libav's default order to AAC order.
 *
 * Row index is the channel count minus one. Entry [n] of a row is the index
 * of the interleaved input channel that deinterleave_input_samples() copies
 * into planar channel n.
 */
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
    { 0 },
    { 0, 1 },
    { 2, 0, 1 },
    { 2, 0, 1, 3 },
    { 2, 0, 1, 3, 4 },
    { 2, 0, 1, 4, 5, 3 },
};

K
Kostya Shishkov 已提交
160 161 162 163 164 165 166 167 168 169 170 171
/**
 * Make AAC audio config object.
 *
 * Writes the AudioSpecificConfig into avctx->extradata; the caller must have
 * allocated the buffer and set avctx->extradata_size beforehand. A total of
 * 33 bits are written (5 + 4 + 4 + 3 + 11 + 5 + 1).
 *
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 */
static void put_audio_specific_config(AVCodecContext *avctx)
{
    PutBitContext pb;
    AACEncContext *s = avctx->priv_data;

    /* init_put_bits() takes the buffer size in bytes, not bits */
    init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
    put_bits(&pb, 5, 2); //object type - AAC-LC
    put_bits(&pb, 4, s->samplerate_index); //sample rate index
    put_bits(&pb, 4, s->channels);
    //GASpecificConfig
    put_bits(&pb, 1, 0); //frame length - 1024 samples
    put_bits(&pb, 1, 0); //does not depend on core coder
    put_bits(&pb, 1, 0); //is not extension

    //Explicitly Mark SBR absent
    put_bits(&pb, 11, 0x2b7); //sync extension
    put_bits(&pb, 5,  AOT_SBR);
    put_bits(&pb, 1,  0);
    flush_put_bits(&pb);
}

N
Nathan Caldwell 已提交
185
/**
 * Emit the signature of a windowing function named apply_<type>_window.
 * The macro invocation must be followed by the function body.
 */
#define WINDOW_FUNC(type)                                      \
static void apply_ ##type ##_window(DSPContext *dsp,           \
                                    AVFloatDSPContext *fdsp,   \
                                    SingleChannelElement *sce, \
                                    const float *audio)
N
Nathan Caldwell 已提交
189 190 191 192 193 194 195

WINDOW_FUNC(only_long)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    float *out = sce->ret;

196
    fdsp->vector_fmul       (out,        audio,        lwindow, 1024);
N
Nathan Caldwell 已提交
197 198 199 200 201 202 203 204 205
    dsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
}

WINDOW_FUNC(long_start)
{
    const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *out = sce->ret;

206
    fdsp->vector_fmul(out, audio, lwindow, 1024);
207 208
    memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
    dsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
N
Nathan Caldwell 已提交
209 210 211 212 213 214 215 216 217 218
    memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
}

WINDOW_FUNC(long_stop)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *out = sce->ret;

    memset(out, 0, sizeof(out[0]) * 448);
219
    fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
N
Nathan Caldwell 已提交
220 221 222 223 224 225 226 227 228 229
    memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
    dsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
}

WINDOW_FUNC(eight_short)
{
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *in = audio + 448;
    float *out = sce->ret;
230
    int w;
N
Nathan Caldwell 已提交
231

232
    for (w = 0; w < 8; w++) {
233
        fdsp->vector_fmul       (out, in, w ? pwindow : swindow, 128);
N
Nathan Caldwell 已提交
234 235 236 237 238 239 240
        out += 128;
        in  += 128;
        dsp->vector_fmul_reverse(out, in, swindow, 128);
        out += 128;
    }
}

241 242 243
static void (*const apply_window[4])(DSPContext *dsp, AVFloatDSPContext *fdsp,
                                     SingleChannelElement *sce,
                                     const float *audio) = {
N
Nathan Caldwell 已提交
244 245 246 247 248 249
    [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
    [LONG_START_SEQUENCE]  = apply_long_start_window,
    [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
    [LONG_STOP_SEQUENCE]   = apply_long_stop_window
};

250 251
/**
 * Window 2048 samples of one channel, transform them into sce->coeffs and
 * then move the last 1024 input samples to the start of the buffer.
 *
 * @param s     encoder context providing the DSP helpers and MDCT contexts
 * @param sce   channel whose ics selects the window; receives the coefficients
 * @param audio 2048 input samples; the second half is copied over the first
 *              half before returning
 */
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
                                  float *audio)
{
    float *windowed = sce->ret;
    int off;

    apply_window[sce->ics.window_sequence[0]](&s->dsp, &s->fdsp, sce, audio);

    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        /* eight 128-coefficient transforms, each fed by 256 windowed samples */
        for (off = 0; off < 1024; off += 128)
            s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + off, windowed + off*2);
    } else {
        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, windowed);
    }

    memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
}

K
Kostya Shishkov 已提交
266 267 268 269
/**
 * Encode ics_info element.
 * @see Table 4.6 (syntax of ics_info)
 */
K
Kostya Shishkov 已提交
270
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
K
Kostya Shishkov 已提交
271
{
272
    int w;
K
Kostya Shishkov 已提交
273 274 275 276

    put_bits(&s->pb, 1, 0);                // ics_reserved bit
    put_bits(&s->pb, 2, info->window_sequence[0]);
    put_bits(&s->pb, 1, info->use_kb_window[0]);
277
    if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
K
Kostya Shishkov 已提交
278 279
        put_bits(&s->pb, 6, info->max_sfb);
        put_bits(&s->pb, 1, 0);            // no prediction
280
    } else {
K
Kostya Shishkov 已提交
281
        put_bits(&s->pb, 4, info->max_sfb);
282
        for (w = 1; w < 8; w++)
283
            put_bits(&s->pb, 1, !info->group_len[w]);
K
Kostya Shishkov 已提交
284 285 286
    }
}

K
Kostya Shishkov 已提交
287
/**
288 289
 * Encode MS data.
 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
K
Kostya Shishkov 已提交
290
 */
291
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
K
Kostya Shishkov 已提交
292 293
{
    int i, w;
294 295

    put_bits(pb, 2, cpe->ms_mode);
296 297
    if (cpe->ms_mode == 1)
        for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
298
            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
299 300 301 302 303 304 305 306 307
                put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
}

/**
 * Finalize per-frame channel element information before bitstream writing.
 *
 * For every channel this clears the pulse count, computes max_sfb as the
 * highest band that is not flagged zero in any window, and merges the zero
 * flags of each window group into the flags of the group's first window.
 * While processing channel 0 of an element with a common window, bands
 * flagged in ms_mask are converted in place to mid ((L+R)/2) and side
 * (mid - R) coefficients. For common-window pairs max_sfb is then unified
 * between both channels and ms_mode is derived from the number of set mask
 * bits (0 = none, 1 = some, 2 = all).
 *
 * @param apc   encoder context (not used by this function)
 * @param cpe   channel element to adjust
 * @param chans number of channels in the element
 */
static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans)
{
    int ch, win, grp_win, band, k;
    int coef_pos, top_band, win_top_band;

    for (ch = 0; ch < chans; ch++) {
        SingleChannelElement *sce    = &cpe->ch[ch];
        IndividualChannelStream *ics = &sce->ics;

        coef_pos = 0;
        top_band = 0;
        sce->pulse.num_pulse = 0;

        for (win = 0; win < ics->num_windows*16; win += 16) {
            for (band = 0; band < ics->num_swb; band++) {
                //apply M/S
                if (cpe->common_window && !ch && cpe->ms_mask[win + band]) {
                    float *left  = cpe->ch[0].coeffs + coef_pos;
                    float *right = cpe->ch[1].coeffs + coef_pos;

                    for (k = 0; k < ics->swb_sizes[band]; k++) {
                        left[k]  = (left[k] + right[k]) / 2.0;
                        right[k] =  left[k] - right[k];
                    }
                }
                coef_pos += ics->swb_sizes[band];
            }

            /* index one past the last non-zero band of this window */
            win_top_band = ics->num_swb;
            while (win_top_band > 0 && sce->zeroes[win + win_top_band - 1])
                win_top_band--;

            top_band = FFMAX(top_band, win_top_band);
        }
        ics->max_sfb = top_band;

        //adjust zero bands for window groups
        for (win = 0; win < ics->num_windows; win += ics->group_len[win]) {
            for (band = 0; band < ics->max_sfb; band++) {
                int all_zero = 1;

                for (grp_win = win; grp_win < win + ics->group_len[win]; grp_win++) {
                    if (!sce->zeroes[grp_win*16 + band]) {
                        all_zero = 0;
                        break;
                    }
                }
                sce->zeroes[win*16 + band] = all_zero;
            }
        }
    }

    if (chans > 1 && cpe->common_window) {
        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
        int mask_count = 0;

        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
        ics1->max_sfb = ics0->max_sfb;

        for (win = 0; win < ics0->num_windows*16; win += 16) {
            for (band = 0; band < ics0->max_sfb; band++) {
                if (cpe->ms_mask[win + band])
                    mask_count++;
            }
        }

        if (mask_count == 0 || ics0->max_sfb == 0)
            cpe->ms_mode = 0;
        else if (mask_count < ics0->max_sfb * ics0->num_windows)
            cpe->ms_mode = 1;
        else
            cpe->ms_mode = 2;
    }
}

/**
 * Encode scalefactor band coding type.
 *
 * Calls the coder's encode_window_bands_info() once per window group,
 * passing the first window of the group and the group length.
 */
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
{
    int win = 0;

    while (win < sce->ics.num_windows) {
        s->coder->encode_window_bands_info(s, sce, win, sce->ics.group_len[win], s->lambda);
        win += sce->ics.group_len[win];
    }
}

/**
 * Encode scalefactors.
 */
378 379
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                                 SingleChannelElement *sce)
380 381 382 383
{
    int off = sce->sf_idx[0], diff;
    int i, w;

384 385 386
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (!sce->zeroes[w*16 + i]) {
387
                diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
388 389
                if (diff < 0 || diff > 120)
                    av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
390 391 392
                off = sce->sf_idx[w*16 + i];
                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
            }
K
Kostya Shishkov 已提交
393 394 395 396
        }
    }
}

397 398 399
/**
 * Encode pulse data.
 */
400
static void encode_pulses(AACEncContext *s, Pulse *pulse)
401 402 403 404
{
    int i;

    put_bits(&s->pb, 1, !!pulse->num_pulse);
405 406
    if (!pulse->num_pulse)
        return;
407 408 409

    put_bits(&s->pb, 2, pulse->num_pulse - 1);
    put_bits(&s->pb, 6, pulse->start);
410
    for (i = 0; i < pulse->num_pulse; i++) {
411
        put_bits(&s->pb, 5, pulse->pos[i]);
412 413 414 415 416 417 418
        put_bits(&s->pb, 4, pulse->amp[i]);
    }
}

/**
 * Encode spectral coefficients processed by psychoacoustic model.
 */
419
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
420
{
421
    int start, i, w, w2;
422

423
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
424
        start = 0;
425 426
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (sce->zeroes[w*16 + i]) {
427
                start += sce->ics.swb_sizes[i];
428 429
                continue;
            }
430
            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
431
                s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
432 433 434 435
                                                   sce->ics.swb_sizes[i],
                                                   sce->sf_idx[w*16 + i],
                                                   sce->band_type[w*16 + i],
                                                   s->lambda);
436
            start += sce->ics.swb_sizes[i];
437 438 439 440
        }
    }
}

441 442 443
/**
 * Encode one channel of audio data.
 */
444 445 446
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
                                     SingleChannelElement *sce,
                                     int common_window)
447 448
{
    put_bits(&s->pb, 8, sce->sf_idx[0]);
449 450
    if (!common_window)
        put_ics_info(s, &sce->ics);
451 452 453 454 455 456 457 458 459
    encode_band_info(s, sce);
    encode_scale_factors(avctx, s, sce);
    encode_pulses(s, &sce->pulse);
    put_bits(&s->pb, 1, 0); //tns
    put_bits(&s->pb, 1, 0); //ssr
    encode_spectral_coeffs(s, sce);
    return 0;
}

K
Kostya Shishkov 已提交
460 461 462
/**
 * Write some auxiliary information about the created AAC file.
 */
463 464
static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s,
                               const char *name)
K
Kostya Shishkov 已提交
465 466 467 468
{
    int i, namelen, padbits;

    namelen = strlen(name) + 2;
469
    put_bits(&s->pb, 3, TYPE_FIL);
K
Kostya Shishkov 已提交
470
    put_bits(&s->pb, 4, FFMIN(namelen, 15));
471
    if (namelen >= 15)
472
        put_bits(&s->pb, 8, namelen - 14);
K
Kostya Shishkov 已提交
473
    put_bits(&s->pb, 4, 0); //extension type - filler
474
    padbits = -put_bits_count(&s->pb) & 7;
475
    avpriv_align_put_bits(&s->pb);
476
    for (i = 0; i < namelen - 2; i++)
K
Kostya Shishkov 已提交
477 478 479 480
        put_bits(&s->pb, 8, name[i]);
    put_bits(&s->pb, 12 - padbits, 0);
}

481 482 483 484
/*
 * Deinterleave input samples.
 * Channels are reordered from Libav's default order to AAC order.
 */
485
static void deinterleave_input_samples(AACEncContext *s, const AVFrame *frame)
486 487 488 489 490 491 492 493
{
    int ch, i;
    const int sinc = s->channels;
    const uint8_t *channel_map = aac_chan_maps[sinc - 1];

    /* deinterleave and remap input samples */
    for (ch = 0; ch < sinc; ch++) {
        /* copy last 1024 samples of previous frame to the start of the current frame */
494
        memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
495 496

        /* deinterleave */
J
Justin Ruggles 已提交
497 498 499 500 501 502 503
        i = 2048;
        if (frame) {
            const float *sptr = ((const float *)frame->data[0]) + channel_map[ch];
            for (; i < 2048 + frame->nb_samples; i++) {
                s->planar_samples[ch][i] = *sptr;
                sptr += sinc;
            }
504
        }
505 506
        memset(&s->planar_samples[ch][i], 0,
               (3072 - i) * sizeof(s->planar_samples[0][0]));
507 508 509
    }
}

J
Justin Ruggles 已提交
510 511
/**
 * Encode one frame of audio into an AAC packet.
 *
 * The input frame is queued and deinterleaved into the planar sample
 * buffers; windowing, MDCT, psychoacoustic analysis, quantizer search and
 * bitstream writing then operate on the buffered samples. No packet is
 * produced while avctx->frame_number is 0, nor once s->last_frame has
 * reached 2.
 *
 * @param avctx          codec context
 * @param avpkt          output packet, allocated here via ff_alloc_packet()
 * @param frame          input samples, or NULL when flushing
 * @param got_packet_ptr set to 1 when a packet was written
 * @return 0 on success, otherwise the error returned by ff_af_queue_add()
 *         or ff_alloc_packet()
 */
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                            const AVFrame *frame, int *got_packet_ptr)
{
    AACEncContext *s = avctx->priv_data;
    float **samples = s->planar_samples, *samples2, *la, *overlap;
    ChannelElement *cpe;
    int i, ch, w, g, chans, tag, start_ch, ret;
    int chan_el_counter[4];
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];

    if (s->last_frame == 2)
        return 0;

    /* add current frame to queue */
    if (frame) {
        /* the assignment must be parenthesized so that ret receives the
         * error code and not the result of the comparison */
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
            return ret;
    }

    deinterleave_input_samples(s, frame);
    if (s->psypp)
        ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);

    /* the first call only fills the sample buffers */
    if (!avctx->frame_number)
        return 0;

    /* window decision, windowing and MDCT for every channel */
    start_ch = 0;
    for (i = 0; i < s->chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
        tag      = s->chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        for (ch = 0; ch < chans; ch++) {
            IndividualChannelStream *ics = &cpe->ch[ch].ics;
            int cur_channel = start_ch + ch;
            overlap  = &samples[cur_channel][0];
            samples2 = overlap + 1024;
            la       = samples2 + (448+64);
            if (!frame)
                la = NULL;
            if (tag == TYPE_LFE) {
                wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
                wi[ch].window_shape   = 0;
                wi[ch].num_windows    = 1;
                wi[ch].grouping[0]    = 1;

                /* Only the lowest 12 coefficients are used in a LFE channel.
                 * The expression below results in only the bottom 8 coefficients
                 * being used for 11.025kHz to 16kHz sample rates.
                 */
                ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
            } else {
                wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
                                              ics->window_sequence[0]);
            }
            /* shift the current window parameters into the "previous" slot */
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[ch].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[ch].window_shape;
            ics->num_windows        = wi[ch].num_windows;
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
            ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
            for (w = 0; w < ics->num_windows; w++)
                ics->group_len[w] = wi[ch].grouping[w];

            apply_window_and_mdct(s, &cpe->ch[ch], overlap);
        }
        start_ch += chans;
    }
    if ((ret = ff_alloc_packet(avpkt, 768 * s->channels))) {
        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
        return ret;
    }

    /* encode the frame, rescaling lambda and retrying while it is too large */
    do {
        int frame_bits;

        init_put_bits(&s->pb, avpkt->data, avpkt->size);

        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
            put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
        start_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (i = 0; i < s->chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
            const float *coeffs[2];
            tag      = s->chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);
            for (ch = 0; ch < chans; ch++)
                coeffs[ch] = cpe->ch[ch].coeffs;
            s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch * 2 + ch;
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
            }
            /* a pair shares one ics_info only when window type, shape and
             * grouping are identical in both channels */
            cpe->common_window = 0;
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (w = 0; w < wi[0].num_windows; w++) {
                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }
            s->cur_channel = start_ch * 2;
            if (s->options.stereo_mode && cpe->common_window) {
                if (s->options.stereo_mode > 0) {
                    /* forced M/S: flag every band of every window group */
                    IndividualChannelStream *ics = &cpe->ch[0].ics;
                    for (w = 0; w < ics->num_windows; w += ics->group_len[w])
                        for (g = 0;  g < ics->num_swb; g++)
                            cpe->ms_mask[w*16+g] = 1;
                } else if (s->coder->search_for_ms) {
                    s->coder->search_for_ms(s, cpe, s->lambda);
                }
            }
            adjust_frame_information(s, cpe, chans);
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    encode_ms_info(&s->pb, cpe);
                }
            }
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
            }
            start_ch += chans;
        }

        frame_bits = put_bits_count(&s->pb);
        /* accept the frame if it leaves room for the 3-bit TYPE_END tag */
        if (frame_bits <= 6144 * s->channels - 3) {
            s->psy.bitres.bits = frame_bits / s->channels;
            break;
        }

        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;

    } while (1);

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    if (!frame)
        s->last_frame++;

    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
                       &avpkt->duration);

    avpkt->size = put_bits_count(&s->pb) >> 3;
    *got_packet_ptr = 1;
    return 0;
}

679 680 681 682 683 684 685 686 687
/**
 * Release everything owned by the encoder context: both MDCT contexts, the
 * psychoacoustic model and its optional preprocessor, the sample buffer,
 * the channel elements and the audio frame queue.
 *
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;

    /* transforms */
    ff_mdct_end(&s->mdct1024);
    ff_mdct_end(&s->mdct128);

    /* psychoacoustic model and optional preprocessor */
    ff_psy_end(&s->psy);
    if (s->psypp)
        ff_psy_preprocess_end(s->psypp);

    /* buffers and frame queue */
    av_freep(&s->buffer.samples);
    av_freep(&s->cpe);
    ff_af_queue_close(&s->afq);

#if FF_API_OLD_ENCODE_AUDIO
    av_freep(&avctx->coded_frame);
#endif

    return 0;
}

/**
 * Initialize the DSP helpers, the KBD and sine window tables and the two
 * MDCT contexts used by the encoder.
 *
 * @return 0 on success, otherwise the non-zero value returned by
 *         ff_mdct_init()
 */
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
{
    int err;

    ff_dsputil_init(&s->dsp, avctx);
    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);

    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    err = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0);
    if (err)
        return err;

    err = ff_mdct_init(&s->mdct128,   8, 0, 32768.0);
    if (err)
        return err;

    return 0;
}

/**
 * Allocate the zero-initialized sample buffer (3 * 1024 samples per
 * channel), the channel elements and the extradata buffer, and point each
 * planar_samples[] entry at its slice of the sample buffer.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails; nothing is
 *         freed here on failure
 */
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
{
    int c;

    FF_ALLOCZ_OR_GOTO(avctx, s->buffer.samples, 3 * 1024 * s->channels * sizeof(s->buffer.samples[0]), alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, s->cpe, sizeof(ChannelElement) * s->chan_map[0], alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);

    for (c = 0; c < s->channels; c++) {
        s->planar_samples[c] = s->buffer.samples + 3 * 1024 * c;
    }

#if FF_API_OLD_ENCODE_AUDIO
    if (!(avctx->coded_frame = avcodec_alloc_frame()))
        goto alloc_fail;
#endif

    return 0;

alloc_fail:
    return AVERROR(ENOMEM);
}

/**
 * Initialize the AAC encoder.
 *
 * Validates sample rate, channel count, profile and bit rate, then sets up
 * the DSP contexts, buffers, extradata (AudioSpecificConfig), the
 * psychoacoustic model, the coefficient coder and the lookup tables.
 *
 * @return 0 on success, AVERROR(EINVAL) for unsupported parameters, or the
 *         error of the failing initialization step (aac_encode_end() is
 *         called in that case)
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i, ret = 0;
    const uint8_t *sizes[2];
    uint8_t grouping[AAC_MAX_CHANNELS];
    int lengths[2];

    avctx->frame_size = 1024;

    for (i = 0; i < 16; i++)
        if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
            break;

    s->channels = avctx->channels;

    /* The band-size tables are shorter than the sample-rate table that was
     * just searched, so the index must also be checked against them before
     * it is used to select a table. */
    ERROR_IF(i == 16 ||
             i >= (int)(sizeof(swb_size_1024) / sizeof(swb_size_1024[0])) ||
             i >= (int)(sizeof(swb_size_128)  / sizeof(swb_size_128[0])),
             "Unsupported sample rate %d\n", avctx->sample_rate);
    /* channels - 1 indexes aac_chan_configs below, so reject zero/negative */
    ERROR_IF(s->channels < 1 || s->channels > AAC_MAX_CHANNELS,
             "Unsupported number of channels: %d\n", s->channels);
    ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
             "Unsupported profile %d\n", avctx->profile);
    ERROR_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
             "Too many bits per frame requested\n");

    s->samplerate_index = i;

    s->chan_map = aac_chan_configs[s->channels-1];

    if ((ret = dsp_init(avctx, s)))
        goto fail;

    if ((ret = alloc_buffers(avctx, s)))
        goto fail;

    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);

    sizes[0]   = swb_size_1024[i];
    sizes[1]   = swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    /* from here on i counts syntactic elements, not the sample-rate index */
    for (i = 0; i < s->chan_map[0]; i++)
        grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
    if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], grouping)))
        goto fail;
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[2];

    s->lambda = avctx->global_quality ? avctx->global_quality : 120;

    ff_aac_tableinit();

    /* x^(3/4) computed as sqrt(x * sqrt(x)) */
    for (i = 0; i < 428; i++)
        ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));

    avctx->delay = 1024;
    ff_af_queue_init(avctx, &s->afq);

    return 0;
fail:
    aac_encode_end(avctx);
    return ret;
}

803 804
#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
static const AVOption aacenc_options[] = {
805
    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
806 807 808
        {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
809 810 811 812 813 814 815 816 817 818
    {NULL}
};

static const AVClass aacenc_class = {
    "AAC encoder",
    av_default_item_name,
    aacenc_options,
    LIBAVUTIL_VERSION_INT,
};

819
AVCodec ff_aac_encoder = {
820 821
    .name           = "aac",
    .type           = AVMEDIA_TYPE_AUDIO,
822
    .id             = AV_CODEC_ID_AAC,
823 824
    .priv_data_size = sizeof(AACEncContext),
    .init           = aac_encode_init,
J
Justin Ruggles 已提交
825
    .encode2        = aac_encode_frame,
826
    .close          = aac_encode_end,
827 828 829 830
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
                      CODEC_CAP_EXPERIMENTAL,
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
                                                     AV_SAMPLE_FMT_NONE },
831
    .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
832
    .priv_class     = &aacenc_class,
K
Kostya Shishkov 已提交
833
};