aacenc.c 24.1 KB
Newer Older
K
Kostya Shishkov 已提交
1 2 3 4
/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC encoder
 */

/***********************************
 *              TODOs:
 * add sane pulse detection
 * add temporal noise shaping
 ***********************************/

33
#include "libavutil/opt.h"
K
Kostya Shishkov 已提交
34
#include "avcodec.h"
35
#include "put_bits.h"
K
Kostya Shishkov 已提交
36 37
#include "dsputil.h"
#include "mpeg4audio.h"
38
#include "kbdwin.h"
39
#include "sinewin.h"
K
Kostya Shishkov 已提交
40 41 42

#include "aac.h"
#include "aactab.h"
43 44 45
#include "aacenc.h"

#include "psymodel.h"
K
Kostya Shishkov 已提交
46

47 48
#define AAC_MAX_CHANNELS 6

K
Kostya Shishkov 已提交
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
/*
 * Band width tables selected through swb_size_1024[] and handed to the
 * psychoacoustic model as the first ("long") band layout in
 * aac_encode_init(). Each entry is the width of one band.
 * NOTE(review): the numeric suffix presumably names the sample rate family
 * in kHz -- confirm against the sample rate index table.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

92
static const uint8_t *swb_size_1024[] = {
K
Kostya Shishkov 已提交
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
};

/*
 * Band width tables selected through swb_size_128[] and handed to the
 * psychoacoustic model as the second ("short") band layout in
 * aac_encode_init(). Each entry is the width of one band.
 * NOTE(review): the numeric suffix presumably names the sample rate family
 * in kHz -- confirm against the sample rate index table.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};

119
static const uint8_t *swb_size_128[] = {
K
Kostya Shishkov 已提交
120 121 122 123 124 125 126 127 128 129
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8
};

/** default channel configurations */
static const uint8_t aac_chan_configs[6][5] = {
130 131 132 133 134 135
 {1, TYPE_SCE},                               // 1 channel  - single channel element
 {1, TYPE_CPE},                               // 2 channels - channel pair
 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
K
Kostya Shishkov 已提交
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
};

/**
 * Write the AAC audio configuration into avctx->extradata.
 * The buffer and avctx->extradata_size must already be set by the caller.
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 */
static void put_audio_specific_config(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;
    PutBitContext bits;

    init_put_bits(&bits, avctx->extradata, avctx->extradata_size*8);

    /* AudioSpecificConfig */
    put_bits(&bits, 5, 2);                     // object type - AAC-LC
    put_bits(&bits, 4, enc->samplerate_index); // sample rate index
    put_bits(&bits, 4, avctx->channels);       // channel count, written as-is

    /* GASpecificConfig */
    put_bits(&bits, 1, 0);                     // frame length - 1024 samples
    put_bits(&bits, 1, 0);                     // does not depend on core coder
    put_bits(&bits, 1, 0);                     // is not extension

    /* Explicitly mark SBR absent */
    put_bits(&bits, 11, 0x2b7);                // sync extension
    put_bits(&bits, 5,  AOT_SBR);
    put_bits(&bits, 1,  0);

    flush_put_bits(&bits);
}

/**
 * Initialize the encoder.
 *
 * Validates sample rate, channel count, profile and bit rate, allocates the
 * sample buffer, the channel elements and the extradata, writes the audio
 * specific config, and sets up the MDCTs, windows, psychoacoustic model and
 * coefficient coder.
 *
 * @param avctx codec context whose priv_data is an AACEncContext
 * @return 0 on success, -1 on unsupported parameters or allocation failure
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i;
    const uint8_t *sizes[2];
    int lengths[2];

    avctx->frame_size = 1024;

    for (i = 0; i < 16; i++)
        if (avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
            break;
    /* The band width tables hold fewer entries than the 16 slots searched
     * above, so an index beyond either table has to be rejected here, before
     * it is used as a subscript further down. A non-positive rate is refused
     * as well because the bit rate check divides by it. */
    if (avctx->sample_rate <= 0 ||
        i >= (int)(sizeof(swb_size_1024) / sizeof(swb_size_1024[0])) ||
        i >= (int)(sizeof(swb_size_128)  / sizeof(swb_size_128[0]))) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
        return -1;
    }
    /* channels - 1 indexes aac_chan_configs, so zero or negative is invalid too */
    if (avctx->channels < 1 || avctx->channels > AAC_MAX_CHANNELS) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
        return -1;
    }
    if (avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported profile %d\n", avctx->profile);
        return -1;
    }
    if (1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * avctx->channels) {
        av_log(avctx, AV_LOG_ERROR, "Too many bits per frame requested\n");
        return -1;
    }
    s->samplerate_index = i;

    /* Allocate everything first so that a failure can be undone without
     * having to tear down the transforms. */
    s->chan_map           = aac_chan_configs[avctx->channels-1];
    s->samples            = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
    s->cpe                = av_mallocz(sizeof(ChannelElement) * s->chan_map[0]);
    avctx->extradata      = av_mallocz(5 + FF_INPUT_BUFFER_PADDING_SIZE);
    if (!s->samples || !s->cpe || !avctx->extradata) {
        av_freep(&s->samples);
        av_freep(&s->cpe);
        av_freep(&avctx->extradata);
        av_log(avctx, AV_LOG_ERROR, "Cannot allocate encoder buffers\n");
        return -1;
    }
    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);

    dsputil_init(&s->dsp, avctx);
    ff_mdct_init(&s->mdct1024, 11, 0, 1.0);
    ff_mdct_init(&s->mdct128,   8, 0, 1.0);
    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    /* band layouts: [0] is used for long windows, [1] for short ones */
    sizes[0]   = swb_size_1024[i];
    sizes[1]   = swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    ff_psy_init(&s->psy, avctx, 2, sizes, lengths);
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[2];

    /* default rate/distortion factor when no global quality was requested */
    s->lambda = avctx->global_quality ? avctx->global_quality : 120;

    ff_aac_tableinit();

    return 0;
}

224
static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s,
225
                                  SingleChannelElement *sce, short *audio)
226
{
227 228
    int i, k;
    const int chans = avctx->channels;
229 230 231
    const float * lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float * swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float * pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
Y
Young Han Lee 已提交
232
    float *output = sce->ret;
233 234

    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
Y
Young Han Lee 已提交
235
        memcpy(output, sce->saved, sizeof(float)*1024);
236
        if (sce->ics.window_sequence[0] == LONG_STOP_SEQUENCE) {
Y
Young Han Lee 已提交
237
            memset(output, 0, sizeof(output[0]) * 448);
238
            for (i = 448; i < 576; i++)
Y
Young Han Lee 已提交
239
                output[i] = sce->saved[i] * pwindow[i - 448];
240
            for (i = 576; i < 704; i++)
Y
Young Han Lee 已提交
241
                output[i] = sce->saved[i];
242
        }
243
        if (sce->ics.window_sequence[0] != LONG_START_SEQUENCE) {
244
            for (i = 0; i < 1024; i++) {
Y
Young Han Lee 已提交
245
                output[i+1024]         = audio[i * chans] * lwindow[1024 - i - 1];
246
                sce->saved[i] = audio[i * chans] * lwindow[i];
247
            }
248
        } else {
249
            for (i = 0; i < 448; i++)
Y
Young Han Lee 已提交
250
                output[i+1024]         = audio[i * chans];
251
            for (; i < 576; i++)
Y
Young Han Lee 已提交
252 253
                output[i+1024]         = audio[i * chans] * swindow[576 - i - 1];
            memset(output+1024+576, 0, sizeof(output[0]) * 448);
254 255
            for (i = 0; i < 1024; i++)
                sce->saved[i] = audio[i * chans];
256
        }
257
        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
258
    } else {
259
        for (k = 0; k < 1024; k += 128) {
260
            for (i = 448 + k; i < 448 + k + 256; i++)
Y
Young Han Lee 已提交
261
                output[i - 448 - k] = (i < 1024)
262
                                         ? sce->saved[i]
263
                                         : audio[(i-1024)*chans];
Y
Young Han Lee 已提交
264 265
            s->dsp.vector_fmul        (output,     output, k ?  swindow : pwindow, 128);
            s->dsp.vector_fmul_reverse(output+128, output+128, swindow, 128);
266
            s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + k, output);
267
        }
268 269
        for (i = 0; i < 1024; i++)
            sce->saved[i] = audio[i * chans];
270 271 272
    }
}

K
Kostya Shishkov 已提交
273 274 275 276
/**
 * Encode ics_info element.
 * @see Table 4.6 (syntax of ics_info)
 */
K
Kostya Shishkov 已提交
277
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
K
Kostya Shishkov 已提交
278
{
279
    int w;
K
Kostya Shishkov 已提交
280 281 282 283

    put_bits(&s->pb, 1, 0);                // ics_reserved bit
    put_bits(&s->pb, 2, info->window_sequence[0]);
    put_bits(&s->pb, 1, info->use_kb_window[0]);
284
    if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
K
Kostya Shishkov 已提交
285 286
        put_bits(&s->pb, 6, info->max_sfb);
        put_bits(&s->pb, 1, 0);            // no prediction
287
    } else {
K
Kostya Shishkov 已提交
288
        put_bits(&s->pb, 4, info->max_sfb);
289
        for (w = 1; w < 8; w++)
290
            put_bits(&s->pb, 1, !info->group_len[w]);
K
Kostya Shishkov 已提交
291 292 293
    }
}

K
Kostya Shishkov 已提交
294
/**
295 296
 * Encode MS data.
 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
K
Kostya Shishkov 已提交
297
 */
298
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
K
Kostya Shishkov 已提交
299 300
{
    int i, w;
301 302

    put_bits(pb, 2, cpe->ms_mode);
303 304
    if (cpe->ms_mode == 1)
        for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
305
            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
306 307 308 309 310 311 312 313 314
                put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
}

/**
 * Finalize the side information of a channel element before it is written.
 *
 * For every channel this applies M/S to the coefficients of the bands
 * flagged in cpe->ms_mask (done once, while handling channel 0 of a common
 * window pair), derives max_sfb from the per-band zero flags, clears the
 * pulse count and merges the zero flags of the windows in each group. For a
 * common window pair it then unifies max_sfb and selects ms_mode.
 *
 * @param apc   encoder context (not used by this function)
 * @param cpe   channel element, modified in place
 * @param chans number of channels in the element
 */
static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans)
{
    int i, w, w2, g, ch;
    int start, maxsfb, cmaxsfb;

    for (ch = 0; ch < chans; ch++) {
        IndividualChannelStream *ics = &cpe->ch[ch].ics;
        start = 0;
        maxsfb = 0;
        cpe->ch[ch].pulse.num_pulse = 0;
        /* w steps through the windows in units of 16 band slots; start keeps
         * running across windows as the coefficient offset */
        for (w = 0; w < ics->num_windows*16; w += 16) {
            for (g = 0; g < ics->num_swb; g++) {
                //apply M/S
                if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
                    for (i = 0; i < ics->swb_sizes[g]; i++) {
                        /* ch[0] becomes the average, ch[1] the average minus
                         * the old ch[1], i.e. half the difference */
                        cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
                        cpe->ch[1].coeffs[start+i] =  cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
                    }
                }
                start += ics->swb_sizes[g];
            }
            /* highest band of this window that is not flagged zero */
            for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--)
                ;
            maxsfb = FFMAX(maxsfb, cmaxsfb);
        }
        ics->max_sfb = maxsfb;

        //adjust zero bands for window groups
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (g = 0; g < ics->max_sfb; g++) {
                /* a band of a group is zero only if it is zero in every
                 * window of that group */
                i = 1;
                for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
                    if (!cpe->ch[ch].zeroes[w2*16 + g]) {
                        i = 0;
                        break;
                    }
                }
                cpe->ch[ch].zeroes[w*16 + g] = i;
            }
        }
    }

    if (chans > 1 && cpe->common_window) {
        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
        int msc = 0;
        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
        ics1->max_sfb = ics0->max_sfb;
        for (w = 0; w < ics0->num_windows*16; w += 16)
            for (i = 0; i < ics0->max_sfb; i++)
                if (cpe->ms_mask[w+i])
                    msc++;
        /* msc was counted over num_windows * max_sfb mask cells, so "all
         * bands use M/S" (mode 2) may only be chosen when every one of those
         * cells is set; otherwise the explicit mask (mode 1) is written. */
        if (msc == 0 || ics0->max_sfb == 0)
            cpe->ms_mode = 0;
        else
            cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
    }
}

/**
 * Write the band coding types of one channel by invoking the coder's
 * encode_window_bands_info callback once per window group.
 *
 * @param s   encoder context providing the coder and lambda
 * @param sce channel whose window grouping is walked
 */
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
{
    int win = 0;

    while (win < sce->ics.num_windows) {
        s->coder->encode_window_bands_info(s, sce, win, sce->ics.group_len[win], s->lambda);
        win += sce->ics.group_len[win];
    }
}

/**
 * Encode scalefactors.
 */
385 386
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                                 SingleChannelElement *sce)
387 388 389 390
{
    int off = sce->sf_idx[0], diff;
    int i, w;

391 392 393
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (!sce->zeroes[w*16 + i]) {
394
                diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
395 396
                if (diff < 0 || diff > 120)
                    av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
397 398 399
                off = sce->sf_idx[w*16 + i];
                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
            }
K
Kostya Shishkov 已提交
400 401 402 403
        }
    }
}

404 405 406
/**
 * Encode pulse data.
 */
407
static void encode_pulses(AACEncContext *s, Pulse *pulse)
408 409 410 411
{
    int i;

    put_bits(&s->pb, 1, !!pulse->num_pulse);
412 413
    if (!pulse->num_pulse)
        return;
414 415 416

    put_bits(&s->pb, 2, pulse->num_pulse - 1);
    put_bits(&s->pb, 6, pulse->start);
417
    for (i = 0; i < pulse->num_pulse; i++) {
418
        put_bits(&s->pb, 5, pulse->pos[i]);
419 420 421 422 423 424 425
        put_bits(&s->pb, 4, pulse->amp[i]);
    }
}

/**
 * Encode spectral coefficients processed by psychoacoustic model.
 */
426
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
427
{
428
    int start, i, w, w2;
429

430
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
431
        start = 0;
432 433
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (sce->zeroes[w*16 + i]) {
434
                start += sce->ics.swb_sizes[i];
435 436
                continue;
            }
437
            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
438
                s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
439 440 441 442
                                                   sce->ics.swb_sizes[i],
                                                   sce->sf_idx[w*16 + i],
                                                   sce->band_type[w*16 + i],
                                                   s->lambda);
443
            start += sce->ics.swb_sizes[i];
444 445 446 447
        }
    }
}

448 449 450
/**
 * Encode one channel of audio data.
 */
451 452 453
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
                                     SingleChannelElement *sce,
                                     int common_window)
454 455
{
    put_bits(&s->pb, 8, sce->sf_idx[0]);
456 457
    if (!common_window)
        put_ics_info(s, &sce->ics);
458 459 460 461 462 463 464 465 466
    encode_band_info(s, sce);
    encode_scale_factors(avctx, s, sce);
    encode_pulses(s, &sce->pulse);
    put_bits(&s->pb, 1, 0); //tns
    put_bits(&s->pb, 1, 0); //ssr
    encode_spectral_coeffs(s, sce);
    return 0;
}

K
Kostya Shishkov 已提交
467 468 469
/**
 * Write some auxiliary information about the created AAC file.
 */
470 471
static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s,
                               const char *name)
K
Kostya Shishkov 已提交
472 473 474 475
{
    int i, namelen, padbits;

    namelen = strlen(name) + 2;
476
    put_bits(&s->pb, 3, TYPE_FIL);
K
Kostya Shishkov 已提交
477
    put_bits(&s->pb, 4, FFMIN(namelen, 15));
478
    if (namelen >= 15)
K
Kostya Shishkov 已提交
479 480 481 482
        put_bits(&s->pb, 8, namelen - 16);
    put_bits(&s->pb, 4, 0); //extension type - filler
    padbits = 8 - (put_bits_count(&s->pb) & 7);
    align_put_bits(&s->pb);
483
    for (i = 0; i < namelen - 2; i++)
K
Kostya Shishkov 已提交
484 485 486 487
        put_bits(&s->pb, 8, name[i]);
    put_bits(&s->pb, 12 - padbits, 0);
}

488 489 490 491 492 493
/**
 * Encode one frame.
 *
 * s->samples holds two frames: the one being encoded followed by the one
 * just received. The call stores the new input in the second half, encodes
 * the first half and finally moves the second half to the front. The very
 * first call (avctx->frame_number == 0) therefore only buffers its input,
 * and a call with data == NULL encodes the remaining buffered frame.
 *
 * The element loop is repeated with an adjusted lambda until the frame fits
 * into 6144 bits per channel.
 *
 * @param avctx    codec context
 * @param frame    output buffer
 * @param buf_size size of the output buffer in bytes
 * @param data     input samples for avctx->channels channels, or NULL
 * @return number of bytes written, 0 when no output was produced
 */
static int aac_encode_frame(AVCodecContext *avctx,
                            uint8_t *frame, int buf_size, void *data)
{
    AACEncContext *s = avctx->priv_data;
    int16_t *cur_samples     = s->samples;
    int16_t *next_samples    = s->samples + 1024 * avctx->channels;
    const size_t frame_bytes = 1024 * avctx->channels * sizeof(s->samples[0]);
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
    int chan_el_counter[4];
    ChannelElement *cpe;
    int el, ch, w, g, chans, tag, first_ch;

    if (s->last_frame)
        return 0;

    /* store the new input behind the frame that is about to be encoded */
    if (data && !s->psypp) {
        memcpy(next_samples, data, frame_bytes);
    } else if (data) {
        first_ch = 0;
        for (el = 0; el < s->chan_map[0]; el++) {
            tag   = s->chan_map[el+1];
            chans = tag == TYPE_CPE ? 2 : 1;
            ff_psy_preprocess(s->psypp, (uint16_t*)data + first_ch,
                              next_samples + first_ch, first_ch, chans);
            first_ch += chans;
        }
    }

    /* nothing to encode yet on the first call: just shift the buffer */
    if (!avctx->frame_number) {
        memcpy(cur_samples, next_samples, frame_bytes);
        return 0;
    }

    /* window decision and transform for every channel */
    first_ch = 0;
    for (el = 0; el < s->chan_map[0]; el++) {
        FFPsyWindowInfo *wi = windows + first_ch;

        tag   = s->chan_map[el+1];
        chans = tag == TYPE_CPE ? 2 : 1;
        cpe   = &s->cpe[el];
        for (ch = 0; ch < chans; ch++) {
            SingleChannelElement *sce    = &cpe->ch[ch];
            IndividualChannelStream *ics = &sce->ics;
            const int cur_channel        = first_ch + ch;
            int16_t *chan_samples        = cur_samples + cur_channel;
            /* lookahead pointer handed to the psy model; none when flushing */
            int16_t *la = data ? chan_samples + (448+64) * avctx->channels : NULL;
            int short_blocks;

            if (tag == TYPE_LFE) {
                wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
                wi[ch].window_shape   = 0;
                wi[ch].num_windows    = 1;
                wi[ch].grouping[0]    = 1;
            } else {
                wi[ch] = s->psy.model->window(&s->psy, chan_samples, la, cur_channel,
                                              ics->window_sequence[0]);
            }

            /* slot [1] keeps the settings of the previous frame */
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[ch].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[ch].window_shape;
            ics->num_windows        = wi[ch].num_windows;

            short_blocks   = ics->num_windows == 8;
            ics->swb_sizes = s->psy.bands[short_blocks];
            ics->num_swb   = tag == TYPE_LFE ? 12 : s->psy.num_bands[short_blocks];
            for (w = 0; w < ics->num_windows; w++)
                ics->group_len[w] = wi[ch].grouping[w];

            apply_window_and_mdct(avctx, s, sce, chan_samples);
        }
        first_ch += chans;
    }

    /* write the frame, retrying with a scaled lambda while it is too large */
    for (;;) {
        int frame_bits;

        init_put_bits(&s->pb, frame, buf_size*8);
        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
            put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);

        first_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (el = 0; el < s->chan_map[0]; el++) {
            FFPsyWindowInfo *wi = windows + first_ch;

            tag   = s->chan_map[el+1];
            chans = tag == TYPE_CPE ? 2 : 1;
            cpe   = &s->cpe[el];

            /* element type followed by a per-type running number */
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);

            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = first_ch + ch;
                s->psy.model->analyze(&s->psy, s->cur_channel, cpe->ch[ch].coeffs, &wi[ch]);
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
            }

            /* a pair shares its window info only if type, shape and
             * grouping are the same for both channels */
            cpe->common_window = 0;
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (w = 0; w < wi[0].num_windows; w++) {
                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }

            s->cur_channel = first_ch;
            if (s->options.stereo_mode && cpe->common_window) {
                if (s->options.stereo_mode > 0) {
                    /* forced: flag every band of every window group */
                    IndividualChannelStream *ics = &cpe->ch[0].ics;

                    for (w = 0; w < ics->num_windows; w += ics->group_len[w])
                        for (g = 0;  g < ics->num_swb; g++)
                            cpe->ms_mask[w*16+g] = 1;
                } else if (s->coder->search_for_ms) {
                    s->coder->search_for_ms(s, cpe, s->lambda);
                }
            }

            adjust_frame_information(s, cpe, chans);
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    encode_ms_info(&s->pb, cpe);
                }
            }
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = first_ch + ch;
                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
            }
            first_ch += chans;
        }

        frame_bits = put_bits_count(&s->pb);
        /* leave room for the 3-bit end marker written below */
        if (frame_bits <= 6144 * avctx->channels - 3) {
            s->psy.bitres.bits = frame_bits / avctx->channels;
            break;
        }

        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
    }

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    if (!data)
        s->last_frame = 1;
    /* the frame received in this call becomes the next one to encode */
    memcpy(cur_samples, next_samples, frame_bytes);
    return put_bits_count(&s->pb)>>3;
}

K
Kostya Shishkov 已提交
644 645 646 647 648 649
/**
 * Release what aac_encode_init() set up in the private context: both MDCTs,
 * the psychoacoustic model and its preprocessor, the sample buffer and the
 * channel elements.
 *
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;

    /* transforms */
    ff_mdct_end(&enc->mdct1024);
    ff_mdct_end(&enc->mdct128);

    /* psychoacoustic model */
    ff_psy_end(&enc->psy);
    ff_psy_preprocess_end(enc->psypp);

    /* buffers */
    av_freep(&enc->samples);
    av_freep(&enc->cpe);

    return 0;
}

657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672
/* Flags shared by all private options below. Parenthesized so that the
 * macro stays a single operand wherever it is expanded. */
#define AACENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
/* Private options of the AAC encoder: "stereo_mode" with its named values
 * auto (-1), ms_off (0) and ms_force (1). */
static const AVOption aacenc_options[] = {
    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), FF_OPT_TYPE_INT, {.dbl = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
        {"auto",     "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_off",   "Disable Mid/Side coding", 0, FF_OPT_TYPE_CONST, {.dbl =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, FF_OPT_TYPE_CONST, {.dbl =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
    {NULL}
};

/* AVClass of the encoder's private context; it is what makes
 * aacenc_options reachable (referenced as .priv_class of ff_aac_encoder). */
static const AVClass aacenc_class = {
    "AAC encoder",
    av_default_item_name,
    aacenc_options,
    LIBAVUTIL_VERSION_INT,
};

673
AVCodec ff_aac_encoder = {
K
Kostya Shishkov 已提交
674
    "aac",
675
    AVMEDIA_TYPE_AUDIO,
K
Kostya Shishkov 已提交
676 677 678 679 680
    CODEC_ID_AAC,
    sizeof(AACEncContext),
    aac_encode_init,
    aac_encode_frame,
    aac_encode_end,
681
    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL,
682
    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
K
Kostya Shishkov 已提交
683
    .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
684
    .priv_class = &aacenc_class,
K
Kostya Shishkov 已提交
685
};