Add an HE-AAC v1 decoder.

A large portion of this code was originally authored by Robert Swain. The rest was written by me. Full history is available at: svn://svn.ffmpeg.org/soc/aac-sbr http://github.com/aconverse/ffmpeg-heaac/tree/sbr_pub Originally committed as revision 22316 to svn://svn.ffmpeg.org/ffmpeg/trunk

Add an HE-AAC v1 decoder.
A large portion of this code was originally authored by Robert Swain. The rest was written by me. Full history is available at: svn://svn.ffmpeg.org/soc/aac-sbr http://github.com/aconverse/ffmpeg-heaac/tree/sbr_pub Originally committed as revision 22316 to svn://svn.ffmpeg.org/ffmpeg/trunk
ed492b61 · Alex Converse · f19341e1 · ed492b61 · ed492b61 · ed492b61
10 changed files
--- a/Changelog
+++ b/Changelog
@@ -61,6 +61,7 @@ version <next>:
 - FFprobe tool
 - AMR-NB decoder
 - RTSP muxer
+- HE-AAC v1 decoder




--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -41,7 +41,7 @@ OBJS-$(CONFIG_VAAPI)                   += vaapi.o
 OBJS-$(CONFIG_VDPAU)                   += vdpau.o

 # decoders/encoders/hardware accelerators
-OBJS-$(CONFIG_AAC_DECODER)             += aac.o aactab.o
+OBJS-$(CONFIG_AAC_DECODER)             += aac.o aactab.o aacsbr.o
 OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
                                          aacpsy.o aactab.o      \
                                          psymodel.o iirfilter.o \

--- a/libavcodec/aac.c
+++ b/libavcodec/aac.c
@@ -62,7 +62,7 @@
 * N                    MIDI
 * N                    Harmonic and Individual Lines plus Noise
 * N                    Text-To-Speech Interface
- * N (in progress)      Spectral Band Replication
+ * Y                    Spectral Band Replication
 * Y (not in this code) Layer-1
 * Y (not in this code) Layer-2
 * Y (not in this code) Layer-3
@@ -86,6 +86,8 @@
 #include "aac.h"
 #include "aactab.h"
 #include "aacdectab.h"
+#include "sbr.h"
+#include "aacsbr.h"
 #include "mpeg4audio.h"
 #include "aac_parser.h"

@@ -180,14 +182,18 @@ static av_cold int che_configure(AACContext *ac,
    if (che_pos[type][id]) {
        if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
            return AVERROR(ENOMEM);
+        ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
        if (type != TYPE_CCE) {
            ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
            if (type == TYPE_CPE) {
                ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
            }
        }
-    } else
+    } else {
+        if (ac->che[type][id])
+            ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
        av_freep(&ac->che[type][id]);
+    }
    return 0;
 }

@@ -530,6 +536,8 @@ static av_cold int aac_decode_init(AVCodecContext *avccontext)
    AAC_INIT_VLC_STATIC( 9, 366);
    AAC_INIT_VLC_STATIC(10, 462);

+    ff_aac_sbr_init();
+
    dsputil_init(&ac->dsp, avccontext);

    ac->random_state = 0x1f2e3d4c;
@@ -1544,23 +1552,6 @@ static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
    return 0;
 }

-/**
- * Decode Spectral Band Replication extension data; reference: table 4.55.
- *
- * @param   crc flag indicating the presence of CRC checksum
- * @param   cnt length of TYPE_FIL syntactic element in bytes
- *
- * @return  Returns number of bytes consumed from the TYPE_FIL element.
- */
-static int decode_sbr_extension(AACContext *ac, GetBitContext *gb,
-                                int crc, int cnt)
-{
-    // TODO : sbr_extension implementation
-    av_log_missing_feature(ac->avccontext, "SBR", 0);
-    skip_bits_long(gb, 8 * cnt - 4); // -4 due to reading extension type
-    return cnt;
-}
-
 /**
 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
 *
@@ -1641,7 +1632,8 @@ static int decode_dynamic_range(DynamicRangeControl *che_drc,
 *
 * @return Returns number of bytes consumed
 */
-static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt)
+static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
+                                    ChannelElement *che, enum RawDataBlockType elem_type)
 {
    int crc_flag = 0;
    int res = cnt;
@@ -1649,7 +1641,21 @@ static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt)
    case EXT_SBR_DATA_CRC:
        crc_flag++;
    case EXT_SBR_DATA:
-        res = decode_sbr_extension(ac, gb, crc_flag, cnt);
+        if (!che) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
+            return res;
+        } else if (!ac->m4ac.sbr) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
+            skip_bits_long(gb, 8 * cnt - 4);
+            return res;
+        } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
+            skip_bits_long(gb, 8 * cnt - 4);
+            return res;
+        } else {
+            ac->m4ac.sbr = 1;
+        }
+        res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
        break;
    case EXT_DYNAMIC_RANGE:
        res = decode_dynamic_range(&ac->che_drc, gb, cnt);
@@ -1830,8 +1836,9 @@ static void apply_independent_coupling(AACContext *ac,
    const float bias = ac->add_bias;
    const float *src = cce->ch[0].ret;
    float *dest = target->ret;
+    const int len = 1024 << (ac->m4ac.sbr == 1);

-    for (i = 0; i < 1024; i++)
+    for (i = 0; i < len; i++)
        dest[i] += gain * (src[i] - bias);
 }

@@ -1889,10 +1896,18 @@ static void spectral_to_sample(AACContext *ac)
                    apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
                if (type <= TYPE_CPE)
                    apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
-                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT)
+                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
                    imdct_and_windowing(ac, &che->ch[0]);
-                if (type == TYPE_CPE)
+                    if (ac->m4ac.sbr > 0) {
+                        ff_sbr_dequant(ac, &che->sbr, type == TYPE_CPE ? TYPE_CPE : TYPE_SCE);
+                        ff_sbr_apply(ac, &che->sbr, 0, che->ch[0].ret, che->ch[0].ret);
+                    }
+                }
+                if (type == TYPE_CPE) {
                    imdct_and_windowing(ac, &che->ch[1]);
+                    if (ac->m4ac.sbr > 0)
+                        ff_sbr_apply(ac, &che->sbr, 1, che->ch[1].ret, che->ch[1].ret);
+                }
                if (type <= TYPE_CCE)
                    apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
            }
@@ -1942,9 +1957,9 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    AACContext *ac = avccontext->priv_data;
-    ChannelElement *che = NULL;
+    ChannelElement *che = NULL, *che_prev = NULL;
    GetBitContext gb;
-    enum RawDataBlockType elem_type;
+    enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
    int err, elem_id, data_size_tmp;
    int buf_consumed;
    int samples = 1024, multiplier;
@@ -2014,7 +2029,7 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
                    return -1;
            }
            while (elem_id > 0)
-                elem_id -= decode_extension_payload(ac, &gb, elem_id);
+                elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
            err = 0; /* FIXME */
            break;

@@ -2023,6 +2038,9 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
            break;
        }

+        che_prev       = che;
+        elem_type_prev = elem_type;
+
        if (err)
            return err;

@@ -2034,14 +2052,14 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,

    spectral_to_sample(ac);

-    multiplier = 0;
+    multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
    samples <<= multiplier;
    if (ac->output_configured < OC_LOCKED) {
        avccontext->sample_rate = ac->m4ac.sample_rate << multiplier;
        avccontext->frame_size = samples;
    }

-    data_size_tmp = 1024 * avccontext->channels * sizeof(int16_t);
+    data_size_tmp = samples * avccontext->channels * sizeof(int16_t);
    if (*data_size < data_size_tmp) {
        av_log(avccontext, AV_LOG_ERROR,
               "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
@@ -2050,7 +2068,7 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
    }
    *data_size = data_size_tmp;

-    ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, 1024, avccontext->channels);
+    ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avccontext->channels);

    if (ac->output_configured)
        ac->output_configured = OC_LOCKED;
@@ -2065,8 +2083,11 @@ static av_cold int aac_decode_close(AVCodecContext *avccontext)
    int i, type;

    for (i = 0; i < MAX_ELEM_ID; i++) {
-        for (type = 0; type < 4; type++)
+        for (type = 0; type < 4; type++) {
+            if (ac->che[type][i])
+                ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
            av_freep(&ac->che[type][i]);
+        }
    }

    ff_mdct_end(&ac->mdct);

--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -34,6 +34,7 @@
 #include "dsputil.h"
 #include "fft.h"
 #include "mpeg4audio.h"
+#include "sbr.h"

 #include <stdint.h>

@@ -217,7 +218,7 @@ typedef struct {
    uint8_t zeroes[128];                      ///< band is not coded (used by encoder)
    DECLARE_ALIGNED(16, float, coeffs)[1024]; ///< coefficients for IMDCT
    DECLARE_ALIGNED(16, float, saved)[1024];  ///< overlap
-    DECLARE_ALIGNED(16, float, ret)[1024];    ///< PCM output
+    DECLARE_ALIGNED(16, float, ret)[2048];    ///< PCM output
    PredictorState predictor_state[MAX_PREDICTORS];
 } SingleChannelElement;

@@ -233,6 +234,7 @@ typedef struct {
    SingleChannelElement ch[2];
    // CCE specific
    ChannelCoupling coup;
+    SpectralBandReplication sbr;
 } ChannelElement;

 /**

--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
--- a/libavcodec/aacsbr.h
+++ b/libavcodec/aacsbr.h
+/*
+ * AAC Spectral Band Replication function declarations
+ * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
+ * Copyright (c) 2010      Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/aacsbr.h
+ * AAC Spectral Band Replication function declarations
+ * @author Robert Swain ( rob opendot cl )
+ */
+
+#ifndef AVCODEC_AACSBR_H
+#define AVCODEC_AACSBR_H
+
+#include "get_bits.h"
+#include "aac.h"
+#include "sbr.h"
+
+/** Initialize SBR. */
+av_cold void ff_aac_sbr_init(void);
+/** Initialize one SBR context. */
+av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr);
+/** Close one SBR context. */
+av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr);
+/** Decode one SBR element. */
+int ff_decode_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
+                            GetBitContext *gb, int crc, int cnt, int id_aac);
+/** Dequantize all channels in one SBR element. */
+void ff_sbr_dequant(AACContext *ac, SpectralBandReplication *sbr, int id_aac);
+/** Apply dequantized SBR to a single AAC channel. */
+void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int ch,
+                  const float* in, float* out);
+
+#endif /* AVCODEC_AACSBR_H */
--- a/libavcodec/aacsbrdata.h
+++ b/libavcodec/aacsbrdata.h
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,7 +30,7 @@
 #include "libavutil/avutil.h"

 #define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 56
+#define LIBAVCODEC_VERSION_MINOR 57
 #define LIBAVCODEC_VERSION_MICRO  0

 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \

--- a/libavcodec/mpeg4audio.h
+++ b/libavcodec/mpeg4audio.h
@@ -57,7 +57,7 @@ enum AudioObjectType {
    AOT_AAC_LC,                ///< Y                       Low Complexity
    AOT_AAC_SSR,               ///< N (code in SoC repo)    Scalable Sample Rate
    AOT_AAC_LTP,               ///< N (code in SoC repo)    Long Term Prediction
-    AOT_SBR,                   ///< N (in progress)         Spectral Band Replication
+    AOT_SBR,                   ///< Y                       Spectral Band Replication
    AOT_AAC_SCALABLE,          ///< N                       Scalable
    AOT_TWINVQ,                ///< N                       Twin Vector Quantizer
    AOT_CELP,                  ///< N                       Code Excited Linear Prediction

--- a/libavcodec/sbr.h
+++ b/libavcodec/sbr.h
+/*
+ * Spectral Band Replication definitions and structures
+ * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
+ * Copyright (c) 2010      Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/sbr.h
+ * Spectral Band Replication definitions and structures
+ * @author Robert Swain ( rob opendot cl )
+ */
+
+#ifndef AVCODEC_SBR_H
+#define AVCODEC_SBR_H
+
+#include <stdint.h>
+#include "fft.h"
+
+/**
+ * Spectral Band Replication header - spectrum parameters that invoke a reset if they differ from the previous header.
+ */
+typedef struct {
+    uint8_t bs_start_freq;
+    uint8_t bs_stop_freq;
+    uint8_t bs_xover_band;
+
+    /**
+     * @defgroup bs_header_extra_1     Variables associated with bs_header_extra_1
+     * @{
+     */
+    uint8_t bs_freq_scale;
+    uint8_t bs_alter_scale;
+    uint8_t bs_noise_bands;
+    /** @} */
+} SpectrumParameters;
+
+#define SBR_SYNTHESIS_BUF_SIZE ((1280-128)*2)
+
+/**
+ * Spectral Band Replication per channel data
+ */
+typedef struct {
+    /**
+     * @defgroup bitstream     Main bitstream data variables
+     * @{
+     */
+    unsigned           bs_frame_class;
+    unsigned           bs_add_harmonic_flag;
+    unsigned           bs_num_env[2];
+    uint8_t            bs_freq_res[7];
+    uint8_t            bs_var_bord[2];
+    uint8_t            bs_num_rel[2];
+    uint8_t            bs_rel_bord[2][3];
+    unsigned           bs_pointer;
+    unsigned           bs_num_noise;
+    uint8_t            bs_df_env[5];
+    uint8_t            bs_df_noise[2];
+    uint8_t            bs_invf_mode[2][5];
+    uint8_t            bs_add_harmonic[48];
+    unsigned           bs_amp_res;
+    /** @} */
+
+    /**
+     * @defgroup state         State variables
+     * @{
+     */
+    DECLARE_ALIGNED(16, float, synthesis_filterbank_samples)[SBR_SYNTHESIS_BUF_SIZE];
+    DECLARE_ALIGNED(16, float, analysis_filterbank_samples) [1312];
+    int                synthesis_filterbank_samples_offset;
+    ///l_APrev and l_A
+    int                e_a[2];
+    ///Chirp factors
+    float              bw_array[5];
+    ///QMF values of the original signal
+    float              W[2][32][32][2];
+    ///QMF output of the HF adjustor
+    float              Y[2][38][64][2];
+    float              g_temp[42][48];
+    float              q_temp[42][48];
+    uint8_t            s_indexmapped[8][48];
+    ///Envelope scalefactors
+    float              env_facs[6][48];
+    ///Noise scalefactors
+    float              noise_facs[3][5];
+    ///Envelope time borders
+    uint8_t            t_env[8];
+    ///Envelope time border of the last envelope of the previous frame
+    uint8_t            t_env_num_env_old;
+    ///Noise time borders
+    uint8_t            t_q[3];
+    unsigned           f_indexnoise;
+    unsigned           f_indexsine;
+    /** @} */
+} SBRData;
+
+/**
+ * Spectral Band Replication
+ */
+typedef struct {
+    int                sample_rate;
+    int                start;
+    int                reset;
+    SpectrumParameters spectrum_params;
+    int                bs_amp_res_header;
+    /**
+     * @defgroup bs_header_extra_2     variables associated with bs_header_extra_2
+     * @{
+     */
+    unsigned           bs_limiter_bands;
+    unsigned           bs_limiter_gains;
+    unsigned           bs_interpol_freq;
+    unsigned           bs_smoothing_mode;
+    /** @} */
+    unsigned           bs_coupling;
+    unsigned           k[5]; ///< k0, k1, k2
+    ///kx', and kx respectively, kx is the first QMF subband where SBR is used.
+    ///kx' is its value from the previous frame
+    unsigned           kx[2];
+    ///M' and M respectively, M is the number of QMF subbands that use SBR.
+    unsigned           m[2];
+    ///The number of frequency bands in f_master
+    unsigned           n_master;
+    SBRData            data[2];
+    ///N_Low and N_High respectively, the number of frequency bands for low and high resolution
+    unsigned           n[2];
+    ///Number of noise floor bands
+    unsigned           n_q;
+    ///Number of limiter bands
+    unsigned           n_lim;
+    ///The master QMF frequency grouping
+    uint16_t           f_master[49];
+    ///Frequency borders for low resolution SBR
+    uint16_t           f_tablelow[25];
+    ///Frequency borders for high resolution SBR
+    uint16_t           f_tablehigh[49];
+    ///Frequency borders for noise floors
+    uint16_t           f_tablenoise[6];
+    ///Frequency borders for the limiter
+    uint16_t           f_tablelim[29];
+    unsigned           num_patches;
+    uint8_t            patch_num_subbands[6];
+    uint8_t            patch_start_subband[6];
+    ///QMF low frequency input to the HF generator
+    float              X_low[32][40][2];
+    ///QMF output of the HF generator
+    float              X_high[64][40][2];
+    ///QMF values of the reconstructed signal
+    DECLARE_ALIGNED(16, float, X)[2][32][64];
+    ///Zeroth coefficient used to filter the subband signals
+    float              alpha0[64][2];
+    ///First coefficient used to filter the subband signals
+    float              alpha1[64][2];
+    ///Dequantized envelope scalefactors, remapped
+    float              e_origmapped[7][48];
+    ///Dequantized noise scalefactors, remapped
+    float              q_mapped[7][48];
+    ///Sinusoidal presence, remapped
+    uint8_t            s_mapped[7][48];
+    ///Estimated envelope
+    float              e_curr[7][48];
+    ///Amplitude adjusted noise scalefactors
+    float              q_m[7][48];
+    ///Sinusoidal levels
+    float              s_m[7][48];
+    float              gain[7][48];
+    DECLARE_ALIGNED(16, float, qmf_filter_scratch)[5][64];
+    RDFTContext        rdft;
+    FFTContext         mdct;
+} SpectralBandReplication;
+
+#endif /* AVCODEC_SBR_H */