diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 8d5ea77e8178d330acab363268957f0ba78984fc..4749d8c61cf135067f72feca094b50d6cc7a7db5 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -593,19 +593,18 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne const float lambda = s->lambda; const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f; const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda); - const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/100.f); - - if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) - return; + const float spread_threshold = NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f); + memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + int wstart = sce->ics.swb_offset[w*16]; for (g = 0; g < sce->ics.num_swb; g++) { int noise_sfi; float dist1 = 0.0f, dist2 = 0.0f, noise_amp; - float pns_energy = 0.0f, energy_ratio, dist_thresh; + float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh; float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f; const int start = sce->ics.swb_offset[w*16+g]; - const float freq = start*freq_mult; + const float freq = (start-wstart)*freq_mult; const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); if (freq < NOISE_LOW_LIMIT || avctx->cutoff && freq >= avctx->cutoff) continue; @@ -617,18 +616,22 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne } /* Ramps down at ~8000Hz and loosens the dist threshold */ - dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 1.27f); + dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 2.5f); - if (sce->zeroes[w*16+g] || spread < spread_threshold || - sfb_energy > threshold*thr_mult*freq_boost) { + /* zero and energy close to threshold usually means hole avoidance, + * we do want to remain avoiding holes with PNS + */ + if (((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.5f/freq_boost)) || spread < spread_threshold || + (sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost)) { sce->pns_ener[w*16+g] = sfb_energy; continue; } - noise_sfi = av_clip(roundf(log2f(sfb_energy)*2), -100, 155); /* Quantize */ + pns_tgt_energy = sfb_energy*spread*spread/sce->ics.group_len[w]; + noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */ noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { - float band_energy, scale; + float band_energy, scale, pns_senergy; const int start_c = sce->ics.swb_offset[(w+w2)*16+g]; band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; for (i = 0; i < sce->ics.swb_sizes[g]; i++) @@ -636,7 +639,8 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); scale = noise_amp/sqrtf(band_energy); s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]); - pns_energy += s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); + pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); + pns_energy += pns_senergy; abs_pow34_v(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]); abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]); dist1 += quantize_band_cost(s, &sce->coeffs[start_c], @@ -645,23 +649,14 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne sce->sf_idx[(w+w2)*16+g], sce->band_alt[(w+w2)*16+g], lambda/band->threshold, INFINITY, NULL, 0); - dist2 += quantize_band_cost(s, PNS, - PNS34, - sce->ics.swb_sizes[g], - noise_sfi, - NOISE_BT, - lambda/band->threshold, INFINITY, NULL, 0); + /* Estimate rd on average as 9 bits for CB and sf + spread energy * lambda/thr */ + dist2 += 9+band->energy/(band->spread*band->spread)*lambda/band->threshold; } - energy_ratio = sfb_energy/pns_energy; /* Compensates for quantization error */ - sce->pns_ener[w*16+g] = energy_ratio*sfb_energy; - if (energy_ratio > 0.85f && energy_ratio < 1.25f && dist1/dist2 > dist_thresh) { + energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */ + sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy; + if (energy_ratio > 0.85f && energy_ratio < 1.25f && (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || dist2*dist_thresh < dist1)) { sce->band_type[w*16+g] = NOISE_BT; sce->zeroes[w*16+g] = 0; - if (sce->band_type[w*16+g-1] != NOISE_BT && /* Prevent holes */ - sce->band_type[w*16+g-2] == NOISE_BT) { - sce->band_type[w*16+g-1] = NOISE_BT; - sce->zeroes[w*16+g-1] = 0; - } } } } diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak index 8e9c91507f91ee11d0b7be1e1b97104872beb5bf..d6a355e45b0e69bd6cee88e407f2d7a0d6537402 100644 --- a/tests/fate/aac.mak +++ b/tests/fate/aac.mak @@ -174,7 +174,7 @@ fate-aac-pns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.w fate-aac-pns-encode: CMP_SHIFT = -4096 fate-aac-pns-encode: CMP_TARGET = 623.77 fate-aac-pns-encode: SIZE_TOLERANCE = 3560 -fate-aac-pns-encode: FUZZ = 1 +fate-aac-pns-encode: FUZZ = 25 FATE_AAC_ENCODE += fate-aac-tns-encode fate-aac-tns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_tns 1 -aac_is 0 -aac_pns 0 -b:a 128k