libavcodec/wmavoice.c
Go to the documentation of this file.
00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #define UNCHECKED_BITSTREAM_READER 1
00029 
00030 #include <math.h>
00031 #include "avcodec.h"
00032 #include "get_bits.h"
00033 #include "put_bits.h"
00034 #include "wmavoice_data.h"
00035 #include "celp_math.h"
00036 #include "celp_filters.h"
00037 #include "acelp_vectors.h"
00038 #include "acelp_filters.h"
00039 #include "lsp.h"
00040 #include "libavutil/lzo.h"
00041 #include "dct.h"
00042 #include "rdft.h"
00043 #include "sinewin.h"
00044 
00045 #define MAX_BLOCKS           8   ///< maximum number of blocks per frame
00046 #define MAX_LSPS             16  ///< maximum filter order
00047 #define MAX_LSPS_ALIGN16     16  ///< same as #MAX_LSPS; needs to be a multiple of 16 (it pads the aligned synth_filter_out_buf)
00048 
00049 #define MAX_FRAMES           3   ///< maximum number of frames per superframe
00050 #define MAX_FRAMESIZE        160 ///< maximum number of samples per frame
00051 #define MAX_SIGNAL_HISTORY   416 ///< maximum excitation signal history
00052 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES) ///< maximum number of samples per superframe
00053 
00054 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size (in bytes) for frame data kept in WMAVoiceContext.sframe_cache
00055 
00056 #define VLC_NBITS            6   ///< number of bits to read per VLC iteration
00057 
/** Frame type VLC; statically initialized in decode_vbmtree() via INIT_VLC_STATIC(). */
00061 static VLC frame_type_vlc;
00062 
/**
 * Values stored in frame_type_desc.acb_type and WMAVoiceContext.last_acb_type.
 * NOTE(review): "ACB" presumably stands for "adaptive codebook" -- confirm.
 */
00066 enum {
00067     ACB_TYPE_NONE       = 0, ///< used by frame types 0 and 1; initial value of last_acb_type
00068     ACB_TYPE_ASYMMETRIC = 1, ///< used by frame types 2-7 in frame_descs
00069 
00070 
00071 
00072 
00073     ACB_TYPE_HAMMING    = 2  ///< used by frame types 8-16 in frame_descs
00074 
00075 
00076 };
00077 
/**
 * Values stored in frame_type_desc.fcb_type.
 * NOTE(review): "FCB" presumably stands for "fixed codebook" -- confirm.
 */
00081 enum {
00082     FCB_TYPE_SILENCE    = 0, ///< wiener_denoise() skips its filter computation for this type
00083 
00084 
00085     FCB_TYPE_HARDCODED  = 1, ///< calc_input_response() uses a gain multiplier of 5/13 instead of 5/14.7 for this type
00086 
00087     FCB_TYPE_AW_PULSES  = 2, ///< postfilter() attempts kalman_smoothen() for this type and every higher value
00088 
00089     FCB_TYPE_EXC_PULSES = 3, ///< used by frame types 3-16 in frame_descs
00090 
00091 
00092 };
00093 
/**
 * Per-frame-type properties. The table has 17 entries, matching the 17
 * frame type numbers that decode_vbmtree() distributes over the VBM tree.
 */
00097 static const struct frame_type_desc {
00098     uint8_t n_blocks;     ///< number of blocks per frame (1, 2, 4 or 8 in the table below)
00099 
00100     uint8_t log_n_blocks; ///< log2(n_blocks)
00101     uint8_t acb_type;     ///< one of the ACB_TYPE_* values
00102     uint8_t fcb_type;     ///< one of the FCB_TYPE_* values
00103     uint8_t dbl_pulses;   ///< 0, 2 or 5 in the table below; NOTE(review): meaning not visible in this part of the file -- confirm against its users
00104 
00105 
00106     uint16_t frame_size;  ///< NOTE(review): presumably the size of the coded frame data in bits -- confirm against its users
00107 
00108 } frame_descs[17] = {
00109     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00110     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00111     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00112     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00113     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00114     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00115     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00116     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00117     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00118     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00119     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00120     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00121     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00122     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00123     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00124     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00125     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00126 };
00127 
/**
 * WMA Voice decoder private context.
 *
 * Field notes below only state what the visible parts of this file do with
 * each field; fields without a note are used by code outside this excerpt.
 */
00131 typedef struct {
00136     AVFrame frame;                ///< reset to defaults in wmavoice_decode_init() and exposed as ctx->coded_frame
00137     GetBitContext gb;             ///< bit reader; wmavoice_decode_init() uses it to parse the VBM tree from extradata
00138 
00139 
00140 
00141     int8_t vbm_tree[25];          ///< filled by decode_vbmtree(): frame type numbers grouped per 3-bit group index, unused slots are -1
00142 
00143     int spillover_bitsize;        ///< 3 + ceil(log2(block_align)), set in wmavoice_decode_init()
00144 
00145 
00146     int history_nsamples;         ///< max_pitch_val + 8; must not exceed #MAX_SIGNAL_HISTORY
00147 
00148 
00149     /* postfilter specific values */
00150     int do_apf;                   ///< extradata flag bit 0; when set, the RDFT/DCT contexts and sin/cos tables below are initialized
00151 
00152     int denoise_strength;         ///< extradata flag bits 2-5 (valid: 0-11); row index into wmavoice_denoise_power_table
00153 
00154     int denoise_tilt_corr;        ///< extradata flag 0x40; enables the tilt correction step in calc_input_response()
00155 
00156     int dc_level;                 ///< extradata flag bits 7-10; postfilter() applies its DC-removal filter when this is > 8
00157 
00158 
00159     int lsps;                     ///< number of LSPs: 16 if extradata flag 0x1000 is set, 10 otherwise
00160     int lsp_q_mode;               ///< extradata flag 0x2000
00161     int lsp_def_mode;             ///< extradata flag 0x4000
00162 
00163     int frame_lsp_bitsize;        ///< 34 when lsps == 16, 24 when lsps == 10
00164 
00165     int sframe_lsp_bitsize;       ///< 60 when lsps == 16, 48 when lsps == 10
00166 
00167 
00168     int min_pitch_val;            ///< derived from the sample rate in wmavoice_decode_init(); must be >= 1
00169     int max_pitch_val;            ///< derived from the sample rate in wmavoice_decode_init()
00170     int pitch_nbits;              ///< ceil(log2(max_pitch_val - min_pitch_val))
00171 
00172     int block_pitch_nbits;        ///< ceil(log2(block_pitch_range))
00173 
00174     int block_pitch_range;        ///< computed from block_conv_table[] and min_pitch_val in wmavoice_decode_init()
00175     int block_delta_pitch_nbits;  ///< 1 + ceil(log2(block_delta_pitch_hrange))
00176 
00177 
00178 
00179     int block_delta_pitch_hrange; ///< (pitch range >> 3) rounded down to a multiple of 16; must be > 0
00180 
00181     uint16_t block_conv_table[4]; ///< values derived from min_pitch_val, max_pitch_val and the pitch range in wmavoice_decode_init()
00182 
00183 
00193     int spillover_nbits;          
00194 
00195 
00196 
00197     int has_residual_lsps;        
00198 
00199 
00200 
00201 
00202     int skip_bits_next;           
00203 
00204 
00205 
00206     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00209     int sframe_cache_size;        
00210 
00211 
00212 
00213 
00214     PutBitContext pb;             
00215 
00225     double prev_lsps[MAX_LSPS];   ///< initialized to evenly spaced values in (0, pi) by wmavoice_decode_init()
00226 
00227     int last_pitch_val;           ///< initialized to 40
00228     int last_acb_type;            ///< initialized to ACB_TYPE_NONE
00229     int pitch_diff_sh16;          
00230 
00231     float silence_gain;           
00232 
00233     int aw_idx_is_ext;            ///< set by aw_parse_coords(): 1 if the 6-bit position code was >= 54 and extended by 2 more bits
00234 
00235     int aw_pulse_range;           ///< set by aw_parse_coords(): 24 if both block pitches are > 32, else 16
00236 
00237 
00238 
00239 
00240 
00241     int aw_n_pulses[2];           ///< pulse count for each of the two blocks, computed by aw_parse_coords()
00242 
00243 
00244     int aw_first_pulse_off[2];    ///< per-block offset of the first pulse, computed by aw_parse_coords()
00245 
00246     int aw_next_pulse_off_cache;  ///< read by aw_pulse_set2() as the pulse offset for the second block
00247 
00248 
00249 
00250 
00251 
00252     int frame_cntr;               
00253 
00254     float gain_pred_err[6];       
00255     float excitation_history[MAX_SIGNAL_HISTORY];
00259     float synth_history[MAX_LSPS]; 
00260 
00269     RDFTContext rdft, irdft;      ///< 7-bit forward (DFT_R2C) and inverse (IDFT_C2R) transforms; only initialized if do_apf
00270 
00271     DCTContext dct, dst;          ///< 6-bit DCT_I and DST_I transforms; only initialized if do_apf
00272 
00273     float sin[511], cos[511];     ///< tables built in wmavoice_decode_init() from ff_sine_window_init(); index 255 is the centre
00274 
00275     float postfilter_agc;         ///< gain memory carried between adaptive_gain_control() calls
00276 
00277     float dcf_mem[2];             ///< filter memory of the DC-removal filter applied in postfilter()
00278     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
      /* tail of the denoise filter output that wiener_denoise() still has to
       * add to following blocks, and the number of valid samples in it */
00281     float denoise_filter_cache[MAX_FRAMESIZE];
00282     int   denoise_filter_cache_size; 
      /* scratch buffer for the tilt-compensated LPCs in wiener_denoise() */
00283     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
      /* scratch buffer for the denoise filter coefficients in wiener_denoise() */
00285     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
      /* synthesis filter output in postfilter(); the first MAX_LSPS_ALIGN16
       * entries are reserved for filter history */
00287     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
00290 
00293 } WMAVoiceContext;
00294 
00304 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00305 {
00306     static const uint8_t bits[] = {
00307          2,  2,  2,  4,  4,  4,
00308          6,  6,  6,  8,  8,  8,
00309         10, 10, 10, 12, 12, 12,
00310         14, 14, 14, 14
00311     };
00312     static const uint16_t codes[] = {
00313           0x0000, 0x0001, 0x0002,        //              00/01/10
00314           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00315           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00316           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00317           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00318           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00319           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00320     };
00321     int cntr[8], n, res;
00322 
00323     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00324     memset(cntr,     0,    sizeof(cntr));
00325     for (n = 0; n < 17; n++) {
00326         res = get_bits(gb, 3);
00327         if (cntr[res] > 3) // should be >= 3 + (res == 7))
00328             return -1;
00329         vbm_tree[res * 3 + cntr[res]++] = n;
00330     }
00331     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00332                     bits, 1, 1, codes, 2, 2, 132);
00333     return 0;
00334 }
00335 
00339 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00340 {
00341     int n, flags, pitch_range, lsp16_flag;
00342     WMAVoiceContext *s = ctx->priv_data;
00343 
00352     if (ctx->extradata_size != 46) {
00353         av_log(ctx, AV_LOG_ERROR,
00354                "Invalid extradata size %d (should be 46)\n",
00355                ctx->extradata_size);
00356         return -1;
00357     }
00358     flags                = AV_RL32(ctx->extradata + 18);
00359     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00360     s->do_apf            =    flags & 0x1;
00361     if (s->do_apf) {
00362         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00363         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00364         ff_dct_init(&s->dct,  6, DCT_I);
00365         ff_dct_init(&s->dst,  6, DST_I);
00366 
00367         ff_sine_window_init(s->cos, 256);
00368         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00369         for (n = 0; n < 255; n++) {
00370             s->sin[n]       = -s->sin[510 - n];
00371             s->cos[510 - n] =  s->cos[n];
00372         }
00373     }
00374     s->denoise_strength  =   (flags >> 2) & 0xF;
00375     if (s->denoise_strength >= 12) {
00376         av_log(ctx, AV_LOG_ERROR,
00377                "Invalid denoise filter strength %d (max=11)\n",
00378                s->denoise_strength);
00379         return -1;
00380     }
00381     s->denoise_tilt_corr = !!(flags & 0x40);
00382     s->dc_level          =   (flags >> 7) & 0xF;
00383     s->lsp_q_mode        = !!(flags & 0x2000);
00384     s->lsp_def_mode      = !!(flags & 0x4000);
00385     lsp16_flag           =    flags & 0x1000;
00386     if (lsp16_flag) {
00387         s->lsps               = 16;
00388         s->frame_lsp_bitsize  = 34;
00389         s->sframe_lsp_bitsize = 60;
00390     } else {
00391         s->lsps               = 10;
00392         s->frame_lsp_bitsize  = 24;
00393         s->sframe_lsp_bitsize = 48;
00394     }
00395     for (n = 0; n < s->lsps; n++)
00396         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00397 
00398     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00399     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00400         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00401         return -1;
00402     }
00403 
00404     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00405     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00406     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00407     if (pitch_range <= 0) {
00408         av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00409         return -1;
00410     }
00411     s->pitch_nbits      = av_ceil_log2(pitch_range);
00412     s->last_pitch_val   = 40;
00413     s->last_acb_type    = ACB_TYPE_NONE;
00414     s->history_nsamples = s->max_pitch_val + 8;
00415 
00416     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00417         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00418             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00419 
00420         av_log(ctx, AV_LOG_ERROR,
00421                "Unsupported samplerate %d (min=%d, max=%d)\n",
00422                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00423 
00424         return -1;
00425     }
00426 
00427     s->block_conv_table[0]      = s->min_pitch_val;
00428     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00429     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00430     s->block_conv_table[3]      = s->max_pitch_val - 1;
00431     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00432     if (s->block_delta_pitch_hrange <= 0) {
00433         av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00434         return -1;
00435     }
00436     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00437     s->block_pitch_range        = s->block_conv_table[2] +
00438                                   s->block_conv_table[3] + 1 +
00439                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00440     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00441 
00442     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00443 
00444     avcodec_get_frame_defaults(&s->frame);
00445     ctx->coded_frame = &s->frame;
00446 
00447     return 0;
00448 }
00449 
/**
 * Adaptive gain control (as used in the postfilter).
 *
 * Scales the postfilter output so that its level follows that of the
 * unfiltered speech. The gain is smoothed over time with a first order
 * recursion: mem = alpha * mem + (1 - alpha) * speech_level / filter_level,
 * where the levels are sums of absolute sample values over the block.
 *
 * If the postfilter output is entirely zero, the per-block gain target is
 * taken to be zero instead of dividing by zero; otherwise the gain memory
 * would become Inf/NaN and corrupt every later block.
 *
 * @param out          output buffer for the gain-corrected samples
 * @param in           postfilter output to be scaled
 * @param speech_synth unfiltered synthesized speech (level reference)
 * @param size         number of samples in each of the buffers
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     gain memory; read on entry, updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* a sum of absolute values is zero only if every input sample is zero */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00492 
00511 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00512                            const float *in, float *out, int size)
00513 {
00514     int n;
00515     float optimal_gain = 0, dot;
00516     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00517                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00518                 *best_hist_ptr;
00519 
00520     /* find best fitting point in history */
00521     do {
00522         dot = ff_dot_productf(in, ptr, size);
00523         if (dot > optimal_gain) {
00524             optimal_gain  = dot;
00525             best_hist_ptr = ptr;
00526         }
00527     } while (--ptr >= end);
00528 
00529     if (optimal_gain <= 0)
00530         return -1;
00531     dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00532     if (dot <= 0) // would be 1.0
00533         return -1;
00534 
00535     if (optimal_gain <= dot) {
00536         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00537     } else
00538         dot = 0.625;
00539 
00540     /* actual smoothing */
00541     for (n = 0; n < size; n++)
00542         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00543 
00544     return 0;
00545 }
00546 
/**
 * Compute the tilt factor of a set of LPCs.
 *
 * Assuming ff_dot_productf() returns the dot product of its two inputs, the
 * result is r1 / r0 with
 *   r0 = 1       + sum(lpcs[i] * lpcs[i]),     i = 0 .. n_lpcs - 1
 *   r1 = lpcs[0] + sum(lpcs[i] * lpcs[i + 1]), i = 0 .. n_lpcs - 2
 *
 * @param lpcs   LPC coefficients
 * @param n_lpcs number of coefficients in lpcs
 * @return the tilt factor r1 / r0
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float lag0 = 1.0     + ff_dot_productf(lpcs,  lpcs,    n_lpcs);
    const float lag1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return lag1 / lag0;
}
00566 
/**
 * Derive the denoise filter coefficients from the LPCs.
 *
 * @param s         decoder context; uses the rdft/irdft/dct/dst transforms,
 *                  the sin/cos tables, denoise_strength and denoise_tilt_corr
 * @param lpcs      128-entry work buffer holding the (tilt-compensated) LPCs
 *                  on entry; it is transformed in place and destroyed
 * @param fcb_type  FCB_TYPE_* of the frame; only selects the gain multiplier
 * @param coeffs    128-entry output buffer for the filter coefficients
 * @param remainder number of leading entries of coeffs that are kept;
 *                  coeffs[remainder .. 127] is zeroed
 */
00570 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00571                                 int fcb_type, float *coeffs, int remainder)
00572 {
00573     float last_coeff, min = 15.0, max = -15.0;
00574     float irange, angle_mul, gain_mul, range, sq;
00575     int n, idx;
00576 
00577     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00578     s->rdft.rdft_calc(&s->rdft, lpcs);
          /* log_range() stores log10() of a power value and tracks the running
           * min/max; lpcs[n] is overwritten with the log power of bin n, which
           * is safe because bin n is read from lpcs[2n], lpcs[2n+1] with n >= 1.
           * lpcs[0] is handled last because lpcs[1] must be read first. */
00579 #define log_range(var, assign) do { \
00580         float tmp = log10f(assign);  var = tmp; \
00581         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00582     } while (0)
00583     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00584     for (n = 1; n < 64; n++)
00585         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00586                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00587     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00588 #undef log_range
00589     range    = max - min;
00590     lpcs[64] = last_coeff;
00591 
00592     /* Now, use this spectrum to pick out these frequencies with higher
00593      * (relative) power/energy (which we then take to be "not noise"),
00594      * and set up a table (still in lpc[]) of (relative) gains per frequency.
00595      * These frequencies will be maintained, while others ("noise") will be
00596      * decreased in the filter output. */
          /* NOTE(review): if all 65 log powers are equal, range is 0 and
           * irange becomes infinite; (max - lpcs[n]) * irange is then NaN and
           * the result of lrint() below is unspecified -- confirm whether this
           * can happen with real input. */
00597     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00598     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00599                                                           (5.0 / 14.7));
00600     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00601     for (n = 0; n <= 64; n++) {
00602         float pwr;
00603 
00604         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00605         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00606         lpcs[n] = angle_mul * pwr;
00607 
00608         /* 70.57 =~ 1/log10(1.0331663) */
00609         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00610         if (idx > 127) { // fallback if index falls outside table range
00611             coeffs[n] = wmavoice_energy_table[127] *
00612                         powf(1.0331663, idx - 127);
00613         } else
00614             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00615     }
00616 
00617     /* calculate the Hilbert transform of the gains, which we do (since this
00618      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
00619      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00620      * "moment" of the LPCs in this filter. */
00621     s->dct.dct_calc(&s->dct, lpcs);
00622     s->dst.dct_calc(&s->dst, lpcs);
00623 
00624     /* Split out the coefficient indexes into phase/magnitude pairs */
          /* coeffs[0..64] currently hold one magnitude per bin. They are
           * expanded in place into (cos, sin) pairs at coeffs[2n], coeffs[2n+1],
           * walking from n = 63 downwards so that coeffs[n] is read before
           * the pair for a lower bin could overwrite it. Index 255 is the
           * centre of the sin/cos tables. */
00625     idx = 255 + av_clip(lpcs[64],               -255, 255);
00626     coeffs[0]  = coeffs[0]  * s->cos[idx];
00627     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00628     last_coeff = coeffs[64] * s->cos[idx];
          /* each iteration handles two bins: n (odd, -lpcs[64] term) and then
           * n - 1 (even, +lpcs[64] term); n is decremented both by "--n" in the
           * body and by "n--" in the loop header. The loop ends after bin 1. */
00629     for (n = 63;; n--) {
00630         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00633 
00634         if (!--n) break;
00635 
00636         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00637         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00638         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00639     }
          /* the value for bin 64 is stored at index 1, next to bin 0 */
00640     coeffs[1] = last_coeff;
00641 
00642     /* move into real domain */
00643     s->irdft.rdft_calc(&s->irdft, coeffs);
00644 
00645     /* tilt correction and normalize scale */
00646     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00647     if (s->denoise_tilt_corr) {
00648         float tilt_mem = 0;
00649 
00650         coeffs[remainder - 1] = 0;
00651         ff_tilt_compensation(&tilt_mem,
00652                              -1.8 * tilt_factor(coeffs, remainder - 1),
00653                              coeffs, remainder);
00654     }
          /* NOTE(review): if coeffs[0 .. remainder-1] is all zero, this divides
           * by zero and the coefficients become NaN/Inf -- confirm whether the
           * caller can guarantee a non-zero response. */
00655     sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00656     for (n = 0; n < remainder; n++)
00657         coeffs[n] *= sq;
00658 }
00659 
00686 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00687                            float *synth_pf, int size,
00688                            const float *lpcs)
00689 {
00690     int remainder, lim, n;
00691 
00692     if (fcb_type != FCB_TYPE_SILENCE) {
00693         float *tilted_lpcs = s->tilted_lpcs_pf,
00694               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00695 
00696         tilted_lpcs[0]           = 1.0;
00697         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00698         memset(&tilted_lpcs[s->lsps + 1], 0,
00699                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00700         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00701                              tilted_lpcs, s->lsps + 2);
00702 
00703         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00704          * size is applied to the next frame. All input beyond this is zero,
00705          * and thus all output beyond this will go towards zero, hence we can
00706          * limit to min(size-1, 127-size) as a performance consideration. */
00707         remainder = FFMIN(127 - size, size - 1);
00708         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00709 
00710         /* apply coefficients (in frequency spectrum domain), i.e. complex
00711          * number multiplication */
00712         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00713         s->rdft.rdft_calc(&s->rdft, synth_pf);
00714         s->rdft.rdft_calc(&s->rdft, coeffs);
00715         synth_pf[0] *= coeffs[0];
00716         synth_pf[1] *= coeffs[1];
00717         for (n = 1; n < 64; n++) {
00718             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00719             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00720             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00721         }
00722         s->irdft.rdft_calc(&s->irdft, synth_pf);
00723     }
00724 
00725     /* merge filter output with the history of previous runs */
00726     if (s->denoise_filter_cache_size) {
00727         lim = FFMIN(s->denoise_filter_cache_size, size);
00728         for (n = 0; n < lim; n++)
00729             synth_pf[n] += s->denoise_filter_cache[n];
00730         s->denoise_filter_cache_size -= lim;
00731         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00732                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00733     }
00734 
00735     /* move remainder of filter output into a cache for future runs */
00736     if (fcb_type != FCB_TYPE_SILENCE) {
00737         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00738         for (n = 0; n < lim; n++)
00739             s->denoise_filter_cache[n] += synth_pf[size + n];
00740         if (lim < remainder) {
00741             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00742                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00743             s->denoise_filter_cache_size = remainder;
00744         }
00745     }
00746 }
00747 
00768 static void postfilter(WMAVoiceContext *s, const float *synth,
00769                        float *samples,    int size,
00770                        const float *lpcs, float *zero_exc_pf,
00771                        int fcb_type,      int pitch)
00772 {
00773     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00774           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00775           *synth_filter_in = zero_exc_pf;
00776 
00777     assert(size <= MAX_FRAMESIZE / 2);
00778 
00779     /* generate excitation from input signal */
00780     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00781 
00782     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00783         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00784         synth_filter_in = synth_filter_in_buf;
00785 
00786     /* re-synthesize speech after smoothening, and keep history */
00787     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00788                                  synth_filter_in, size, s->lsps);
00789     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00790            sizeof(synth_pf[0]) * s->lsps);
00791 
00792     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00793 
00794     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00795                           &s->postfilter_agc);
00796 
00797     if (s->dc_level > 8) {
00798         /* remove ultra-low frequency DC noise / highpass filter;
00799          * coefficients are identical to those used in SIPR decoding,
00800          * and very closely resemble those used in AMR-NB decoding. */
00801         ff_acelp_apply_order_2_transfer_function(samples, samples,
00802             (const float[2]) { -1.99997,      1.0 },
00803             (const float[2]) { -1.9330735188, 0.93589198496 },
00804             0.93980580475, s->dcf_mem, size);
00805     }
00806 }
/**
 * Dequantize LSPs from a multi-stage codebook.
 *
 * The result is the sum over all stages of base_q[stage] + mul_q[stage] * e,
 * where e is the selected codebook entry of that stage. The codebooks of the
 * stages are stored back to back in table, each holding sizes[stage] vectors
 * of num bytes.
 *
 * @param lsps     output, num values
 * @param num      number of values per codebook vector
 * @param values   selected vector index for each stage
 * @param sizes    number of vectors in the codebook of each stage
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    per-stage scale factor
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *entry = codebook + values[stage] * num;
        const double offset = base_q[stage];
        const double scale  = mul_q[stage];
        int i;

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * entry[i];

        /* advance to the codebook of the next stage */
        codebook += sizes[stage] * num;
    }
}
00842 
00854 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00855 {
00856     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00857     static const double mul_lsf[4] = {
00858         5.2187144800e-3,    1.4626986422e-3,
00859         9.6179549166e-4,    1.1325736225e-3
00860     };
00861     static const double base_lsf[4] = {
00862         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00863         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00864     };
00865     uint16_t v[4];
00866 
00867     v[0] = get_bits(gb, 8);
00868     v[1] = get_bits(gb, 6);
00869     v[2] = get_bits(gb, 5);
00870     v[3] = get_bits(gb, 5);
00871 
00872     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00873                  mul_lsf, base_lsf);
00874 }
00875 
00880 static void dequant_lsp10r(GetBitContext *gb,
00881                            double *i_lsps, const double *old,
00882                            double *a1, double *a2, int q_mode)
00883 {
00884     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00885     static const double mul_lsf[3] = {
00886         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00887     };
00888     static const double base_lsf[3] = {
00889         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00890     };
00891     const float (*ipol_tab)[2][10] = q_mode ?
00892         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00893     uint16_t interpol, v[3];
00894     int n;
00895 
00896     dequant_lsp10i(gb, i_lsps);
00897 
00898     interpol = get_bits(gb, 5);
00899     v[0]     = get_bits(gb, 7);
00900     v[1]     = get_bits(gb, 6);
00901     v[2]     = get_bits(gb, 6);
00902 
00903     for (n = 0; n < 10; n++) {
00904         double delta = old[n] - i_lsps[n];
00905         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00906         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00907     }
00908 
00909     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00910                  mul_lsf, base_lsf);
00911 }
00912 
00916 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00917 {
00918     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00919     static const double mul_lsf[5] = {
00920         3.3439586280e-3,    6.9908173703e-4,
00921         3.3216608306e-3,    1.0334960326e-3,
00922         3.1899104283e-3
00923     };
00924     static const double base_lsf[5] = {
00925         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00926         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00927         M_PI * -1.29816e-1
00928     };
00929     uint16_t v[5];
00930 
00931     v[0] = get_bits(gb, 8);
00932     v[1] = get_bits(gb, 6);
00933     v[2] = get_bits(gb, 7);
00934     v[3] = get_bits(gb, 6);
00935     v[4] = get_bits(gb, 7);
00936 
00937     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00938                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00939     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00940                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00941     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00942                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00943 }
00944 
00949 static void dequant_lsp16r(GetBitContext *gb,
00950                            double *i_lsps, const double *old,
00951                            double *a1, double *a2, int q_mode)
00952 {
00953     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00954     static const double mul_lsf[3] = {
00955         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00956     };
00957     static const double base_lsf[3] = {
00958         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00959     };
00960     const float (*ipol_tab)[2][16] = q_mode ?
00961         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00962     uint16_t interpol, v[3];
00963     int n;
00964 
00965     dequant_lsp16i(gb, i_lsps);
00966 
00967     interpol = get_bits(gb, 5);
00968     v[0]     = get_bits(gb, 7);
00969     v[1]     = get_bits(gb, 7);
00970     v[2]     = get_bits(gb, 7);
00971 
00972     for (n = 0; n < 16; n++) {
00973         double delta = old[n] - i_lsps[n];
00974         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00975         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00976     }
00977 
00978     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00979                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00980     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00981                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00982     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00983                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00984 }
00985 
00999 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01000                             const int *pitch)
01001 {
01002     static const int16_t start_offset[94] = {
01003         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
01004          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
01005          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
01006          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
01007          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
01008          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
01009         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01010         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01011     };
01012     int bits, offset;
01013 
01014     /* position of pulse */
01015     s->aw_idx_is_ext = 0;
01016     if ((bits = get_bits(gb, 6)) >= 54) {
01017         s->aw_idx_is_ext = 1;
01018         bits += (bits - 54) * 3 + get_bits(gb, 2);
01019     }
01020 
01021     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01022      * the distribution of the pulses in each block contained in this frame. */
01023     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01024     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01025     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01026     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01027     offset                  += s->aw_n_pulses[0] * pitch[0];
01028     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01029     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01030 
01031     /* if continuing from a position before the block, reset position to
01032      * start of block (when corrected for the range over which it can be
01033      * spread in aw_pulse_set1()). */
01034     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01035         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01036             s->aw_first_pulse_off[1] -= pitch[1];
01037         if (start_offset[bits] < 0)
01038             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01039                 s->aw_first_pulse_off[0] -= pitch[0];
01040     }
01041 }
01042 
01050 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01051                           int block_idx, AMRFixed *fcb)
01052 {
01053     uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
01054     uint16_t *use_mask = use_mask_mem + 2;
01055     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01056      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01057      * of idx are the position of the bit within a particular item in the
01058      * array (0 being the most significant bit, and 15 being the least
01059      * significant bit), and the remainder (>> 4) is the index in the
01060      * use_mask[]-array. This is faster and uses less memory than using a
01061      * 80-byte/80-int array. */
01062     int pulse_off = s->aw_first_pulse_off[block_idx],
01063         pulse_start, n, idx, range, aidx, start_off = 0;
01064 
01065     /* set offset of first pulse to within this block */
01066     if (s->aw_n_pulses[block_idx] > 0)
01067         while (pulse_off + s->aw_pulse_range < 1)
01068             pulse_off += fcb->pitch_lag;
01069 
01070     /* find range per pulse */
01071     if (s->aw_n_pulses[0] > 0) {
01072         if (block_idx == 0) {
01073             range = 32;
01074         } else /* block_idx = 1 */ {
01075             range = 8;
01076             if (s->aw_n_pulses[block_idx] > 0)
01077                 pulse_off = s->aw_next_pulse_off_cache;
01078         }
01079     } else
01080         range = 16;
01081     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01082 
01083     /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
01084      * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
01085      * we exclude that range from being pulsed again in this function. */
01086     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01087     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01088     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01089     if (s->aw_n_pulses[block_idx] > 0)
01090         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01091             int excl_range         = s->aw_pulse_range; // always 16 or 24
01092             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01093             int first_sh           = 16 - (idx & 15);
01094             *use_mask_ptr++       &= 0xFFFFu << first_sh;
01095             excl_range            -= first_sh;
01096             if (excl_range >= 16) {
01097                 *use_mask_ptr++    = 0;
01098                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01099             } else
01100                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01101         }
01102 
01103     /* find the 'aidx'th offset that is not excluded */
01104     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01105     for (n = 0; n <= aidx; pulse_start++) {
01106         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01107         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01108             if (use_mask[0])      idx = 0x0F;
01109             else if (use_mask[1]) idx = 0x1F;
01110             else if (use_mask[2]) idx = 0x2F;
01111             else if (use_mask[3]) idx = 0x3F;
01112             else if (use_mask[4]) idx = 0x4F;
01113             else                  return;
01114             idx -= av_log2_16bit(use_mask[idx >> 4]);
01115         }
01116         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01117             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01118             n++;
01119             start_off = idx;
01120         }
01121     }
01122 
01123     fcb->x[fcb->n] = start_off;
01124     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01125     fcb->n++;
01126 
01127     /* set offset for next block, relative to start of that block */
01128     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01129     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01130 }
01131 
01139 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01140                           int block_idx, AMRFixed *fcb)
01141 {
01142     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01143     float v;
01144 
01145     if (s->aw_n_pulses[block_idx] > 0) {
01146         int n, v_mask, i_mask, sh, n_pulses;
01147 
01148         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01149             n_pulses = 3;
01150             v_mask   = 8;
01151             i_mask   = 7;
01152             sh       = 4;
01153         } else { // 4 pulses, 1:sign + 2:index each
01154             n_pulses = 4;
01155             v_mask   = 4;
01156             i_mask   = 3;
01157             sh       = 3;
01158         }
01159 
01160         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01161             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01162             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01163                                  s->aw_first_pulse_off[block_idx];
01164             while (fcb->x[fcb->n] < 0)
01165                 fcb->x[fcb->n] += fcb->pitch_lag;
01166             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01167                 fcb->n++;
01168         }
01169     } else {
01170         int num2 = (val & 0x1FF) >> 1, delta, idx;
01171 
01172         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01173         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01174         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01175         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01176         v = (val & 0x200) ? -1.0 : 1.0;
01177 
01178         fcb->no_repeat_mask |= 3 << fcb->n;
01179         fcb->x[fcb->n]       = idx - delta;
01180         fcb->y[fcb->n]       = v;
01181         fcb->x[fcb->n + 1]   = idx;
01182         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01183         fcb->n              += 2;
01184     }
01185 }
01186 
/**
 * Deterministic pseudo-random index generator.
 *
 * Derives an offset from the frame counter and block number; the result is
 * reduced modulo (1000 - block_size).
 *
 * @param frame_cntr current frame counter
 * @param block_num  index of the block
 * @param block_size number of samples in the block
 * @return value in the range [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* array to simplify the calculation of z:
     * y = (x % 9) * 5 + 6;
     * z = (49995 * x) / y;
     * Since y only has 9 values, we can remove the division by using a
     * LUT and using FASTDIV-style divisions. For each of the 9 values
     * of y, we can rewrite z as:
     * z = x * (49995 / y) + x * ((49995 % y) / y)
     * In this table, each row represents one possible value of y, the
     * first number is 49995 / y, and the second is the FASTDIV variant
     * of 49995 % y / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int x, y, z;

    x = MUL16(block_num, 1877) + frame_cntr;
    /* max value of x is 8*1877+0xFFFE=0x13AA6, so a single subtraction
     * is effectively a modulo (%) */
    if (x >= 0xFFFF)
        x -= 0xFFFF;

    y = x - 9 * MULH(477218589, x); // x % 9

    /* z = x * 49995 / (y * 5 + 6), truncated to 16 bits */
    z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));

    return z % (1000 - block_size);
}
01231 
01236 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01237                                  int block_idx, int size,
01238                                  const struct frame_type_desc *frame_desc,
01239                                  float *excitation)
01240 {
01241     float gain;
01242     int n, r_idx;
01243 
01244     assert(size <= MAX_FRAMESIZE);
01245 
01246     /* Set the offset from which we start reading wmavoice_std_codebook */
01247     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01248         r_idx = pRNG(s->frame_cntr, block_idx, size);
01249         gain  = s->silence_gain;
01250     } else /* FCB_TYPE_HARDCODED */ {
01251         r_idx = get_bits(gb, 8);
01252         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01253     }
01254 
01255     /* Clear gain prediction parameters */
01256     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01257 
01258     /* Apply gain to hardcoded codebook and use that as excitation signal */
01259     for (n = 0; n < size; n++)
01260         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01261 }
01262 
01267 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01268                                 int block_idx, int size,
01269                                 int block_pitch_sh2,
01270                                 const struct frame_type_desc *frame_desc,
01271                                 float *excitation)
01272 {
01273     static const float gain_coeff[6] = {
01274         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01275     };
01276     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01277     int n, idx, gain_weight;
01278     AMRFixed fcb;
01279 
01280     assert(size <= MAX_FRAMESIZE / 2);
01281     memset(pulses, 0, sizeof(*pulses) * size);
01282 
01283     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01284     fcb.pitch_fac      = 1.0;
01285     fcb.no_repeat_mask = 0;
01286     fcb.n              = 0;
01287 
01288     /* For the other frame types, this is where we apply the innovation
01289      * (fixed) codebook pulses of the speech signal. */
01290     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01291         aw_pulse_set1(s, gb, block_idx, &fcb);
01292         aw_pulse_set2(s, gb, block_idx, &fcb);
01293     } else /* FCB_TYPE_EXC_PULSES */ {
01294         int offset_nbits = 5 - frame_desc->log_n_blocks;
01295 
01296         fcb.no_repeat_mask = -1;
01297         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01298          * (instead of double) for a subset of pulses */
01299         for (n = 0; n < 5; n++) {
01300             float sign;
01301             int pos1, pos2;
01302 
01303             sign           = get_bits1(gb) ? 1.0 : -1.0;
01304             pos1           = get_bits(gb, offset_nbits);
01305             fcb.x[fcb.n]   = n + 5 * pos1;
01306             fcb.y[fcb.n++] = sign;
01307             if (n < frame_desc->dbl_pulses) {
01308                 pos2           = get_bits(gb, offset_nbits);
01309                 fcb.x[fcb.n]   = n + 5 * pos2;
01310                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01311             }
01312         }
01313     }
01314     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01315 
01316     /* Calculate gain for adaptive & fixed codebook signal.
01317      * see ff_amr_set_fixed_gain(). */
01318     idx = get_bits(gb, 7);
01319     fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01320                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01321     acb_gain = wmavoice_gain_codebook_acb[idx];
01322     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01323                         -2.9957322736 /* log(0.05) */,
01324                          1.6094379124 /* log(5.0)  */);
01325 
01326     gain_weight = 8 >> frame_desc->log_n_blocks;
01327     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01328             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01329     for (n = 0; n < gain_weight; n++)
01330         s->gain_pred_err[n] = pred_err;
01331 
01332     /* Calculation of adaptive codebook */
01333     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01334         int len;
01335         for (n = 0; n < size; n += len) {
01336             int next_idx_sh16;
01337             int abs_idx    = block_idx * size + n;
01338             int pitch_sh16 = (s->last_pitch_val << 16) +
01339                              s->pitch_diff_sh16 * abs_idx;
01340             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01341             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01342             idx            = idx_sh16 >> 16;
01343             if (s->pitch_diff_sh16) {
01344                 if (s->pitch_diff_sh16 > 0) {
01345                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01346                 } else
01347                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01348                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01349                               1, size - n);
01350             } else
01351                 len = size;
01352 
01353             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01354                                   wmavoice_ipol1_coeffs, 17,
01355                                   idx, 9, len);
01356         }
01357     } else /* ACB_TYPE_HAMMING */ {
01358         int block_pitch = block_pitch_sh2 >> 2;
01359         idx             = block_pitch_sh2 & 3;
01360         if (idx) {
01361             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01362                                   wmavoice_ipol2_coeffs, 4,
01363                                   idx, 8, size);
01364         } else
01365             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01366                               sizeof(float) * size);
01367     }
01368 
01369     /* Interpolate ACB/FCB and use as excitation signal */
01370     ff_weighted_vector_sumf(excitation, excitation, pulses,
01371                             acb_gain, fcb_gain, size);
01372 }
01373 
01390 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01391                         int block_idx, int size,
01392                         int block_pitch_sh2,
01393                         const double *lsps, const double *prev_lsps,
01394                         const struct frame_type_desc *frame_desc,
01395                         float *excitation, float *synth)
01396 {
01397     double i_lsps[MAX_LSPS];
01398     float lpcs[MAX_LSPS];
01399     float fac;
01400     int n;
01401 
01402     if (frame_desc->acb_type == ACB_TYPE_NONE)
01403         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01404     else
01405         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01406                             frame_desc, excitation);
01407 
01408     /* convert interpolated LSPs to LPCs */
01409     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01410     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01411         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01412     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01413 
01414     /* Speech synthesis */
01415     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01416 }
01417 
01433 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01434                        float *samples,
01435                        const double *lsps, const double *prev_lsps,
01436                        float *excitation, float *synth)
01437 {
01438     WMAVoiceContext *s = ctx->priv_data;
01439     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01440     int pitch[MAX_BLOCKS], last_block_pitch;
01441 
01442     /* Parse frame type ("frame header"), see frame_descs */
01443     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01444         block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01445 
01446     if (bd_idx < 0) {
01447         av_log(ctx, AV_LOG_ERROR,
01448                "Invalid frame type VLC code, skipping\n");
01449         return -1;
01450     }
01451 
01452     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01453     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01454         /* Pitch is provided per frame, which is interpreted as the pitch of
01455          * the last sample of the last block of this frame. We can interpolate
01456          * the pitch of other blocks (and even pitch-per-sample) by gradually
01457          * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
01458         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01459         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01460         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01461         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01462         if (s->last_acb_type == ACB_TYPE_NONE ||
01463             20 * abs(cur_pitch_val - s->last_pitch_val) >
01464                 (cur_pitch_val + s->last_pitch_val))
01465             s->last_pitch_val = cur_pitch_val;
01466 
01467         /* pitch per block */
01468         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01469             int fac = n * 2 + 1;
01470 
01471             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01472                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01473                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01474         }
01475 
01476         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01477         s->pitch_diff_sh16 =
01478             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01479     }
01480 
01481     /* Global gain (if silence) and pitch-adaptive window coordinates */
01482     switch (frame_descs[bd_idx].fcb_type) {
01483     case FCB_TYPE_SILENCE:
01484         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01485         break;
01486     case FCB_TYPE_AW_PULSES:
01487         aw_parse_coords(s, gb, pitch);
01488         break;
01489     }
01490 
01491     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01492         int bl_pitch_sh2;
01493 
01494         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01495         switch (frame_descs[bd_idx].acb_type) {
01496         case ACB_TYPE_HAMMING: {
01497             /* Pitch is given per block. Per-block pitches are encoded as an
01498              * absolute value for the first block, and then delta values
01499              * relative to this value) for all subsequent blocks. The scale of
01500              * this pitch value is semi-logaritmic compared to its use in the
01501              * decoder, so we convert it to normal scale also. */
01502             int block_pitch,
01503                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01504                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01505                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01506 
01507             if (n == 0) {
01508                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01509             } else
01510                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01511                                  get_bits(gb, s->block_delta_pitch_nbits);
01512             /* Convert last_ so that any next delta is within _range */
01513             last_block_pitch = av_clip(block_pitch,
01514                                        s->block_delta_pitch_hrange,
01515                                        s->block_pitch_range -
01516                                            s->block_delta_pitch_hrange);
01517 
01518             /* Convert semi-log-style scale back to normal scale */
01519             if (block_pitch < t1) {
01520                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01521             } else {
01522                 block_pitch -= t1;
01523                 if (block_pitch < t2) {
01524                     bl_pitch_sh2 =
01525                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01526                 } else {
01527                     block_pitch -= t2;
01528                     if (block_pitch < t3) {
01529                         bl_pitch_sh2 =
01530                             (s->block_conv_table[2] + block_pitch) << 2;
01531                     } else
01532                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01533                 }
01534             }
01535             pitch[n] = bl_pitch_sh2 >> 2;
01536             break;
01537         }
01538 
01539         case ACB_TYPE_ASYMMETRIC: {
01540             bl_pitch_sh2 = pitch[n] << 2;
01541             break;
01542         }
01543 
01544         default: // ACB_TYPE_NONE has no pitch
01545             bl_pitch_sh2 = 0;
01546             break;
01547         }
01548 
01549         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01550                     lsps, prev_lsps, &frame_descs[bd_idx],
01551                     &excitation[n * block_nsamples],
01552                     &synth[n * block_nsamples]);
01553     }
01554 
01555     /* Averaging projection filter, if applicable. Else, just copy samples
01556      * from synthesis buffer */
01557     if (s->do_apf) {
01558         double i_lsps[MAX_LSPS];
01559         float lpcs[MAX_LSPS];
01560 
01561         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01562             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01563         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01564         postfilter(s, synth, samples, 80, lpcs,
01565                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01566                    frame_descs[bd_idx].fcb_type, pitch[0]);
01567 
01568         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01569             i_lsps[n] = cos(lsps[n]);
01570         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01571         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01572                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01573                    frame_descs[bd_idx].fcb_type, pitch[0]);
01574     } else
01575         memcpy(samples, synth, 160 * sizeof(synth[0]));
01576 
01577     /* Cache values for next frame */
01578     s->frame_cntr++;
01579     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01580     s->last_acb_type = frame_descs[bd_idx].acb_type;
01581     switch (frame_descs[bd_idx].acb_type) {
01582     case ACB_TYPE_NONE:
01583         s->last_pitch_val = 0;
01584         break;
01585     case ACB_TYPE_ASYMMETRIC:
01586         s->last_pitch_val = cur_pitch_val;
01587         break;
01588     case ACB_TYPE_HAMMING:
01589         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01590         break;
01591     }
01592 
01593     return 0;
01594 }
01595 
/**
 * Enforce a valid ordering and spacing on a set of LSF values, in place.
 *
 * The first value is raised to at least 0.0015*pi, every following value to
 * at least its predecessor plus 0.0125*pi, and the last value is capped at
 * 0.9985*pi. If that leaves any value below its predecessor, the whole
 * array is sorted in ascending order.
 *
 * @param lsps array of LSF values, modified in place
 * @param num  number of values in lsps; must be at least 1
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double lowest  = 0.0015 * M_PI;
    const double min_gap = 0.0125 * M_PI;
    const double highest = 0.9985 * M_PI;
    int i, j, k;

    /* set minimum value for first, maximum value for last and minimum
     * spacing between LSF values.
     * Very similar to ff_set_min_dist_lsf(), but in double. */
    if (!(lsps[0] > lowest))
        lsps[0] = lowest;
    for (i = 1; i < num; i++) {
        const double min_val = lsps[i - 1] + min_gap;

        if (!(lsps[i] > min_val))
            lsps[i] = min_val;
    }
    if (lsps[num - 1] > highest)
        lsps[num - 1] = highest;

    /* reorder only if some neighbouring pair is out of order.
     * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
    for (i = 1; i < num; i++)
        if (lsps[i] < lsps[i - 1])
            break;
    if (i >= num)
        return;

    /* single insertion-sort pass over the whole array */
    for (j = 1; j < num; j++) {
        const double key = lsps[j];

        k = j - 1;
        while (k >= 0 && !(lsps[k] <= key)) {
            lsps[k + 1] = lsps[k];
            k--;
        }
        lsps[k + 1] = key;
    }
}
01636 
01646 static int check_bits_for_superframe(GetBitContext *orig_gb,
01647                                      WMAVoiceContext *s)
01648 {
01649     GetBitContext s_gb, *gb = &s_gb;
01650     int n, need_bits, bd_idx;
01651     const struct frame_type_desc *frame_desc;
01652 
01653     /* initialize a copy */
01654     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01655     skip_bits_long(gb, get_bits_count(orig_gb));
01656     assert(get_bits_left(gb) == get_bits_left(orig_gb));
01657 
01658     /* superframe header */
01659     if (get_bits_left(gb) < 14)
01660         return 1;
01661     if (!get_bits1(gb))
01662         return -1;                        // WMAPro-in-WMAVoice superframe
01663     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01664     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01665         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01666             return 1;
01667         skip_bits_long(gb, s->sframe_lsp_bitsize);
01668     }
01669 
01670     /* frames */
01671     for (n = 0; n < MAX_FRAMES; n++) {
01672         int aw_idx_is_ext = 0;
01673 
01674         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01675            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01676            skip_bits_long(gb, s->frame_lsp_bitsize);
01677         }
01678         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01679         if (bd_idx < 0)
01680             return -1;                   // invalid frame type VLC code
01681         frame_desc = &frame_descs[bd_idx];
01682         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01683             if (get_bits_left(gb) < s->pitch_nbits)
01684                 return 1;
01685             skip_bits_long(gb, s->pitch_nbits);
01686         }
01687         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01688             skip_bits(gb, 8);
01689         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01690             int tmp = get_bits(gb, 6);
01691             if (tmp >= 0x36) {
01692                 skip_bits(gb, 2);
01693                 aw_idx_is_ext = 1;
01694             }
01695         }
01696 
01697         /* blocks */
01698         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01699             need_bits = s->block_pitch_nbits +
01700                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01701         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01702             need_bits = 2 * !aw_idx_is_ext;
01703         } else
01704             need_bits = 0;
01705         need_bits += frame_desc->frame_size;
01706         if (get_bits_left(gb) < need_bits)
01707             return 1;
01708         skip_bits_long(gb, need_bits);
01709     }
01710 
01711     return 0;
01712 }
01713 
01734 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01735 {
01736     WMAVoiceContext *s = ctx->priv_data;
01737     GetBitContext *gb = &s->gb, s_gb;
01738     int n, res, n_samples = 480;
01739     double lsps[MAX_FRAMES][MAX_LSPS];
01740     const double *mean_lsf = s->lsps == 16 ?
01741         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01742     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01743     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01744     float *samples;
01745 
01746     memcpy(synth,      s->synth_history,
01747            s->lsps             * sizeof(*synth));
01748     memcpy(excitation, s->excitation_history,
01749            s->history_nsamples * sizeof(*excitation));
01750 
01751     if (s->sframe_cache_size > 0) {
01752         gb = &s_gb;
01753         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01754         s->sframe_cache_size = 0;
01755     }
01756 
01757     if ((res = check_bits_for_superframe(gb, s)) == 1) {
01758         *got_frame_ptr = 0;
01759         return 1;
01760     }
01761 
01762     /* First bit is speech/music bit, it differentiates between WMAVoice
01763      * speech samples (the actual codec) and WMAVoice music samples, which
01764      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01765      * the wild yet. */
01766     if (!get_bits1(gb)) {
01767         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01768         return -1;
01769     }
01770 
01771     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01772     if (get_bits1(gb)) {
01773         if ((n_samples = get_bits(gb, 12)) > 480) {
01774             av_log(ctx, AV_LOG_ERROR,
01775                    "Superframe encodes >480 samples (%d), not allowed\n",
01776                    n_samples);
01777             return -1;
01778         }
01779     }
01780     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01781     if (s->has_residual_lsps) {
01782         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01783 
01784         for (n = 0; n < s->lsps; n++)
01785             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01786 
01787         if (s->lsps == 10) {
01788             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01789         } else /* s->lsps == 16 */
01790             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01791 
01792         for (n = 0; n < s->lsps; n++) {
01793             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01794             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01795             lsps[2][n] += mean_lsf[n];
01796         }
01797         for (n = 0; n < 3; n++)
01798             stabilize_lsps(lsps[n], s->lsps);
01799     }
01800 
01801     /* get output buffer */
01802     s->frame.nb_samples = 480;
01803     if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01804         av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01805         return res;
01806     }
01807     s->frame.nb_samples = n_samples;
01808     samples = (float *)s->frame.data[0];
01809 
01810     /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
01811     for (n = 0; n < 3; n++) {
01812         if (!s->has_residual_lsps) {
01813             int m;
01814 
01815             if (s->lsps == 10) {
01816                 dequant_lsp10i(gb, lsps[n]);
01817             } else /* s->lsps == 16 */
01818                 dequant_lsp16i(gb, lsps[n]);
01819 
01820             for (m = 0; m < s->lsps; m++)
01821                 lsps[n][m] += mean_lsf[m];
01822             stabilize_lsps(lsps[n], s->lsps);
01823         }
01824 
01825         if ((res = synth_frame(ctx, gb, n,
01826                                &samples[n * MAX_FRAMESIZE],
01827                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01828                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01829                                &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01830             *got_frame_ptr = 0;
01831             return res;
01832         }
01833     }
01834 
01835     /* Statistics? FIXME - we don't check for length, a slight overrun
01836      * will be caught by internal buffer padding, and anything else
01837      * will be skipped, not read. */
01838     if (get_bits1(gb)) {
01839         res = get_bits(gb, 4);
01840         skip_bits(gb, 10 * (res + 1));
01841     }
01842 
01843     *got_frame_ptr = 1;
01844 
01845     /* Update history */
01846     memcpy(s->prev_lsps,           lsps[2],
01847            s->lsps             * sizeof(*s->prev_lsps));
01848     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01849            s->lsps             * sizeof(*synth));
01850     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01851            s->history_nsamples * sizeof(*excitation));
01852     if (s->do_apf)
01853         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01854                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01855 
01856     return 0;
01857 }
01858 
01866 static int parse_packet_header(WMAVoiceContext *s)
01867 {
01868     GetBitContext *gb = &s->gb;
01869     unsigned int res;
01870 
01871     if (get_bits_left(gb) < 11)
01872         return 1;
01873     skip_bits(gb, 4);          // packet sequence number
01874     s->has_residual_lsps = get_bits1(gb);
01875     do {
01876         res = get_bits(gb, 6); // number of superframes per packet
01877                                // (minus first one if there is spillover)
01878         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01879             return 1;
01880     } while (res == 0x3F);
01881     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01882 
01883     return 0;
01884 }
01885 
01901 static void copy_bits(PutBitContext *pb,
01902                       const uint8_t *data, int size,
01903                       GetBitContext *gb, int nbits)
01904 {
01905     int rmn_bytes, rmn_bits;
01906 
01907     rmn_bits = rmn_bytes = get_bits_left(gb);
01908     if (rmn_bits < nbits)
01909         return;
01910     if (nbits > pb->size_in_bits - put_bits_count(pb))
01911         return;
01912     rmn_bits &= 7; rmn_bytes >>= 3;
01913     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01914         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01915     avpriv_copy_bits(pb, data + size - rmn_bytes,
01916                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01917 }
01918 
01930 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01931                                   int *got_frame_ptr, AVPacket *avpkt)
01932 {
01933     WMAVoiceContext *s = ctx->priv_data;
01934     GetBitContext *gb = &s->gb;
01935     int size, res, pos;
01936 
01937     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01938      * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
01939      * feeds us ASF packets, which may concatenate multiple "codec" packets
01940      * in a single "muxer" packet, so we artificially emulate that by
01941      * capping the packet size at ctx->block_align. */
01942     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01943     if (!size) {
01944         *got_frame_ptr = 0;
01945         return 0;
01946     }
01947     init_get_bits(&s->gb, avpkt->data, size << 3);
01948 
01949     /* size == ctx->block_align is used to indicate whether we are dealing with
01950      * a new packet or a packet of which we already read the packet header
01951      * previously. */
01952     if (size == ctx->block_align) { // new packet header
01953         if ((res = parse_packet_header(s)) < 0)
01954             return res;
01955 
01956         /* If the packet header specifies a s->spillover_nbits, then we want
01957          * to push out all data of the previous packet (+ spillover) before
01958          * continuing to parse new superframes in the current packet. */
01959         if (s->spillover_nbits > 0) {
01960             if (s->sframe_cache_size > 0) {
01961                 int cnt = get_bits_count(gb);
01962                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01963                 flush_put_bits(&s->pb);
01964                 s->sframe_cache_size += s->spillover_nbits;
01965                 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01966                     *got_frame_ptr) {
01967                     cnt += s->spillover_nbits;
01968                     s->skip_bits_next = cnt & 7;
01969                     *(AVFrame *)data = s->frame;
01970                     return cnt >> 3;
01971                 } else
01972                     skip_bits_long (gb, s->spillover_nbits - cnt +
01973                                     get_bits_count(gb)); // resync
01974             } else
01975                 skip_bits_long(gb, s->spillover_nbits);  // resync
01976         }
01977     } else if (s->skip_bits_next)
01978         skip_bits(gb, s->skip_bits_next);
01979 
01980     /* Try parsing superframes in current packet */
01981     s->sframe_cache_size = 0;
01982     s->skip_bits_next = 0;
01983     pos = get_bits_left(gb);
01984     if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01985         return res;
01986     } else if (*got_frame_ptr) {
01987         int cnt = get_bits_count(gb);
01988         s->skip_bits_next = cnt & 7;
01989         *(AVFrame *)data = s->frame;
01990         return cnt >> 3;
01991     } else if ((s->sframe_cache_size = pos) > 0) {
01992         /* rewind bit reader to start of last (incomplete) superframe... */
01993         init_get_bits(gb, avpkt->data, size << 3);
01994         skip_bits_long(gb, (size << 3) - pos);
01995         assert(get_bits_left(gb) == pos);
01996 
01997         /* ...and cache it for spillover in next packet */
01998         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01999         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02000         // FIXME bad - just copy bytes as whole and add use the
02001         // skip_bits_next field
02002     }
02003 
02004     return size;
02005 }
02006 
02007 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02008 {
02009     WMAVoiceContext *s = ctx->priv_data;
02010 
02011     if (s->do_apf) {
02012         ff_rdft_end(&s->rdft);
02013         ff_rdft_end(&s->irdft);
02014         ff_dct_end(&s->dct);
02015         ff_dct_end(&s->dst);
02016     }
02017 
02018     return 0;
02019 }
02020 
02021 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02022 {
02023     WMAVoiceContext *s = ctx->priv_data;
02024     int n;
02025 
02026     s->postfilter_agc    = 0;
02027     s->sframe_cache_size = 0;
02028     s->skip_bits_next    = 0;
02029     for (n = 0; n < s->lsps; n++)
02030         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02031     memset(s->excitation_history, 0,
02032            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02033     memset(s->synth_history,      0,
02034            sizeof(*s->synth_history)      * MAX_LSPS);
02035     memset(s->gain_pred_err,      0,
02036            sizeof(s->gain_pred_err));
02037 
02038     if (s->do_apf) {
02039         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02040                sizeof(*s->synth_filter_out_buf) * s->lsps);
02041         memset(s->dcf_mem,              0,
02042                sizeof(*s->dcf_mem)              * 2);
02043         memset(s->zero_exc_pf,          0,
02044                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02045         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02046     }
02047 }
02048 
02049 AVCodec ff_wmavoice_decoder = {
02050     .name           = "wmavoice",
02051     .type           = AVMEDIA_TYPE_AUDIO,
02052     .id             = CODEC_ID_WMAVOICE,
02053     .priv_data_size = sizeof(WMAVoiceContext),
02054     .init           = wmavoice_decode_init,
02055     .close          = wmavoice_decode_end,
02056     .decode         = wmavoice_decode_packet,
02057     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02058     .flush     = wmavoice_flush,
02059     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02060 };