• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/vp8.c

Go to the documentation of this file.
00001 
00025 #include "libavutil/imgutils.h"
00026 #include "avcodec.h"
00027 #include "vp8.h"
00028 #include "vp8data.h"
00029 #include "rectangle.h"
00030 #include "thread.h"
00031 
00032 #if ARCH_ARM
00033 #   include "arm/vp8.h"
00034 #endif
00035 
00036 static void free_buffers(VP8Context *s)
00037 {
00038     av_freep(&s->macroblocks_base);
00039     av_freep(&s->filter_strength);
00040     av_freep(&s->intra4x4_pred_mode_top);
00041     av_freep(&s->top_nnz);
00042     av_freep(&s->edge_emu_buffer);
00043     av_freep(&s->top_border);
00044     av_freep(&s->segmentation_map);
00045 
00046     s->macroblocks = NULL;
00047 }
00048 
00049 static void vp8_decode_flush(AVCodecContext *avctx)
00050 {
00051     VP8Context *s = avctx->priv_data;
00052     int i;
00053 
00054     if (!avctx->is_copy) {
00055         for (i = 0; i < 5; i++)
00056             if (s->frames[i].data[0])
00057                 ff_thread_release_buffer(avctx, &s->frames[i]);
00058     }
00059     memset(s->framep, 0, sizeof(s->framep));
00060 
00061     free_buffers(s);
00062 }
00063 
00064 static int update_dimensions(VP8Context *s, int width, int height)
00065 {
00066     if (width  != s->avctx->width ||
00067         height != s->avctx->height) {
00068         if (av_image_check_size(width, height, 0, s->avctx))
00069             return AVERROR_INVALIDDATA;
00070 
00071         vp8_decode_flush(s->avctx);
00072 
00073         avcodec_set_dimensions(s->avctx, width, height);
00074     }
00075 
00076     s->mb_width  = (s->avctx->coded_width +15) / 16;
00077     s->mb_height = (s->avctx->coded_height+15) / 16;
00078 
00079     s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00080     s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
00081     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
00082     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00083     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00084     s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);
00085 
00086     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
00087         !s->top_nnz || !s->top_border || !s->segmentation_map)
00088         return AVERROR(ENOMEM);
00089 
00090     s->macroblocks        = s->macroblocks_base + 1;
00091 
00092     return 0;
00093 }
00094 
00095 static void parse_segment_info(VP8Context *s)
00096 {
00097     VP56RangeCoder *c = &s->c;
00098     int i;
00099 
00100     s->segmentation.update_map = vp8_rac_get(c);
00101 
00102     if (vp8_rac_get(c)) { // update segment feature data
00103         s->segmentation.absolute_vals = vp8_rac_get(c);
00104 
00105         for (i = 0; i < 4; i++)
00106             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
00107 
00108         for (i = 0; i < 4; i++)
00109             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00110     }
00111     if (s->segmentation.update_map)
00112         for (i = 0; i < 3; i++)
00113             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00114 }
00115 
00116 static void update_lf_deltas(VP8Context *s)
00117 {
00118     VP56RangeCoder *c = &s->c;
00119     int i;
00120 
00121     for (i = 0; i < 4; i++)
00122         s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);
00123 
00124     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
00125         s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
00126 }
00127 
00128 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00129 {
00130     const uint8_t *sizes = buf;
00131     int i;
00132 
00133     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00134 
00135     buf      += 3*(s->num_coeff_partitions-1);
00136     buf_size -= 3*(s->num_coeff_partitions-1);
00137     if (buf_size < 0)
00138         return -1;
00139 
00140     for (i = 0; i < s->num_coeff_partitions-1; i++) {
00141         int size = AV_RL24(sizes + 3*i);
00142         if (buf_size - size < 0)
00143             return -1;
00144 
00145         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00146         buf      += size;
00147         buf_size -= size;
00148     }
00149     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00150 
00151     return 0;
00152 }
00153 
00154 static void get_quants(VP8Context *s)
00155 {
00156     VP56RangeCoder *c = &s->c;
00157     int i, base_qi;
00158 
00159     int yac_qi     = vp8_rac_get_uint(c, 7);
00160     int ydc_delta  = vp8_rac_get_sint(c, 4);
00161     int y2dc_delta = vp8_rac_get_sint(c, 4);
00162     int y2ac_delta = vp8_rac_get_sint(c, 4);
00163     int uvdc_delta = vp8_rac_get_sint(c, 4);
00164     int uvac_delta = vp8_rac_get_sint(c, 4);
00165 
00166     for (i = 0; i < 4; i++) {
00167         if (s->segmentation.enabled) {
00168             base_qi = s->segmentation.base_quant[i];
00169             if (!s->segmentation.absolute_vals)
00170                 base_qi += yac_qi;
00171         } else
00172             base_qi = yac_qi;
00173 
00174         s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
00175         s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
00176         s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
00177         s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
00178         s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
00179         s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
00180 
00181         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
00182         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
00183     }
00184 }
00185 
00199 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00200 {
00201     VP56RangeCoder *c = &s->c;
00202 
00203     if (update)
00204         return VP56_FRAME_CURRENT;
00205 
00206     switch (vp8_rac_get_uint(c, 2)) {
00207     case 1:
00208         return VP56_FRAME_PREVIOUS;
00209     case 2:
00210         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00211     }
00212     return VP56_FRAME_NONE;
00213 }
00214 
00215 static void update_refs(VP8Context *s)
00216 {
00217     VP56RangeCoder *c = &s->c;
00218 
00219     int update_golden = vp8_rac_get(c);
00220     int update_altref = vp8_rac_get(c);
00221 
00222     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00223     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00224 }
00225 
00226 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
00227 {
00228     VP56RangeCoder *c = &s->c;
00229     int header_size, hscale, vscale, i, j, k, l, m, ret;
00230     int width  = s->avctx->width;
00231     int height = s->avctx->height;
00232 
00233     s->keyframe  = !(buf[0] & 1);
00234     s->profile   =  (buf[0]>>1) & 7;
00235     s->invisible = !(buf[0] & 0x10);
00236     header_size  = AV_RL24(buf) >> 5;
00237     buf      += 3;
00238     buf_size -= 3;
00239 
00240     if (s->profile > 3)
00241         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
00242 
00243     if (!s->profile)
00244         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
00245     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
00246         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
00247 
00248     if (header_size > buf_size - 7*s->keyframe) {
00249         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
00250         return AVERROR_INVALIDDATA;
00251     }
00252 
00253     if (s->keyframe) {
00254         if (AV_RL24(buf) != 0x2a019d) {
00255             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
00256             return AVERROR_INVALIDDATA;
00257         }
00258         width  = AV_RL16(buf+3) & 0x3fff;
00259         height = AV_RL16(buf+5) & 0x3fff;
00260         hscale = buf[4] >> 6;
00261         vscale = buf[6] >> 6;
00262         buf      += 7;
00263         buf_size -= 7;
00264 
00265         if (hscale || vscale)
00266             av_log_missing_feature(s->avctx, "Upscaling", 1);
00267 
00268         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
00269         for (i = 0; i < 4; i++)
00270             for (j = 0; j < 16; j++)
00271                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
00272                        sizeof(s->prob->token[i][j]));
00273         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
00274         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
00275         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
00276         memset(&s->segmentation, 0, sizeof(s->segmentation));
00277     }
00278 
00279     if (!s->macroblocks_base || /* first frame */
00280         width != s->avctx->width || height != s->avctx->height) {
00281         if ((ret = update_dimensions(s, width, height)) < 0)
00282             return ret;
00283     }
00284 
00285     ff_vp56_init_range_decoder(c, buf, header_size);
00286     buf      += header_size;
00287     buf_size -= header_size;
00288 
00289     if (s->keyframe) {
00290         if (vp8_rac_get(c))
00291             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
00292         vp8_rac_get(c); // whether we can skip clamping in dsp functions
00293     }
00294 
00295     if ((s->segmentation.enabled = vp8_rac_get(c)))
00296         parse_segment_info(s);
00297     else
00298         s->segmentation.update_map = 0; // FIXME: move this to some init function?
00299 
00300     s->filter.simple    = vp8_rac_get(c);
00301     s->filter.level     = vp8_rac_get_uint(c, 6);
00302     s->filter.sharpness = vp8_rac_get_uint(c, 3);
00303 
00304     if ((s->lf_delta.enabled = vp8_rac_get(c)))
00305         if (vp8_rac_get(c))
00306             update_lf_deltas(s);
00307 
00308     if (setup_partitions(s, buf, buf_size)) {
00309         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
00310         return AVERROR_INVALIDDATA;
00311     }
00312 
00313     get_quants(s);
00314 
00315     if (!s->keyframe) {
00316         update_refs(s);
00317         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
00318         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
00319     }
00320 
00321     // if we aren't saving this frame's probabilities for future frames,
00322     // make a copy of the current probabilities
00323     if (!(s->update_probabilities = vp8_rac_get(c)))
00324         s->prob[1] = s->prob[0];
00325 
00326     s->update_last = s->keyframe || vp8_rac_get(c);
00327 
00328     for (i = 0; i < 4; i++)
00329         for (j = 0; j < 8; j++)
00330             for (k = 0; k < 3; k++)
00331                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
00332                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
00333                         int prob = vp8_rac_get_uint(c, 8);
00334                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
00335                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
00336                     }
00337 
00338     if ((s->mbskip_enabled = vp8_rac_get(c)))
00339         s->prob->mbskip = vp8_rac_get_uint(c, 8);
00340 
00341     if (!s->keyframe) {
00342         s->prob->intra  = vp8_rac_get_uint(c, 8);
00343         s->prob->last   = vp8_rac_get_uint(c, 8);
00344         s->prob->golden = vp8_rac_get_uint(c, 8);
00345 
00346         if (vp8_rac_get(c))
00347             for (i = 0; i < 4; i++)
00348                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
00349         if (vp8_rac_get(c))
00350             for (i = 0; i < 3; i++)
00351                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
00352 
00353         // 17.2 MV probability update
00354         for (i = 0; i < 2; i++)
00355             for (j = 0; j < 19; j++)
00356                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
00357                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
00358     }
00359 
00360     return 0;
00361 }
00362 
00363 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00364 {
00365     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00366     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00367 }
00368 
/**
 * Read one motion vector component from the range coder.
 *
 * p[0] selects between the long and the short encoding of the magnitude,
 * p[1] is the sign probability, p[2..] drive the short-form tree and
 * p[9..18] the individual bits of the long form.
 *
 * @param c range coder to read from
 * @param p probability table for this component
 * @return the signed component value; the sign is only read when the
 *         magnitude is non-zero
 */
00372 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
00373 {
00374     int bit, x = 0;
00375
00376     if (vp56_rac_get_prob_branchy(c, p[0])) {
00377         int i;
00378
          // long form: bits 0..2 first (low to high), then bits 9..4 (high to low)
00379         for (i = 0; i < 3; i++)
00380             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00381         for (i = 9; i > 3; i--)
00382             x += vp56_rac_get_prob(c, p[9 + i]) << i;
          // bit 3 is only coded (with p[12]) when some bit >= 4 is set;
          // otherwise it is added without reading anything
00383         if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
00384             x += 8;
00385     } else {
00386         // small_mvtree
          // three binary decisions yielding 0..7; each decoded bit selects
          // which probability entry is used for the next decision
00387         const uint8_t *ps = p+2;
00388         bit = vp56_rac_get_prob(c, *ps);
00389         ps += 1 + 3*bit;
00390         x  += 4*bit;
00391         bit = vp56_rac_get_prob(c, *ps);
00392         ps += 1 + bit;
00393         x  += 2*bit;
00394         x  += vp56_rac_get_prob(c, *ps);
00395     }
00396
00397     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
00398 }
00399 
00400 static av_always_inline
00401 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00402 {
00403     if (left == top)
00404         return vp8_submv_prob[4-!!left];
00405     if (!top)
00406         return vp8_submv_prob[2];
00407     return vp8_submv_prob[1-!!left];
00408 }
00409 
/**
 * Decode the partitioning and the per-partition motion vectors of a
 * split-MV macroblock.
 *
 * The partitioning type is read first; then, for every partition, the
 * vectors to the left of and above its first 4x4 block pick a probability
 * set, and the partition's vector is decoded as "same as left", "same as
 * above", zero, or a new vector coded relative to mb->mv.
 *
 * @param s  decoder context (supplies the MV component probabilities)
 * @param c  range coder to read from
 * @param mb current macroblock; mb->partitioning and mb->bmv[] are written.
 *           The neighbours are addressed as mb[2] (top) and mb[-1] (left).
 * @return the number of partitions decoded
 */
00414 static av_always_inline
00415 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
00416 {
00417     int part_idx;
00418     int n, num;
00419     VP8Macroblock *top_mb  = &mb[2];
00420     VP8Macroblock *left_mb = &mb[-1];
00421     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
00422                   *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
00423                   *mbsplits_cur, *firstidx;
00424     VP56mv *top_mv  = top_mb->bmv;
00425     VP56mv *left_mv = left_mb->bmv;
00426     VP56mv *cur_mv  = mb->bmv;
00427
      // partitioning tree: 4x4, 8x8, or one of the two modes starting at
      // VP8_SPLITMVMODE_16x8
00428     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
00429         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
00430             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
00431         } else {
00432             part_idx = VP8_SPLITMVMODE_8x8;
00433         }
00434     } else {
00435         part_idx = VP8_SPLITMVMODE_4x4;
00436     }
00437
00438     num = vp8_mbsplit_count[part_idx];
      // NOTE(review): the next line ends in ',' rather than ';', so it forms a
      // single comma expression with the following assignment; the effect is
      // the same as two separate statements.
00439     mbsplits_cur = vp8_mbsplits[part_idx],
00440     firstidx = vp8_mbfirstidx[part_idx];
00441     mb->partitioning = part_idx;
00442
00443     for (n = 0; n < num; n++) {
          // k: index (0..15, raster order) of the first 4x4 block of partition n
00444         int k = firstidx[n];
00445         uint32_t left, above;
00446         const uint8_t *submv_prob;
00447
          // blocks in the leftmost column take "left" from the left macroblock
          // (its block k+3); all others from the block to the left in this one
00448         if (!(k & 3))
00449             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
00450         else
00451             left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
          // blocks in the top row take "above" from the top macroblock
          // (its block k+12); all others from the block above in this one
00452         if (k <= 3)
00453             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
00454         else
00455             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
00456
00457         submv_prob = get_submv_prob(left, above);
00458
          // sub-MV tree: left / above / zero / new (coded relative to mb->mv,
          // y component first)
00459         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
00460             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
00461                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
00462                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
00463                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
00464                 } else {
00465                     AV_ZERO32(&mb->bmv[n]);
00466                 }
00467             } else {
00468                 AV_WN32A(&mb->bmv[n], above);
00469             }
00470         } else {
00471             AV_WN32A(&mb->bmv[n], left);
00472         }
00473     }
00474
00475     return num;
00476 }
00477 
/**
 * Decode the inter prediction mode and motion vector of a macroblock.
 *
 * Candidate vectors are gathered from three neighbouring macroblocks
 * (addressed as mb+2, mb-1 and mb+1) and weighted; the resulting counts
 * index vp8_mode_contexts to obtain the probabilities for the mode tree
 * (zero / nearest / near / new / split).
 *
 * @param s    decoder context
 * @param mb   current macroblock; mode, partitioning, mv and bmv are written
 * @param mb_x not referenced in the body
 * @param mb_y not referenced in the body
 */
00478 static av_always_inline
00479 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
00480 {
00481     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
00482                                   mb - 1 /* left */,
00483                                   mb + 1 /* top-left */ };
00484     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
00485     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
00486     int idx = CNT_ZERO;
00487     int cur_sign_bias = s->sign_bias[mb->ref_frame];
00488     int8_t *sign_bias = s->sign_bias;
00489     VP56mv near_mv[4];
00490     uint8_t cnt[4] = { 0 };
00491     VP56RangeCoder *c = &s->c;
00492
00493     AV_ZERO32(&near_mv[0]);
00494     AV_ZERO32(&near_mv[1]);
00495     AV_ZERO32(&near_mv[2]);
00496
00497     /* Process MB on top, left and top-left */
      /* For neighbour n (skipped if it is intra, i.e. references the current
       * frame): a zero vector adds to cnt[CNT_ZERO]; a non-zero vector is
       * negated when the sign biases of the two reference frames differ,
       * appended to near_mv[] if it differs from the most recent entry (the
       * first neighbour is always appended), and adds to that entry's count.
       * Top and left weigh 2, top-left weighs 1. */
00498     #define MV_EDGE_CHECK(n)\
00499     {\
00500         VP8Macroblock *edge = mb_edge[n];\
00501         int edge_ref = edge->ref_frame;\
00502         if (edge_ref != VP56_FRAME_CURRENT) {\
00503             uint32_t mv = AV_RN32A(&edge->mv);\
00504             if (mv) {\
00505                 if (cur_sign_bias != sign_bias[edge_ref]) {\
00506                     /* SWAR negate of the values in mv. */\
00507                     mv = ~mv;\
00508                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
00509                 }\
00510                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
00511                     AV_WN32A(&near_mv[++idx], mv);\
00512                 cnt[idx]      += 1 + (n != 2);\
00513             } else\
00514                 cnt[CNT_ZERO] += 1 + (n != 2);\
00515         }\
00516     }
00517
00518     MV_EDGE_CHECK(0)
00519     MV_EDGE_CHECK(1)
00520     MV_EDGE_CHECK(2)
00521
00522     mb->partitioning = VP8_SPLITMVMODE_NONE;
00523     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
00524         mb->mode = VP8_MVMODE_MV;
00525
00526         /* If we have three distinct MVs, merge first and last if they're the same */
00527         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
00528             cnt[CNT_NEAREST] += 1;
00529
00530         /* Swap near and nearest if necessary */
00531         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
00532             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
00533             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
00534         }
00535
00536         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
00537             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
00538
00539                 /* Choose the best mv out of 0,0 and the nearest mv */
00540                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                  /* cnt[CNT_SPLITMV] is reused here as the split-mode context:
                   * left and top neighbours count double, top-left once */
00541                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
00542                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
00543                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
00544
00545                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
00546                     mb->mode = VP8_MVMODE_SPLIT;
                      /* the macroblock vector becomes that of the last partition */
00547                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
00548                 } else {
                      /* new vector: delta added to the clamped best vector, y first */
00549                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
00550                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
00551                     mb->bmv[0] = mb->mv;
00552                 }
00553             } else {
00554                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
00555                 mb->bmv[0] = mb->mv;
00556             }
00557         } else {
00558             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
00559             mb->bmv[0] = mb->mv;
00560         }
00561     } else {
00562         mb->mode = VP8_MVMODE_ZERO;
00563         AV_ZERO32(&mb->mv);
00564         mb->bmv[0] = mb->mv;
00565     }
00566 }
00567 
00568 static av_always_inline
00569 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
00570                            int mb_x, int keyframe)
00571 {
00572     uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00573     if (keyframe) {
00574         int x, y;
00575         uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
00576         uint8_t* const left = s->intra4x4_pred_mode_left;
00577         for (y = 0; y < 4; y++) {
00578             for (x = 0; x < 4; x++) {
00579                 const uint8_t *ctx;
00580                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
00581                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
00582                 left[y] = top[x] = *intra4x4;
00583                 intra4x4++;
00584             }
00585         }
00586     } else {
00587         int i;
00588         for (i = 0; i < 16; i++)
00589             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
00590     }
00591 }
00592 
00593 static av_always_inline
00594 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
00595 {
00596     VP56RangeCoder *c = &s->c;
00597 
00598     if (s->segmentation.update_map)
00599         *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
00600     else
00601         *segment = ref ? *ref : *segment;
00602     s->segment = *segment;
00603 
00604     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
00605 
00606     if (s->keyframe) {
00607         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
00608 
00609         if (mb->mode == MODE_I4x4) {
00610             decode_intra4x4_modes(s, c, mb_x, 1);
00611         } else {
00612             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
00613             AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
00614             AV_WN32A(s->intra4x4_pred_mode_left, modes);
00615         }
00616 
00617         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
00618         mb->ref_frame = VP56_FRAME_CURRENT;
00619     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
00620         // inter MB, 16.2
00621         if (vp56_rac_get_prob_branchy(c, s->prob->last))
00622             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
00623                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
00624         else
00625             mb->ref_frame = VP56_FRAME_PREVIOUS;
00626         s->ref_count[mb->ref_frame-1]++;
00627 
00628         // motion vectors, 16.3
00629         decode_mvs(s, mb, mb_x, mb_y);
00630     } else {
00631         // intra MB, 16.1
00632         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
00633 
00634         if (mb->mode == MODE_I4x4)
00635             decode_intra4x4_modes(s, c, mb_x, 0);
00636 
00637         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
00638         mb->ref_frame = VP56_FRAME_CURRENT;
00639         mb->partitioning = VP8_SPLITMVMODE_NONE;
00640         AV_ZERO32(&mb->bmv[0]);
00641     }
00642 }
00643 
00644 #ifndef decode_block_coeffs_internal
00645
/**
 * Decode the coefficient tokens of one 4x4 block, starting after the first
 * end-of-block check (execution enters the loop at skip_eob).
 *
 * Only compiled when no macro of the same name exists; presumably an
 * architecture header can supply a replacement that way (TODO confirm).
 *
 * @param c          range coder to read from
 * @param block      output coefficients, written at zigzag_scan[] positions
 * @param probs      token probabilities, indexed [position][context][token]
 * @param i          index of the first coefficient to decode
 * @param token_prob probability row for the first coefficient
 * @param qmul       dequantization factors: [0] for position 0, [1] otherwise
 * @return index one past the last coefficient decoded; 16 if the block ran
 *         to the end without an end-of-block token
 */
00654 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
00655                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00656                                         int i, uint8_t *token_prob, int16_t qmul[2])
00657 {
00658     goto skip_eob;
00659     do {
00660         int coeff;
00661         if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00662             return i;
00663
00664 skip_eob:
          // reached on entry and after every zero token, so no end-of-block
          // check is made directly after a zero
00665         if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
00666             if (++i == 16)
00667                 return i; // invalid input; blocks should end with EOB
00668             token_prob = probs[i][0];
00669             goto skip_eob;
00670         }
00671
00672         if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
00673             coeff = 1;
              // the next position uses context 1 after a coefficient of 1
00674             token_prob = probs[i+1][1];
00675         } else {
00676             if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
00677                 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
00678                 if (coeff)
00679                     coeff += vp56_rac_get_prob(c, token_prob[5]);
00680                 coeff += 2;
00681             } else {
00682                 // DCT_CAT*
00683                 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
00684                     if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
00685                         coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
00686                     } else {                                    // DCT_CAT2
00687                         coeff  = 7;
00688                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
00689                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
00690                     }
00691                 } else {    // DCT_CAT3 and up
                      // two bits select one of four categories; the base value
                      // 3 + (8 << cat) is 11, 19, 35 or 67
00692                     int a = vp56_rac_get_prob(c, token_prob[8]);
00693                     int b = vp56_rac_get_prob(c, token_prob[9+a]);
00694                     int cat = (a<<1) + b;
00695                     coeff  = 3 + (8<<cat);
00696                     coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
00697                 }
00698             }
              // the next position uses context 2 after a coefficient larger than 1
00699             token_prob = probs[i+1][2];
00700         }
          // a sign bit follows every non-zero token; dequantize and store at
          // the zigzag position
00701         block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
00702     } while (++i < 16);
00703
00704     return i;
00705 }
00706 #endif
00707 
00719 static av_always_inline
00720 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00721                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00722                         int i, int zero_nhood, int16_t qmul[2])
00723 {
00724     uint8_t *token_prob = probs[i][zero_nhood];
00725     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00726         return 0;
00727     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00728 }
00729 
/**
 * Decode all residual coefficients of one macroblock.
 *
 * @param s     decoder context; s->block, s->block_dc and
 *              s->non_zero_count_cache are written
 * @param c     range coder of the coefficient partition to read from
 * @param mb    current macroblock; mb->skip is set when nothing was coded
 * @param t_nnz "block above had coefficients" flags, read as context and
 *              updated: [0..3] luma columns, [4..7] chroma, [8] luma DC block
 * @param l_nnz same as t_nnz for the blocks to the left
 */
00730 static av_always_inline
00731 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
00732                       uint8_t t_nnz[9], uint8_t l_nnz[9])
00733 {
00734     int i, x, y, luma_start = 0, luma_ctx = 3;
00735     int nnz_pred, nnz, nnz_total = 0;
00736     int segment = s->segment;
00737     int block_dc = 0;
00738
      // every mode except I4x4 and SPLIT codes the luma DC values in a
      // separate block (token set 1); the luma blocks then start at
      // coefficient 1 and use token set 0 instead of 3
00739     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
00740         nnz_pred = t_nnz[8] + l_nnz[8];
00741
00742         // decode DC values and do hadamard
00743         nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
00744                                   s->qmat[segment].luma_dc_qmul);
00745         l_nnz[8] = t_nnz[8] = !!nnz;
00746         if (nnz) {
00747             nnz_total += nnz;
00748             block_dc = 1;
              // a single coefficient can only be the DC one: use the DC-only transform
00749             if (nnz == 1)
00750                 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
00751             else
00752                 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
00753         }
00754         luma_start = 1;
00755         luma_ctx = 0;
00756     }
00757
00758     // luma blocks
00759     for (y = 0; y < 4; y++)
00760         for (x = 0; x < 4; x++) {
00761             nnz_pred = l_nnz[y] + t_nnz[x];
00762             nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
00763                                       nnz_pred, s->qmat[segment].luma_qmul);
00764             // nnz+block_dc may be one more than the actual last index, but we don't care
00765             s->non_zero_count_cache[y][x] = nnz + block_dc;
00766             t_nnz[x] = l_nnz[y] = !!nnz;
00767             nnz_total += nnz;
00768         }
00769
00770     // chroma blocks
00771     // TODO: what to do about dimensions? 2nd dim for luma is x,
00772     // but for chroma it's (y<<1)|x
      // i = 4 and 5 address the two chroma planes, each a 2x2 grid of blocks
00773     for (i = 4; i < 6; i++)
00774         for (y = 0; y < 2; y++)
00775             for (x = 0; x < 2; x++) {
00776                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
00777                 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
00778                                           nnz_pred, s->qmat[segment].chroma_qmul);
00779                 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
00780                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
00781                 nnz_total += nnz;
00782             }
00783
00784     // if there were no coded coeffs despite the macroblock not being marked skip,
00785     // we MUST not do the inner loop filter and should not do IDCT
00786     // Since skip isn't used for bitstream prediction, just manually set it.
00787     if (!nnz_total)
00788         mb->skip = 1;
00789 }
00790 
00791 static av_always_inline
00792 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00793                       int linesize, int uvlinesize, int simple)
00794 {
00795     AV_COPY128(top_border, src_y + 15*linesize);
00796     if (!simple) {
00797         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00798         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00799     }
00800 }
00801 
/**
 * Exchange (or copy) pixels between the top-border buffer and the picture
 * row directly above the macroblock.
 *
 * @param top_border border buffer entry for this macroblock column
 * @param src_y      top-left luma pixel of the macroblock
 * @param src_cb     top-left pixel of the first chroma plane
 * @param src_cr     top-left pixel of the second chroma plane
 * @param linesize   luma stride in bytes
 * @param uvlinesize chroma stride in bytes
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param mb_width   picture width in macroblocks
 * @param simple     non-zero when the simple loop filter is in use
 * @param xchg       non-zero to swap, zero to copy, for the spans that
 *                   honour this flag; the remaining spans are always swapped
 */
00802 static av_always_inline
00803 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00804                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
00805                     int simple, int xchg)
00806 {
      // 32 bytes earlier in the buffer; presumably the entry of the
      // macroblock to the left (TODO confirm entry size)
00807     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
      // step up to the picture row just above the macroblock
00808     src_y  -=   linesize;
00809     src_cb -= uvlinesize;
00810     src_cr -= uvlinesize;
00811
      // XCHG(a,b,xchg): swap 8 bytes between a and b when xchg is set,
      // otherwise AV_COPY64(b,a) (presumably copying a into b)
00812 #define XCHG(a,b,xchg) do {                     \
00813         if (xchg) AV_SWAP64(b,a);               \
00814         else      AV_COPY64(b,a);               \
00815     } while (0)
00816
00817     XCHG(top_border_m1+8, src_y-8, xchg);
00818     XCHG(top_border,      src_y,   xchg);
00819     XCHG(top_border+8,    src_y+8, 1);
      // the 8 pixels above the next macroblock, except in the last column
00820     if (mb_x < mb_width-1)
00821         XCHG(top_border+32, src_y+16, 1);
00822
00823     // only copy chroma for normal loop filter
00824     // or to initialize the top row to 127
00825     if (!simple || !mb_y) {
00826         XCHG(top_border_m1+16, src_cb-8, xchg);
00827         XCHG(top_border_m1+24, src_cr-8, xchg);
00828         XCHG(top_border+16,    src_cb, 1);
00829         XCHG(top_border+24,    src_cr, 1);
00830     }
00831 }
00832 
00833 static av_always_inline
00834 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00835 {
00836     if (!mb_x) {
00837         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00838     } else {
00839         return mb_y ? mode : LEFT_DC_PRED8x8;
00840     }
00841 }
00842 
/*
 * Pick the replacement for a TM prediction mode based on the macroblock
 * position.
 *
 *   mb_x == 0 && mb_y == 0 -> DC_129_PRED8x8
 *   mb_x == 0 && mb_y != 0 -> VERT_PRED8x8
 *   mb_x != 0 && mb_y == 0 -> HOR_PRED8x8
 *   otherwise              -> mode (returned unchanged)
 */
00843 static av_always_inline
00844 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00845 {
00846     if (!mb_x) {
00847         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00848     } else {
00849         return mb_y ? mode : HOR_PRED8x8;
00850     }
00851 }
00852 
/*
 * Adjust a 16x16/8x8 intra prediction mode for the macroblock position.
 * Only DC_PRED8x8 is remapped (via check_dc_pred8x8_mode); every other mode
 * is returned unchanged.
 */
00853 static av_always_inline
00854 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00855 {
00856     if (mode == DC_PRED8x8) {
00857         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00858     } else {
00859         return mode;
00860     }
00861 }
00862 
/*
 * Variant of check_intra_pred8x8_mode() used by intra_predict() when
 * CODEC_FLAG_EMU_EDGE is set. In addition to DC, the vertical, horizontal
 * and TM modes are replaced when the macroblock is in the first row
 * (mb_y == 0) and/or first column (mb_x == 0):
 *
 *   DC_PRED8x8    -> check_dc_pred8x8_mode()
 *   VERT_PRED8x8  -> DC_127_PRED8x8 when mb_y == 0
 *   HOR_PRED8x8   -> DC_129_PRED8x8 when mb_x == 0
 *   PLANE_PRED8x8 -> check_tm_pred8x8_mode()
 *
 * Any other mode is returned unchanged.
 */
00863 static av_always_inline
00864 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00865 {
00866     switch (mode) {
00867     case DC_PRED8x8:
00868         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00869     case VERT_PRED8x8:
00870         return !mb_y ? DC_127_PRED8x8 : mode;
00871     case HOR_PRED8x8:
00872         return !mb_x ? DC_129_PRED8x8 : mode;
00873     case PLANE_PRED8x8 /*TM*/:
00874         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00875     }
00876     return mode;
00877 }
00878 
/*
 * 4x4 counterpart of check_tm_pred8x8_mode().
 *
 *   mb_x == 0 && mb_y == 0 -> DC_129_PRED
 *   mb_x == 0 && mb_y != 0 -> VERT_VP8_PRED
 *   mb_x != 0 && mb_y == 0 -> HOR_VP8_PRED
 *   otherwise              -> mode (returned unchanged)
 */
00879 static av_always_inline
00880 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00881 {
00882     if (!mb_x) {
00883         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00884     } else {
00885         return mb_y ? mode : HOR_VP8_PRED;
00886     }
00887 }
00888 
/*
 * Adjust a 4x4 intra prediction mode for blocks touching the top row or left
 * column (a zero mb_y / mb_x argument).
 *
 * Returns the mode to use, which is either the input mode or a DC_127 /
 * DC_129 / VERT_VP8 / HOR_VP8 replacement. For some mode/position
 * combinations the mode is kept and *copy_buf is set to 1 instead; in
 * intra_predict() that flag makes the block be predicted in a small
 * temporary buffer with explicitly filled edges.
 *
 * *copy_buf is only ever set to 1 here, never cleared, so the caller must
 * initialise it to 0.
 */
00889 static av_always_inline
00890 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
00891 {
00892     switch (mode) {
00893     case VERT_PRED:
        // first column but not first row: keep the mode, predict via the copy buffer
00894         if (!mb_x && mb_y) {
00895             *copy_buf = 1;
00896             return mode;
00897         }
00898         /* fall-through */
00899     case DIAG_DOWN_LEFT_PRED:
00900     case VERT_LEFT_PRED:
00901         return !mb_y ? DC_127_PRED : mode;
00902     case HOR_PRED:
        // first row: keep the mode, predict via the copy buffer
00903         if (!mb_y) {
00904             *copy_buf = 1;
00905             return mode;
00906         }
00907         /* fall-through */
00908     case HOR_UP_PRED:
00909         return !mb_x ? DC_129_PRED : mode;
00910     case TM_VP8_PRED:
00911         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
00912     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
00913     case DIAG_DOWN_RIGHT_PRED:
00914     case VERT_RIGHT_PRED:
00915     case HOR_DOWN_PRED:
        // these modes are never replaced; on either edge they go through the copy buffer
00916         if (!mb_y || !mb_x)
00917             *copy_buf = 1;
00918         return mode;
00919     }
00920     return mode;
00921 }
00922 
/*
 * Intra-predict one macroblock into dst[0..2] (Y, Cb, Cr).
 *
 * s          decoder context (prediction function tables, line sizes,
 *            top_border, per-macroblock 4x4 modes, coefficient blocks)
 * dst        pointers to the macroblock's top-left pixel in each plane
 * mb         macroblock being predicted (mode and skip flag are read)
 * mb_x, mb_y macroblock position in macroblock units
 *
 * For modes below MODE_I4x4 a single 16x16 luma prediction is done. Otherwise
 * each of the sixteen 4x4 luma blocks is predicted and its residual is added
 * right away via vp8_idct_dc_add / vp8_idct_add. Chroma always uses one 8x8
 * prediction per plane with s->chroma_pred_mode.
 *
 * Unless skipped by the condition below, xchg_mb_border() is called before
 * and after prediction: first with xchg=1, then with xchg=0.
 */
00923 static av_always_inline
00924 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
00925                    int mb_x, int mb_y)
00926 {
00927     AVCodecContext *avctx = s->avctx;
00928     int x, y, mode, nnz;
00929     uint32_t tr;
00930 
00931     // for the first row, we need to run xchg_mb_border to init the top edge to 127
00932     // otherwise, skip it if we aren't going to deblock
00933     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
00934         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
00935                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
00936                        s->filter.simple, 1);
00937 
00938     if (mb->mode < MODE_I4x4) {
00939         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
00940             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
00941         } else {
00942             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
00943         }
00944         s->hpc.pred16x16[mode](dst[0], s->linesize);
00945     } else {
00946         uint8_t *ptr = dst[0];
00947         uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
        // constant top-right pixels used on the first frame row with EMU_EDGE
00948         uint8_t tr_top[4] = { 127, 127, 127, 127 };
00949 
00950         // all blocks on the right edge of the macroblock use the bottom edge of
00951         // the top macroblock for their topright edge
00952         uint8_t *tr_right = ptr - s->linesize + 16;
00953 
00954         // if we're on the right edge of the frame, said edge is extended
00955         // from the top macroblock
00956         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
00957             mb_x == s->mb_width-1) {
            // replicate the last pixel of the row above into all four bytes
00958             tr = tr_right[-1]*0x01010101u;
00959             tr_right = (uint8_t *)&tr;
00960         }
00961 
00962         if (mb->skip)
00963             AV_ZERO128(s->non_zero_count_cache);
00964 
00965         for (y = 0; y < 4; y++) {
00966             uint8_t *topright = ptr + 4 - s->linesize;
00967             for (x = 0; x < 4; x++) {
00968                 int copy = 0, linesize = s->linesize;
00969                 uint8_t *dst = ptr+4*x;
                // scratch area with stride 8: the 4x4 block lives at offset 12,
                // its top row at 4..7, top-left at 3, left column at 11/19/27/35
00970                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
00971 
00972                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
00973                     topright = tr_top;
00974                 } else if (x == 3)
00975                     topright = tr_right;
00976 
00977                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
00978                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
00979                     if (copy) {
                        // predict into the scratch area, with edges filled in by hand
00980                         dst = copy_dst + 12;
00981                         linesize = 8;
00982                         if (!(mb_y + y)) {
                            // top frame edge: top-left and top row are 127
00983                             copy_dst[3] = 127U;
00984                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
00985                         } else {
                            // top row comes from the frame; top-left is 129 on the left frame edge
00986                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
00987                             if (!(mb_x + x)) {
00988                                 copy_dst[3] = 129U;
00989                             } else {
00990                                 copy_dst[3] = ptr[4*x-s->linesize-1];
00991                             }
00992                         }
00993                         if (!(mb_x + x)) {
                            // left frame edge: left column is 129
00994                             copy_dst[11] =
00995                             copy_dst[19] =
00996                             copy_dst[27] =
00997                             copy_dst[35] = 129U;
00998                         } else {
00999                             copy_dst[11] = ptr[4*x              -1];
01000                             copy_dst[19] = ptr[4*x+s->linesize  -1];
01001                             copy_dst[27] = ptr[4*x+s->linesize*2-1];
01002                             copy_dst[35] = ptr[4*x+s->linesize*3-1];
01003                         }
01004                     }
01005                 } else {
01006                     mode = intra4x4[x];
01007                 }
01008                 s->hpc.pred4x4[mode](dst, topright, linesize);
01009                 if (copy) {
                    // move the four predicted rows from the scratch area into the frame
01010                     AV_COPY32(ptr+4*x              , copy_dst+12);
01011                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
01012                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01013                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01014                 }
01015 
                // add the residual now: a count of 1 takes the DC-only routine
01016                 nnz = s->non_zero_count_cache[y][x];
01017                 if (nnz) {
01018                     if (nnz == 1)
01019                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
01020                     else
01021                         s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
01022                 }
01023                 topright += 4;
01024             }
01025 
01026             ptr   += 4*s->linesize;
01027             intra4x4 += 4;
01028         }
01029     }
01030 
    // chroma: one 8x8 prediction per plane, same (position-adjusted) mode for both
01031     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01032         mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
01033     } else {
01034         mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
01035     }
01036     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01037     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01038 
    // second border pass, same condition as at the top but with xchg=0
01039     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
01040         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01041                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01042                        s->filter.simple, 0);
01043 }
01044 
/*
 * Per-fraction lookup used by vp8_mc_luma()/vp8_mc_chroma(). The second
 * index is the fractional motion-vector position (0..7). For every column,
 * row 1 equals row 0 plus row 2.
 */
01045 static const uint8_t subpel_idx[3][8] = {
01046     { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
01047                                 // also function pointer index
01048     { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
01049     { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
01050 };
01051 
/*
 * Motion-compensate one luma block.
 *
 * s        decoder context (edge_emu_buffer and dsp.emulated_edge_mc are used)
 * dst      destination of the block in the current frame
 * ref      reference frame; data[0] is read
 * mv       motion vector; the low two bits of each component select the
 *          sub-pixel filter, the rest (mv >> 2) is a whole-pixel offset
 * x_off, y_off  position of the block in the plane, in pixels
 * block_w, block_h  block size in pixels
 * width, height     plane size used for the edge check
 * linesize stride used for both source and destination
 * mc_func  table of MC functions indexed [my_idx][mx_idx]
 *
 * When the whole vector reads as zero (AV_RN32A(mv) == 0) the block is
 * fetched with mc_func[0][0] from the co-located position.
 */
01068 static av_always_inline
01069 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
01070                  int x_off, int y_off, int block_w, int block_h,
01071                  int width, int height, int linesize,
01072                  vp8_mc_func mc_func[3][3])
01073 {
01074     uint8_t *src = ref->data[0];
01075 
01076     if (AV_RN32A(mv)) {
01077 
        // fractional part scaled to 0..6 (even values only), and the matching function index
01078         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
01079         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
01080 
01081         x_off += mv->x >> 2;
01082         y_off += mv->y >> 2;
01083 
01084         // edge emulation
        // presumably blocks until the reference has progressed far enough to
        // cover the lowest source row needed (16-line units) -- confirm in thread.h
01085         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
01086         src += y_off * linesize + x_off;
        // the block plus the extra filter pixels would leave the plane:
        // build the source area in edge_emu_buffer instead
01087         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01088             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01089             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
01090                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01091                                     x_off - mx_idx, y_off - my_idx, width, height);
01092             src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01093         }
01094         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
01095     } else {
01096         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
01097         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
01098     }
01099 }
01100 
/*
 * Motion-compensate one chroma block in both chroma planes.
 *
 * s        decoder context (edge_emu_buffer and dsp.emulated_edge_mc are used)
 * dst1, dst2  destinations in the two chroma planes
 * ref      reference frame; data[1] and data[2] are read
 * mv       chroma motion vector; the low three bits of each component select
 *          the sub-pixel filter, the rest (mv >> 3) is a whole-pixel offset
 * x_off, y_off  position of the block in the chroma plane, in pixels
 * block_w, block_h  block size in chroma pixels
 * width, height     chroma plane size used for the edge check
 * linesize stride used for both sources and destinations
 * mc_func  table of MC functions indexed [my_idx][mx_idx]
 *
 * edge_emu_buffer is shared by both planes, so in the emulated-edge path the
 * first plane is fully processed before the buffer is refilled for the second.
 */
01118 static av_always_inline
01119 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
01120                    const VP56mv *mv, int x_off, int y_off,
01121                    int block_w, int block_h, int width, int height, int linesize,
01122                    vp8_mc_func mc_func[3][3])
01123 {
01124     uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
01125 
01126     if (AV_RN32A(mv)) {
01127         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
01128         int my = mv->y&7, my_idx = subpel_idx[0][my];
01129 
01130         x_off += mv->x >> 3;
01131         y_off += mv->y >> 3;
01132 
01133         // edge emulation
01134         src1 += y_off * linesize + x_off;
01135         src2 += y_off * linesize + x_off;
        // same wait as in vp8_mc_luma, but in 8-line units (>> 3)
01136         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
01137         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01138             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01139             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
01140                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01141                                     x_off - mx_idx, y_off - my_idx, width, height);
01142             src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01143             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01144 
01145             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
01146                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01147                                     x_off - mx_idx, y_off - my_idx, width, height);
01148             src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01149             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01150         } else {
01151             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01152             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01153         }
01154     } else {
01155         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
01156         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01157         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01158     }
01159 }
01160 
/*
 * Motion-compensate one partition of a macroblock: luma first, then both
 * chroma planes at half the size and half the offsets.
 *
 * s          decoder context
 * dst        pointers to the macroblock's top-left pixel in each plane
 * ref_frame  reference frame to predict from
 * x_off, y_off    macroblock position in luma pixels
 * bx_off, by_off  partition offset inside the macroblock, in luma pixels
 * block_w, block_h  partition size in luma pixels
 * width, height     luma plane size
 * mv         motion vector of the partition (also used for chroma)
 */
01161 static av_always_inline
01162 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
01163                  AVFrame *ref_frame, int x_off, int y_off,
01164                  int bx_off, int by_off,
01165                  int block_w, int block_h,
01166                  int width, int height, VP56mv *mv)
01167 {
01168     VP56mv uvmv = *mv;
01169 
01170     /* Y */
    // function set 0 for 16-wide partitions, 1 for 8-wide ones
01171     vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
01172                 ref_frame, mv, x_off + bx_off, y_off + by_off,
01173                 block_w, block_h, width, height, s->linesize,
01174                 s->put_pixels_tab[block_w == 8]);
01175 
01176     /* U/V */
    // profile 3: drop the fractional bits so chroma uses whole-pixel vectors
01177     if (s->profile == 3) {
01178         uvmv.x &= ~7;
01179         uvmv.y &= ~7;
01180     }
01181     x_off   >>= 1; y_off   >>= 1;
01182     bx_off  >>= 1; by_off  >>= 1;
01183     width   >>= 1; height  >>= 1;
01184     block_w >>= 1; block_h >>= 1;
    // after halving: function set 1 for 8-wide chroma blocks, 2 for 4-wide ones
01185     vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
01186                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
01187                   &uvmv, x_off + bx_off, y_off + by_off,
01188                   block_w, block_h, width, height, s->uvlinesize,
01189                   s->put_pixels_tab[1 + (block_w == 4)]);
01190 }
01191 
01192 /* Fetch pixels for estimated mv 4 macroblocks ahead.
01193  * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
/*
 * s      decoder context (framep, ref_count, line sizes, dsp.prefetch)
 * mb     macroblock whose mv is used as the estimate
 * mb_x, mb_y  macroblock position in macroblock units
 * mb_xy  linear macroblock index, used only to scale the usage threshold
 * ref    reference frame index; s->ref_count is indexed with ref-1
 */
01194 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01195 {
01196     /* Don't prefetch refs that haven't been used very often this frame. */
01197     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01198         int x_off = mb_x << 4, y_off = mb_y << 4;
01199         int mx = (mb->mv.x>>2) + x_off + 8;
01200         int my = (mb->mv.y>>2) + y_off;
01201         uint8_t **src= s->framep[ref]->data;
        // the +64 moves the target 64 bytes (four 16-pixel macroblocks) to the right
01202         int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01203         /* For threading, a ff_thread_await_progress here might be useful, but
01204          * it actually slows down the decoder. Since a bad prefetch doesn't
01205          * generate bad decoder output, we don't run it here. */
01206         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01207         off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        // the distance between the two chroma planes is passed as the stride,
        // presumably so that one call touches both planes
01208         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01209     }
01210 }
01211 
/*
 * Inter-predict one macroblock by applying its motion vector(s) to the
 * reference frame selected by mb->ref_frame.
 *
 * s          decoder context
 * dst        pointers to the macroblock's top-left pixel in each plane
 * mb         macroblock (partitioning, mv, bmv[] and ref_frame are read)
 * mb_x, mb_y macroblock position in macroblock units
 *
 * The work is dispatched on mb->partitioning: one 16x16 block, two 16x8 or
 * 8x16 halves, four 8x8 quarters, or sixteen 4x4 luma blocks with four 4x4
 * chroma blocks whose vectors are derived from the luma ones.
 */
01215 static av_always_inline
01216 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
01217                    int mb_x, int mb_y)
01218 {
01219     int x_off = mb_x << 4, y_off = mb_y << 4;
01220     int width = 16*s->mb_width, height = 16*s->mb_height;
01221     AVFrame *ref = s->framep[mb->ref_frame];
01222     VP56mv *bmv = mb->bmv;
01223 
01224     switch (mb->partitioning) {
01225     case VP8_SPLITMVMODE_NONE:
01226         vp8_mc_part(s, dst, ref, x_off, y_off,
01227                     0, 0, 16, 16, width, height, &mb->mv);
01228         break;
01229     case VP8_SPLITMVMODE_4x4: {
01230         int x, y;
01231         VP56mv uvmv;
01232 
01233         /* Y */
01234         for (y = 0; y < 4; y++) {
01235             for (x = 0; x < 4; x++) {
01236                 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
01237                             ref, &bmv[4*y + x],
01238                             4*x + x_off, 4*y + y_off, 4, 4,
01239                             width, height, s->linesize,
01240                             s->put_pixels_tab[2]);
01241             }
01242         }
01243 
01244         /* U/V */
01245         x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
01246         for (y = 0; y < 2; y++) {
01247             for (x = 0; x < 2; x++) {
                // sum the vectors of the 2x2 group of luma blocks covering this chroma block
01248                 uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
01249                          mb->bmv[ 2*y    * 4 + 2*x+1].x +
01250                          mb->bmv[(2*y+1) * 4 + 2*x  ].x +
01251                          mb->bmv[(2*y+1) * 4 + 2*x+1].x;
01252                 uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
01253                          mb->bmv[ 2*y    * 4 + 2*x+1].y +
01254                          mb->bmv[(2*y+1) * 4 + 2*x  ].y +
01255                          mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                // divide the sum by 4 with rounding; the sign term subtracts 1 for
                // negative sums (assumes an arithmetic right shift)
01256                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
01257                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
01258                 if (s->profile == 3) {
01259                     uvmv.x &= ~7;
01260                     uvmv.y &= ~7;
01261                 }
01262                 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
01263                               dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
01264                               4*x + x_off, 4*y + y_off, 4, 4,
01265                               width, height, s->uvlinesize,
01266                               s->put_pixels_tab[2]);
01267             }
01268         }
01269         break;
01270     }
01271     case VP8_SPLITMVMODE_16x8:
        // top half, then bottom half
01272         vp8_mc_part(s, dst, ref, x_off, y_off,
01273                     0, 0, 16, 8, width, height, &bmv[0]);
01274         vp8_mc_part(s, dst, ref, x_off, y_off,
01275                     0, 8, 16, 8, width, height, &bmv[1]);
01276         break;
01277     case VP8_SPLITMVMODE_8x16:
        // left half, then right half
01278         vp8_mc_part(s, dst, ref, x_off, y_off,
01279                     0, 0, 8, 16, width, height, &bmv[0]);
01280         vp8_mc_part(s, dst, ref, x_off, y_off,
01281                     8, 0, 8, 16, width, height, &bmv[1]);
01282         break;
01283     case VP8_SPLITMVMODE_8x8:
        // quarters in raster order: top-left, top-right, bottom-left, bottom-right
01284         vp8_mc_part(s, dst, ref, x_off, y_off,
01285                     0, 0, 8, 8, width, height, &bmv[0]);
01286         vp8_mc_part(s, dst, ref, x_off, y_off,
01287                     8, 0, 8, 8, width, height, &bmv[1]);
01288         vp8_mc_part(s, dst, ref, x_off, y_off,
01289                     0, 8, 8, 8, width, height, &bmv[2]);
01290         vp8_mc_part(s, dst, ref, x_off, y_off,
01291                     8, 8, 8, 8, width, height, &bmv[3]);
01292         break;
01293     }
01294 }
01295 
/*
 * Add the inverse-transformed residual of one macroblock to dst.
 *
 * s    decoder context (non_zero_count_cache, block[], vp8dsp, line sizes)
 * dst  pointers to the macroblock's top-left pixel in each plane
 * mb   macroblock; only mb->mode is read
 *
 * Luma is skipped for MODE_I4x4, where intra_predict() adds the residual per
 * 4x4 block itself. Each row of non_zero_count_cache is read as one 32-bit
 * word holding four per-block counts, one per byte: 0 means nothing to add,
 * 1 selects the DC-only routine, anything larger the full IDCT.
 */
01296 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
01297 {
01298     int x, y, ch;
01299 
01300     if (mb->mode != MODE_I4x4) {
01301         uint8_t *y_dst = dst[0];
01302         for (y = 0; y < 4; y++) {
01303             uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
01304             if (nnz4) {
                // some count in this row is above 1: handle the blocks one by one
01305                 if (nnz4&~0x01010101) {
01306                     for (x = 0; x < 4; x++) {
01307                         if ((uint8_t)nnz4 == 1)
01308                             s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
01309                         else if((uint8_t)nnz4 > 1)
01310                             s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        // shift to the next block's count; stop once the rest are all zero
01311                         nnz4 >>= 8;
01312                         if (!nnz4)
01313                             break;
01314                     }
01315                 } else {
                    // every count is 0 or 1: one call covers the whole row of four blocks
01316                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
01317                 }
01318             }
01319             y_dst += 4*s->linesize;
01320         }
01321     }
01322 
    // chroma: cache rows 4 and 5 each hold the counts of one plane's 2x2 blocks
01323     for (ch = 0; ch < 2; ch++) {
01324         uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
01325         if (nnz4) {
01326             uint8_t *ch_dst = dst[1+ch];
01327             if (nnz4&~0x01010101) {
01328                 for (y = 0; y < 2; y++) {
01329                     for (x = 0; x < 2; x++) {
01330                         if ((uint8_t)nnz4 == 1)
01331                             s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01332                         else if((uint8_t)nnz4 > 1)
01333                             s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01334                         nnz4 >>= 8;
                        // nothing left in this plane: leave both loops at once
01335                         if (!nnz4)
01336                             goto chroma_idct_end;
01337                     }
01338                     ch_dst += 4*s->uvlinesize;
01339                 }
01340             } else {
01341                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
01342             }
01343         }
01344 chroma_idct_end: ;
01345     }
01346 }
01347 
/*
 * Compute the loop-filter parameters of one macroblock and store them in *f.
 *
 * s   decoder context (segmentation, lf_delta, filter settings, s->segment)
 * mb  macroblock (ref_frame, mode and skip are read)
 * f   output: filter_level, inner_limit and inner_filter are written
 *
 * The level starts from the frame level or the per-segment level, is
 * adjusted by the reference-frame and mode deltas when those are enabled,
 * and is then clipped with av_clip_uintp2(level, 6).
 */
01348 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01349 {
01350     int interior_limit, filter_level;
01351 
01352     if (s->segmentation.enabled) {
01353         filter_level = s->segmentation.filter_level[s->segment];
        // the segment value is a delta on the frame level unless absolute_vals is set
01354         if (!s->segmentation.absolute_vals)
01355             filter_level += s->filter.level;
01356     } else
01357         filter_level = s->filter.level;
01358 
01359     if (s->lf_delta.enabled) {
01360         filter_level += s->lf_delta.ref[mb->ref_frame];
01361         filter_level += s->lf_delta.mode[mb->mode];
01362     }
01363 
    // presumably clips to the unsigned 6-bit range 0..63 -- see libavutil/common.h
01364     filter_level = av_clip_uintp2(filter_level, 6);
01365 
    // the interior limit starts at the level, shrinks with sharpness, and is at least 1
01366     interior_limit = filter_level;
01367     if (s->filter.sharpness) {
01368         interior_limit >>= (s->filter.sharpness + 3) >> 2;
01369         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01370     }
01371     interior_limit = FFMAX(interior_limit, 1);
01372 
01373     f->filter_level = filter_level;
01374     f->inner_limit = interior_limit;
    // inner edges are filtered unless the macroblock is skipped and is
    // neither MODE_I4x4 nor VP8_MVMODE_SPLIT
01375     f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01376 }
01377 
/*
 * Apply the normal loop filter to one macroblock in all three planes.
 *
 * s          decoder context (vp8dsp function table, line sizes, keyframe flag)
 * dst        pointers to the macroblock's top-left pixel in each plane
 * f          filter parameters for this macroblock
 * mb_x, mb_y macroblock position; the left edge is skipped when mb_x == 0
 *            and the top edge when mb_y == 0
 *
 * Nothing is done when f->filter_level is 0. Call order: left macroblock
 * edge, inner edges at columns 4/8/12, top macroblock edge, inner edges at
 * rows 4/8/12. Chroma inner edges are at offset 4 only.
 */
01378 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
01379 {
01380     int mbedge_lim, bedge_lim, hev_thresh;
01381     int filter_level = f->filter_level;
01382     int inner_limit = f->inner_limit;
01383     int inner_filter = f->inner_filter;
01384     int linesize = s->linesize;
01385     int uvlinesize = s->uvlinesize;
    // hev threshold indexed by [s->keyframe][filter_level]; 64 entries per row
01386     static const uint8_t hev_thresh_lut[2][64] = {
01387         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01388           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01389           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
01390           3, 3, 3, 3 },
01391         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01392           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01393           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01394           2, 2, 2, 2 }
01395     };
01396 
01397     if (!filter_level)
01398         return;
01399 
    // edge limits: inner block edges use bedge_lim, macroblock edges use bedge_lim + 4
01400      bedge_lim = 2*filter_level + inner_limit;
01401     mbedge_lim = bedge_lim + 4;
01402 
01403     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
01404 
01405     if (mb_x) {
01406         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
01407                                        mbedge_lim, inner_limit, hev_thresh);
01408         s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01409                                        mbedge_lim, inner_limit, hev_thresh);
01410     }
01411 
01412     if (inner_filter) {
01413         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
01414                                              inner_limit, hev_thresh);
01415         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
01416                                              inner_limit, hev_thresh);
01417         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
01418                                              inner_limit, hev_thresh);
01419         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
01420                                              uvlinesize,  bedge_lim,
01421                                              inner_limit, hev_thresh);
01422     }
01423 
01424     if (mb_y) {
01425         s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
01426                                        mbedge_lim, inner_limit, hev_thresh);
01427         s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01428                                        mbedge_lim, inner_limit, hev_thresh);
01429     }
01430 
01431     if (inner_filter) {
01432         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
01433                                              linesize,    bedge_lim,
01434                                              inner_limit, hev_thresh);
01435         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
01436                                              linesize,    bedge_lim,
01437                                              inner_limit, hev_thresh);
01438         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
01439                                              linesize,    bedge_lim,
01440                                              inner_limit, hev_thresh);
01441         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
01442                                              dst[2] + 4 * uvlinesize,
01443                                              uvlinesize,  bedge_lim,
01444                                              inner_limit, hev_thresh);
01445     }
01446 }
01447 
/*
 * Apply the simple loop filter to the luma plane of one macroblock.
 *
 * s          decoder context (vp8dsp function table, luma line size)
 * dst        pointer to the macroblock's top-left luma pixel
 * f          filter parameters for this macroblock
 * mb_x, mb_y macroblock position; the left edge is skipped when mb_x == 0
 *            and the top edge when mb_y == 0
 *
 * Same edge order and limits as filter_mb(), but only luma is touched and
 * the filter calls take a single limit argument.
 */
01448 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01449 {
01450     int mbedge_lim, bedge_lim;
01451     int filter_level = f->filter_level;
01452     int inner_limit = f->inner_limit;
01453     int inner_filter = f->inner_filter;
01454     int linesize = s->linesize;
01455 
01456     if (!filter_level)
01457         return;
01458 
01459      bedge_lim = 2*filter_level + inner_limit;
01460     mbedge_lim = bedge_lim + 4;
01461 
01462     if (mb_x)
01463         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01464     if (inner_filter) {
01465         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01466         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01467         s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01468     }
01469 
01470     if (mb_y)
01471         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01472     if (inner_filter) {
01473         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01474         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01475         s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01476     }
01477 }
01478 
/*
 * Run the normal loop filter over one row of macroblocks.
 *
 * s         decoder context; s->filter_strength supplies one entry per
 *           macroblock of the row, consumed left to right
 * curframe  frame being filtered in place
 * mb_y      macroblock row index
 *
 * backup_mb_border() is called for each macroblock before it is filtered,
 * so the saved border is taken from the not-yet-filtered pixels.
 */
01479 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
01480 {
01481     VP8FilterStrength *f = s->filter_strength;
01482     uint8_t *dst[3] = {
01483         curframe->data[0] + 16*mb_y*s->linesize,
01484         curframe->data[1] +  8*mb_y*s->uvlinesize,
01485         curframe->data[2] +  8*mb_y*s->uvlinesize
01486     };
01487     int mb_x;
01488 
01489     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01490         backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01491         filter_mb(s, dst, f++, mb_x, mb_y);
        // advance one macroblock: 16 luma pixels, 8 chroma pixels
01492         dst[0] += 16;
01493         dst[1] += 8;
01494         dst[2] += 8;
01495     }
01496 }
01497 
/*
 * Run the simple (luma-only) loop filter over one row of macroblocks.
 *
 * s         decoder context; s->filter_strength supplies one entry per
 *           macroblock of the row, consumed left to right
 * curframe  frame being filtered in place; only data[0] is used
 * mb_y      macroblock row index
 *
 * backup_mb_border() is called with NULL chroma pointers and simple=1 before
 * each macroblock is filtered.
 */
01498 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
01499 {
01500     VP8FilterStrength *f = s->filter_strength;
01501     uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
01502     int mb_x;
01503 
01504     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01505         backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
01506         filter_mb_simple(s, dst, f++, mb_x, mb_y);
01507         dst += 16;
01508     }
01509 }
01510 
01511 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
01512                             AVPacket *avpkt)
01513 {
01514     VP8Context *s = avctx->priv_data;
01515     int ret, mb_x, mb_y, i, y, referenced;
01516     enum AVDiscard skip_thresh;
01517     AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
01518 
01519     if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
01520         return ret;
01521 
01522     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
01523                                 || s->update_altref == VP56_FRAME_CURRENT;
01524 
01525     skip_thresh = !referenced ? AVDISCARD_NONREF :
01526                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
01527 
01528     if (avctx->skip_frame >= skip_thresh) {
01529         s->invisible = 1;
01530         goto skip_decode;
01531     }
01532     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
01533 
01534     // release no longer referenced frames
01535     for (i = 0; i < 5; i++)
01536         if (s->frames[i].data[0] &&
01537             &s->frames[i] != prev_frame &&
01538             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01539             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01540             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
01541             ff_thread_release_buffer(avctx, &s->frames[i]);
01542 
01543     // find a free buffer
01544     for (i = 0; i < 5; i++)
01545         if (&s->frames[i] != prev_frame &&
01546             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01547             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01548             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
01549             curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
01550             break;
01551         }
01552     if (i == 5) {
01553         av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
01554         abort();
01555     }
01556     if (curframe->data[0])
01557         ff_thread_release_buffer(avctx, curframe);
01558 
01559     curframe->key_frame = s->keyframe;
01560     curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
01561     curframe->reference = referenced ? 3 : 0;
01562     curframe->ref_index[0] = s->segmentation_map;
01563     if ((ret = ff_thread_get_buffer(avctx, curframe))) {
01564         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
01565         return ret;
01566     }
01567 
01568     // check if golden and altref are swapped
01569     if (s->update_altref != VP56_FRAME_NONE) {
01570         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
01571     } else {
01572         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
01573     }
01574     if (s->update_golden != VP56_FRAME_NONE) {
01575         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
01576     } else {
01577         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
01578     }
01579     if (s->update_last) {
01580         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
01581     } else {
01582         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
01583     }
01584     s->next_framep[VP56_FRAME_CURRENT]      = curframe;
01585 
01586     ff_thread_finish_setup(avctx);
01587 
01588     // Given that arithmetic probabilities are updated every frame, it's quite likely
01589     // that the values we have on a random interframe are complete junk if we didn't
01590     // start decode on a keyframe. So just don't display anything rather than junk.
01591     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
01592                          !s->framep[VP56_FRAME_GOLDEN] ||
01593                          !s->framep[VP56_FRAME_GOLDEN2])) {
01594         av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
01595         return AVERROR_INVALIDDATA;
01596     }
01597 
01598     s->linesize   = curframe->linesize[0];
01599     s->uvlinesize = curframe->linesize[1];
01600 
01601     if (!s->edge_emu_buffer)
01602         s->edge_emu_buffer = av_malloc(21*s->linesize);
01603 
01604     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
01605 
01606     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
01607     memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
01608 
01609     // top edge of 127 for intra prediction
01610     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01611         s->top_border[0][15] = s->top_border[0][23] = 127;
01612         memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
01613     }
01614     memset(s->ref_count, 0, sizeof(s->ref_count));
01615     if (s->keyframe)
01616         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
01617 
01618 #define MARGIN (16 << 2)
01619     s->mv_min.y = -MARGIN;
01620     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01621 
01622     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
01623         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
01624         VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01625         int mb_xy = mb_y*s->mb_width;
01626         uint8_t *dst[3] = {
01627             curframe->data[0] + 16*mb_y*s->linesize,
01628             curframe->data[1] +  8*mb_y*s->uvlinesize,
01629             curframe->data[2] +  8*mb_y*s->uvlinesize
01630         };
01631 
01632         memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
01633         memset(s->left_nnz, 0, sizeof(s->left_nnz));
01634         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01635 
01636         // left edge of 129 for intra prediction
01637         if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01638             for (i = 0; i < 3; i++)
01639                 for (y = 0; y < 16>>!!i; y++)
01640                     dst[i][y*curframe->linesize[i]-1] = 129;
01641             if (mb_y == 1) // top left edge is also 129
01642                 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
01643         }
01644 
01645         s->mv_min.x = -MARGIN;
01646         s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
01647         if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
01648             ff_thread_await_progress(prev_frame, mb_y, 0);
01649 
01650         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01651             /* Prefetch the current frame, 4 MBs ahead */
01652             s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
01653             s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
01654 
01655             decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
01656                            prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
01657 
01658             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
01659 
01660             if (!mb->skip)
01661                 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
01662 
01663             if (mb->mode <= MODE_I4x4)
01664                 intra_predict(s, dst, mb, mb_x, mb_y);
01665             else
01666                 inter_predict(s, dst, mb, mb_x, mb_y);
01667 
01668             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
01669 
01670             if (!mb->skip) {
01671                 idct_mb(s, dst, mb);
01672             } else {
01673                 AV_ZERO64(s->left_nnz);
01674                 AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
01675 
01676                 // Reset DC block predictors if they would exist if the mb had coefficients
01677                 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
01678                     s->left_nnz[8]      = 0;
01679                     s->top_nnz[mb_x][8] = 0;
01680                 }
01681             }
01682 
01683             if (s->deblock_filter)
01684                 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
01685 
01686             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
01687 
01688             dst[0] += 16;
01689             dst[1] += 8;
01690             dst[2] += 8;
01691             s->mv_min.x -= 64;
01692             s->mv_max.x -= 64;
01693         }
01694         if (s->deblock_filter) {
01695             if (s->filter.simple)
01696                 filter_mb_row_simple(s, curframe, mb_y);
01697             else
01698                 filter_mb_row(s, curframe, mb_y);
01699         }
01700         s->mv_min.y -= 64;
01701         s->mv_max.y -= 64;
01702 
01703         ff_thread_report_progress(curframe, mb_y, 0);
01704     }
01705 
01706     ff_thread_report_progress(curframe, INT_MAX, 0);
01707 skip_decode:
01708     // if future frames don't use the updated probabilities,
01709     // reset them to the values we saved
01710     if (!s->update_probabilities)
01711         s->prob[0] = s->prob[1];
01712 
01713     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
01714 
01715     if (!s->invisible) {
01716         *(AVFrame*)data = *curframe;
01717         *data_size = sizeof(AVFrame);
01718     }
01719 
01720     return avpkt->size;
01721 }
01722 
01723 static av_cold int vp8_decode_init(AVCodecContext *avctx)
01724 {
01725     VP8Context *s = avctx->priv_data;
01726 
01727     s->avctx = avctx;
01728     avctx->pix_fmt = PIX_FMT_YUV420P;
01729 
01730     dsputil_init(&s->dsp, avctx);
01731     ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
01732     ff_vp8dsp_init(&s->vp8dsp);
01733 
01734     return 0;
01735 }
01736 
01737 static av_cold int vp8_decode_free(AVCodecContext *avctx)
01738 {
01739     vp8_decode_flush(avctx);
01740     return 0;
01741 }
01742 
01743 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
01744 {
01745     VP8Context *s = avctx->priv_data;
01746 
01747     s->avctx = avctx;
01748 
01749     return 0;
01750 }
01751 
/*
 * Translate a frame pointer that points into s_src->frames[] into a pointer
 * to the slot with the same index in s->frames[]; a NULL pointer stays NULL.
 *
 * Relies on variables named `s` and `s_src` being in scope where it is used.
 * The argument may be evaluated twice, so it must be free of side effects.
 * Both the argument and the whole expansion are parenthesized so the macro
 * can be used safely inside larger expressions (e.g. !REBASE(p)).
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
01754 
01755 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
01756 {
01757     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
01758 
01759     if (s->macroblocks_base &&
01760         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
01761         free_buffers(s);
01762     }
01763 
01764     s->prob[0] = s_src->prob[!s_src->update_probabilities];
01765     s->segmentation = s_src->segmentation;
01766     s->lf_delta = s_src->lf_delta;
01767     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
01768 
01769     memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
01770     s->framep[0] = REBASE(s_src->next_framep[0]);
01771     s->framep[1] = REBASE(s_src->next_framep[1]);
01772     s->framep[2] = REBASE(s_src->next_framep[2]);
01773     s->framep[3] = REBASE(s_src->next_framep[3]);
01774 
01775     return 0;
01776 }
01777 
01778 AVCodec ff_vp8_decoder = {
01779     "vp8",
01780     AVMEDIA_TYPE_VIDEO,
01781     CODEC_ID_VP8,
01782     sizeof(VP8Context),
01783     vp8_decode_init,
01784     NULL,
01785     vp8_decode_free,
01786     vp8_decode_frame,
01787     CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
01788     .flush = vp8_decode_flush,
01789     .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
01790     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
01791     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
01792 };

Generated on Wed Apr 11 2012 07:31:35 for FFmpeg by doxygen 1.7.1