• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavfilter/libmpcodecs/vf_filmdint.c

Go to the documentation of this file.
00001 /*
00002  * This file is part of MPlayer.
00003  *
00004  * MPlayer is free software; you can redistribute it and/or modify
00005  * it under the terms of the GNU General Public License as published by
00006  * the Free Software Foundation; either version 2 of the License, or
00007  * (at your option) any later version.
00008  *
00009  * MPlayer is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License along
00015  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
00016  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00017  */
00018 
00019 #include <stdio.h>
00020 #include <stdlib.h>
00021 #include <string.h>
00022 #include <sys/time.h>
00023 
00024 #include "config.h"
00025 #include "mp_msg.h"
00026 #include "cpudetect.h"
00027 
00028 #include "img_format.h"
00029 #include "mp_image.h"
00030 #include "vd.h"
00031 #include "vf.h"
00032 #include "cmmx.h"
00033 
00034 #include "libvo/fastmemcpy.h"
00035 
00036 #define NUM_STORED 4
00037 
/*
 * Classification assigned to a field.  vf_priv_s.prev_type stores the
 * classification given to the previous one.
 * NOTE(review): the enumerator names suggest "N-th field of a 3-field or
 * 2-field pulldown group, or genuinely interlaced" -- the code that assigns
 * these values is outside this view; confirm there.
 */
00038 enum pu_field_type_t {
00039     PU_1ST_OF_3,
00040     PU_2ND_OF_3,
00041     PU_3RD_OF_3,
00042     PU_1ST_OF_2,
00043     PU_2ND_OF_2,
00044     PU_INTERLACED
00045 };
00046 
/*
 * Four 16-bit difference accumulators for one block.
 *
 * The field order is significant: the MMX code builds the result in a single
 * 64-bit register (even in word 0, odd in word 1, noise in word 2, temp in
 * word 3) and stores that register directly over a struct metrics.
 */
00047 struct metrics {
00048     /* This struct maps to a packed word 64-bit MMX register */
00049     unsigned short int even;
00050     unsigned short int odd;
00051     unsigned short int noise;
00052     unsigned short int temp;
00053 } __attribute__ ((aligned (8)));
00054 
/*
 * Per-picture statistics, updated once per block by get_block_stats() or by
 * the equivalent asm in block_metrics_mmx2().
 *
 *   tiny / low / high  - number of blocks whose metric exceeds 1x / 2x / 4x
 *                        the corresponding p->thres value
 *   bigger / twox      - number of blocks whose metric is at least ~1.5x / 2x
 *                        its counterpart (even vs. odd, noise vs. temp)
 *   max                - per-metric maximum over all blocks
 *   sad                - sum of the metrics that reached p->sad_thres
 *   interlaced_*       - number of blocks flagged as interlaced
 *   num_blocks         - number of blocks examined
 *
 * Member order matters: ppzs/pprs are initialized positionally.
 */
00055 struct frame_stats {
00056     struct metrics tiny, low, high, bigger, twox, max;
00057     struct { unsigned int even, odd, noise, temp; } sad;
00058     unsigned short interlaced_high;
00059     unsigned short interlaced_low;
00060     unsigned short num_blocks;
00061 };
00062 
/*
 * Private state of the filter instance.
 * Only the members whose use is visible in this part of the file are
 * annotated; the remaining ones are consumed elsewhere.
 */
00063 struct vf_priv_s {
00064     unsigned long inframes;
00065     unsigned long outframes;
00066     enum pu_field_type_t prev_type;
    /* swapped seeds the field selector in copy_merge_fields();
     * chroma_swapped is XORed into it for the chroma planes */
00067     unsigned swapped, chroma_swapped;
    /* when set, chroma planes are copied from a single source picture */
00068     unsigned luma_only;
00069     unsigned verbose;
00070     unsigned fast;
00071     unsigned long w, h, cw, ch, stride, chroma_stride, nplanes;
    /* metrics below this value are not added to frame_stats.sad */
00072     unsigned long sad_thres;
    /* threshold handed to dint_copy_plane() when a single field is shown */
00073     unsigned long dint_thres;
00074     unsigned char *memory_allocated;
00075     unsigned char *planes[2*NUM_STORED][4];
00076     unsigned char **old_planes;
00077     unsigned long static_idx;
00078     unsigned long temp_idx;
00079     unsigned long crop_x, crop_y, crop_cx, crop_cy;
00080     unsigned long export_count, merge_count;
00081     unsigned long num_breaks;
00082     unsigned long num_copies;
00083     long in_inc, out_dec, iosync;
00084     long num_fields;
00085     long prev_fields;
00086     long notout;
    /* dint_copy_plane() uses the MMX2 line routine when this equals 1 */
00087     long mmx2;
00088     unsigned small_bytes[2];
00089     unsigned mmx_temp[2];
00090     struct frame_stats stats[2];
    /* per-metric base thresholds used when classifying a block */
00091     struct metrics thres;
00092     char chflag;
00093     double diff_time, merge_time, decode_time, vo_time, filter_time;
00094 };
00095 
/*
 * Constant frame_stats templates.  Each PPZ/PPR group initializes one
 * four-member aggregate as {even, odd, noise, temp}; the seven groups cover
 * the six struct metrics members plus the sad sub-struct, followed by
 * interlaced_high = 0, interlaced_low = 0 and num_blocks = 9999.
 * PPZ and PPR currently expand to the same values.
 */
00096 #define PPZ { 2000, 2000, 0, 2000 }
00097 #define PPR { 2000, 2000, 0, 2000 }
00098 static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999};
00099 static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999};
00100 
/* Fallback MIN/MAX.  Both macros evaluate each argument twice, so do not
 * pass expressions with side effects. */
00101 #ifndef MIN
00102 #define        MIN(a,b) (((a)<(b))?(a):(b))
00103 #endif
00104 #ifndef MAX
00105 #define        MAX(a,b) (((a)>(b))?(a):(b))
00106 #endif
00107 
/*
 * Building blocks for the inline asm below, written with plain MMX
 * instructions (plus 3DNow! pavgusb).  Operands are in AT&T order, so
 * "op X, Y" updates Y.  All of them work on packed unsigned bytes unless
 * noted otherwise.
 */

/* T = |X - Y| per byte.  Y is clobbered (left holding the saturated Y - X). */
00108 #define PDIFFUB(X,Y,T) "movq "    #X "," #T "\n\t" \
00109                        "psubusb " #Y "," #T "\n\t" \
00110                        "psubusb " #X "," #Y "\n\t" \
00111                        "paddusb " #Y "," #T "\n\t"
00112 
/* Y = |X - Y| per byte.  T is scratch; X is preserved. */
00113 #define PDIFFUBT(X,Y,T) "movq "    #X "," #T "\n\t" \
00114                         "psubusb " #Y "," #T "\n\t" \
00115                         "psubusb " #X "," #Y "\n\t" \
00116                         "paddusb " #T "," #Y "\n\t"
00117 
/* X = sum of the eight bytes of X, left in the low word with the upper
 * words cleared.  Z must hold zero; T is scratch. */
00118 #define PSUMBW(X,T,Z)        "movq " #X "," #T "\n\t" \
00119                         "punpcklbw " #Z "," #X "\n\t" \
00120                         "punpckhbw " #Z "," #T "\n\t" \
00121                         "paddw " #T "," #X "\n\t" \
00122                         "movq " #X "," #T "\n\t" \
00123                         "psllq $32, " #T "\n\t" \
00124                         "paddw " #T "," #X "\n\t" \
00125                         "movq " #X "," #T "\n\t" \
00126                         "psllq $16, " #T "\n\t" \
00127                         "paddw " #T "," #X "\n\t" \
00128                         "psrlq $48, " #X "\n\t"
00129 
/* Y = sum of absolute byte differences between X and Y (low word).
 * Z must hold zero; T is scratch. */
00130 #define PSADBW(X,Y,T,Z)        PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z)
00131 
/* Y = max(X, Y), per unsigned byte (PMAXUB) or per unsigned word (PMAXUW). */
00132 #define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t"
00133 #define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t"
/* Y = min(X, Y) per byte.  T is scratch. */
00134 #define PMINUBT(X,Y,T)        "movq " #Y "," #T "\n\t" \
00135                         "psubusb " #X "," #T "\n\t" \
00136                         "psubusb " #T "," #Y "\n\t"
/* Y = average of X and Y per byte, using the 3DNow! instruction. */
00137 #define PAVGB(X,Y)        "pavgusb " #X "," #Y "\n\t"
00138 
00139 static inline void
00140 get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines,
00141               struct metrics *m)
00142 {
00143     a -= as;
00144     b -= bs;
00145     do {
00146         cmmx_t old_po = *(cmmx_t*)(a      );
00147         cmmx_t     po = *(cmmx_t*)(b      );
00148         cmmx_t      e = *(cmmx_t*)(b +   bs);
00149         cmmx_t  old_o = *(cmmx_t*)(a + 2*as);
00150         cmmx_t      o = *(cmmx_t*)(b + 2*bs);
00151         cmmx_t     ne = *(cmmx_t*)(b + 3*bs);
00152         cmmx_t old_no = *(cmmx_t*)(a + 4*as);
00153         cmmx_t     no = *(cmmx_t*)(b + 4*bs);
00154 
00155         cmmx_t   qup_old_odd = p31avgb(old_o, old_po);
00156         cmmx_t       qup_odd = p31avgb(    o,     po);
00157         cmmx_t qdown_old_odd = p31avgb(old_o, old_no);
00158         cmmx_t     qdown_odd = p31avgb(    o,     no);
00159 
00160         cmmx_t   qup_even = p31avgb(ne, e);
00161         cmmx_t qdown_even = p31avgb(e, ne);
00162 
00163         cmmx_t    temp_up_diff = pdiffub(qdown_even, qup_old_odd);
00164         cmmx_t   noise_up_diff = pdiffub(qdown_even, qup_odd);
00165         cmmx_t  temp_down_diff = pdiffub(qup_even, qdown_old_odd);
00166         cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd);
00167 
00168         cmmx_t odd_diff = pdiffub(o, old_o);
00169         m->odd  += psumbw(odd_diff);
00170         m->even += psadbw(e, *(cmmx_t*)(a+as));
00171 
00172         temp_up_diff  = pminub(temp_up_diff, temp_down_diff);
00173         temp_up_diff  = pminub(temp_up_diff, odd_diff);
00174         m->temp  += psumbw(temp_up_diff);
00175         noise_up_diff = pminub(noise_up_diff, odd_diff);
00176         noise_up_diff = pminub(noise_up_diff, noise_down_diff);
00177 
00178         m->noise += psumbw(noise_up_diff);
00179         a += 2*as;
00180         b += 2*bs;
00181     } while (--lines);
00182 }
00183 
00184 static inline void
00185 get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
00186                    int lines, struct metrics *m)
00187 {
00188     a -= as;
00189     b -= bs;
00190     do {
00191         cmmx_t old_po = (*(cmmx_t*)(a       ) >> 1) & ~SIGN_BITS;
00192         cmmx_t     po = (*(cmmx_t*)(b       ) >> 1) & ~SIGN_BITS;
00193         cmmx_t  old_e = (*(cmmx_t*)(a +   as) >> 1) & ~SIGN_BITS;
00194         cmmx_t      e = (*(cmmx_t*)(b +   bs) >> 1) & ~SIGN_BITS;
00195         cmmx_t  old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS;
00196         cmmx_t      o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS;
00197         cmmx_t     ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS;
00198         cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS;
00199         cmmx_t     no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS;
00200 
00201         cmmx_t   qup_old_odd = p31avgb_s(old_o, old_po);
00202         cmmx_t       qup_odd = p31avgb_s(    o,     po);
00203         cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no);
00204         cmmx_t     qdown_odd = p31avgb_s(    o,     no);
00205 
00206         cmmx_t   qup_even = p31avgb_s(ne, e);
00207         cmmx_t qdown_even = p31avgb_s(e, ne);
00208 
00209         cmmx_t    temp_up_diff = pdiffub_s(qdown_even, qup_old_odd);
00210         cmmx_t   noise_up_diff = pdiffub_s(qdown_even, qup_odd);
00211         cmmx_t  temp_down_diff = pdiffub_s(qup_even, qdown_old_odd);
00212         cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd);
00213 
00214         cmmx_t odd_diff = pdiffub_s(o, old_o);
00215         m->odd  += psumbw_s(odd_diff) << 1;
00216         m->even += psadbw_s(e, old_e) << 1;
00217 
00218         temp_up_diff  = pminub_s(temp_up_diff, temp_down_diff);
00219         temp_up_diff  = pminub_s(temp_up_diff, odd_diff);
00220         m->temp      += psumbw_s(temp_up_diff) << 1;
00221         noise_up_diff = pminub_s(noise_up_diff, odd_diff);
00222         noise_up_diff = pminub_s(noise_up_diff, noise_down_diff);
00223 
00224         m->noise += psumbw_s(noise_up_diff) << 1;
00225         a += 2*as;
00226         b += 2*bs;
00227     } while (--lines);
00228 }
00229 
00230 static inline void
00231 get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
00232                    int lines, struct metrics *m)
00233 {
00234     a -= as;
00235     b -= bs;
00236     do {
00237         cmmx_t old_po = (*(cmmx_t*)(a       )>>1) & ~SIGN_BITS;
00238         cmmx_t     po = (*(cmmx_t*)(b       )>>1) & ~SIGN_BITS;
00239         cmmx_t  old_e = (*(cmmx_t*)(a +   as)>>1) & ~SIGN_BITS;
00240         cmmx_t      e = (*(cmmx_t*)(b +   bs)>>1) & ~SIGN_BITS;
00241         cmmx_t  old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS;
00242         cmmx_t      o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS;
00243         cmmx_t     ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS;
00244 
00245         cmmx_t  down_even = p31avgb_s(e, ne);
00246         cmmx_t     up_odd = p31avgb_s(o, po);
00247         cmmx_t up_old_odd = p31avgb_s(old_o, old_po);
00248 
00249         cmmx_t   odd_diff = pdiffub_s(o, old_o);
00250         cmmx_t  temp_diff = pdiffub_s(down_even, up_old_odd);
00251         cmmx_t noise_diff = pdiffub_s(down_even, up_odd);
00252 
00253         m->even += psadbw_s(e, old_e) << 1;
00254         m->odd  += psumbw_s(odd_diff) << 1;
00255 
00256         temp_diff  = pminub_s(temp_diff, odd_diff);
00257         noise_diff = pminub_s(noise_diff, odd_diff);
00258 
00259         m->noise += psumbw_s(noise_diff) << 1;
00260         m->temp  += psumbw_s(temp_diff) << 1;
00261         a += 2*as;
00262         b += 2*bs;
00263     } while (--lines);
00264 
00265 }
00266 
00267 static inline void
00268 get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s)
00269 {
00270     unsigned two_e = m->even  + MAX(m->even , p->thres.even );
00271     unsigned two_o = m->odd   + MAX(m->odd  , p->thres.odd  );
00272     unsigned two_n = m->noise + MAX(m->noise, p->thres.noise);
00273     unsigned two_t = m->temp  + MAX(m->temp , p->thres.temp );
00274 
00275     unsigned e_big   = m->even  >= (m->odd   + two_o + 1)/2;
00276     unsigned o_big   = m->odd   >= (m->even  + two_e + 1)/2;
00277     unsigned n_big   = m->noise >= (m->temp  + two_t + 1)/2;
00278     unsigned t_big   = m->temp  >= (m->noise + two_n + 1)/2;
00279 
00280     unsigned e2x     = m->even  >= two_o;
00281     unsigned o2x     = m->odd   >= two_e;
00282     unsigned n2x     = m->noise >= two_t;
00283     unsigned t2x     = m->temp  >= two_n;
00284 
00285     unsigned ntiny_e = m->even  > p->thres.even ;
00286     unsigned ntiny_o = m->odd   > p->thres.odd  ;
00287     unsigned ntiny_n = m->noise > p->thres.noise;
00288     unsigned ntiny_t = m->temp  > p->thres.temp ;
00289 
00290     unsigned nlow_e  = m->even  > 2*p->thres.even ;
00291     unsigned nlow_o  = m->odd   > 2*p->thres.odd  ;
00292     unsigned nlow_n  = m->noise > 2*p->thres.noise;
00293     unsigned nlow_t  = m->temp  > 2*p->thres.temp ;
00294 
00295     unsigned high_e  = m->even  > 4*p->thres.even ;
00296     unsigned high_o  = m->odd   > 4*p->thres.odd  ;
00297     unsigned high_n  = m->noise > 4*p->thres.noise;
00298     unsigned high_t  = m->temp  > 4*p->thres.temp ;
00299 
00300     unsigned low_il  = !n_big && !t_big && ntiny_n && ntiny_t;
00301     unsigned high_il = !n_big && !t_big && nlow_n  && nlow_t;
00302 
00303     if (low_il | high_il) {
00304         s->interlaced_low  += low_il;
00305         s->interlaced_high += high_il;
00306     } else {
00307         s->tiny.even  += ntiny_e;
00308         s->tiny.odd   += ntiny_o;
00309         s->tiny.noise += ntiny_n;
00310         s->tiny.temp  += ntiny_t;
00311 
00312         s->low .even  += nlow_e ;
00313         s->low .odd   += nlow_o ;
00314         s->low .noise += nlow_n ;
00315         s->low .temp  += nlow_t ;
00316 
00317         s->high.even  += high_e ;
00318         s->high.odd   += high_o ;
00319         s->high.noise += high_n ;
00320         s->high.temp  += high_t ;
00321 
00322         if (m->even  >=        p->sad_thres) s->sad.even  += m->even ;
00323         if (m->odd   >=        p->sad_thres) s->sad.odd   += m->odd  ;
00324         if (m->noise >=        p->sad_thres) s->sad.noise += m->noise;
00325         if (m->temp  >=        p->sad_thres) s->sad.temp  += m->temp ;
00326     }
00327     s->num_blocks++;
00328     s->max.even  = MAX(s->max.even , m->even );
00329     s->max.odd   = MAX(s->max.odd  , m->odd  );
00330     s->max.noise = MAX(s->max.noise, m->noise);
00331     s->max.temp  = MAX(s->max.temp , m->temp );
00332 
00333     s->bigger.even  += e_big  ;
00334     s->bigger.odd   += o_big  ;
00335     s->bigger.noise += n_big  ;
00336     s->bigger.temp  += t_big  ;
00337 
00338     s->twox.even  += e2x    ;
00339     s->twox.odd   += o2x    ;
00340     s->twox.noise += n2x    ;
00341     s->twox.temp  += t2x    ;
00342 
00343 }
00344 
00345 static inline struct metrics
00346 block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs,
00347                 int lines, struct vf_priv_s *p, struct frame_stats *s)
00348 {
00349     struct metrics tm;
00350     tm.even = tm.odd = tm.noise = tm.temp = 0;
00351     get_metrics_c(a, b, as, bs, lines, &tm);
00352     if (sizeof(cmmx_t) < 8)
00353         get_metrics_c(a+4, b+4, as, bs, lines, &tm);
00354     get_block_stats(&tm, p, s);
00355     return tm;
00356 }
00357 
00358 static inline struct metrics
00359 block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
00360                 int lines, struct vf_priv_s *p, struct frame_stats *s)
00361 {
00362     struct metrics tm;
00363     tm.even = tm.odd = tm.noise = tm.temp = 0;
00364     get_metrics_fast_c(a, b, as, bs, lines, &tm);
00365     if (sizeof(cmmx_t) < 8)
00366         get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm);
00367     get_block_stats(&tm, p, s);
00368     return tm;
00369 }
00370 
00371 static inline struct metrics
00372 block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
00373                 int lines, struct vf_priv_s *p, struct frame_stats *s)
00374 {
00375     struct metrics tm;
00376     tm.even = tm.odd = tm.noise = tm.temp = 0;
00377     get_metrics_faster_c(a, b, as, bs, lines, &tm);
00378     if (sizeof(cmmx_t) < 8)
00379         get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm);
00380     get_block_stats(&tm, p, s);
00381     return tm;
00382 }
00383 
/* True when two aggregates agree in all of their even/odd/temp/noise
 * members.  Works for any type that has those four members. */
00384 #define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise)
00385 
/*
 * Shared asm body of block_metrics_3dnow() and block_metrics_mmx2().  It is
 * expanded twice, each time with the PSADBW/PSUMBW/PMINUBT/PAVGB definitions
 * in effect at the point of expansion.
 *
 * Expects these names in the enclosing scope: a, b (pixel pointers), as, bs
 * (strides), lines (loop count, at least 1) and ones (a 64-bit constant
 * with every byte equal to 1).
 *
 * On exit:
 *   mm7 holds the four accumulated metrics as packed words in struct metrics
 *       order (even, odd, noise, temp);
 *   mm6 is zero;
 *   a and b have been moved back one row and then forward two rows per
 *       iteration.
 * The macro does not execute emms; the caller does that after reading mm7.
 *
 * NOTE(review): the "psubusb ones" before each pair of PAVGB presumably
 * compensates for the round-up of the byte average -- confirm.
 */
00386 #define BLOCK_METRICS_TEMPLATE() \
00387     __asm__ volatile("pxor %mm7, %mm7\n\t"   /* The result is collected in mm7 */ \
00388                  "pxor %mm6, %mm6\n\t"   /* Temp to stay at 0 */             \
00389         );                                                                     \
00390     a -= as;                                                                     \
00391     b -= bs;                                                                     \
00392     do {                                                                     \
00393         __asm__ volatile(                                                     \
00394             "movq (%0,%2), %%mm0\n\t"                                             \
00395             "movq (%1,%3), %%mm1\n\t"   /* mm1 = even */                     \
00396             PSADBW(%%mm1, %%mm0, %%mm4, %%mm6)                                     \
00397             "paddusw %%mm0, %%mm7\n\t"  /* even diff */                             \
00398             "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */                     \
00399             "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */                             \
00400             "movq (%0), %%mm3\n\t"                                             \
00401             "psubusb %4, %%mm3\n\t"                                             \
00402             PAVGB(%%mm0, %%mm3)                                                     \
00403             PAVGB(%%mm0, %%mm3)    /* mm3 = qup old odd */                     \
00404             "movq %%mm0, %%mm5\n\t"                                             \
00405             PSADBW(%%mm2, %%mm0, %%mm4, %%mm6)                                     \
00406             "psllq $16, %%mm0\n\t"                                             \
00407             "paddusw %%mm0, %%mm7\n\t"                                             \
00408             "movq (%1), %%mm4\n\t"                                             \
00409             "lea (%0,%2,2), %0\n\t"                                             \
00410             "lea (%1,%3,2), %1\n\t"                                             \
00411             "psubusb %4, %%mm4\n\t"                                             \
00412             PAVGB(%%mm2, %%mm4)                                                     \
00413             PAVGB(%%mm2, %%mm4)    /* mm4 = qup odd */                             \
00414             PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */             \
00415             "movq (%1,%3), %%mm5\n\t"                                             \
00416             "psubusb %4, %%mm5\n\t"                                             \
00417             PAVGB(%%mm1, %%mm5)                                                     \
00418             PAVGB(%%mm5, %%mm1)    /* mm1 = qdown even */                     \
00419             PAVGB((%1,%3), %%mm5)  /* mm5 = qup next even */                     \
00420             PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */             \
00421             PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */             \
00422             PMINUBT(%%mm2, %%mm3, %%mm0)  /* limit temp to odd diff */             \
00423             PMINUBT(%%mm2, %%mm4, %%mm0)  /* limit noise to odd diff */             \
00424             "movq (%1,%3,2), %%mm2\n\t"                                             \
00425             "psubusb %4, %%mm2\n\t"                                             \
00426             PAVGB((%1), %%mm2)                                                     \
00427             PAVGB((%1), %%mm2)    /* mm2 = qdown odd */                             \
00428             "movq (%0,%2,2), %%mm1\n\t"                                             \
00429             "psubusb %4, %%mm1\n\t"                                             \
00430             PAVGB((%0), %%mm1)                                                     \
00431             PAVGB((%0), %%mm1)  /* mm1 = qdown old odd */                     \
00432             PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */             \
00433             PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */             \
00434             PMINUBT(%%mm4, %%mm2, %%mm0)  /* current */                             \
00435             PMINUBT(%%mm3, %%mm1, %%mm0)  /* old */                             \
00436             PSUMBW(%%mm2, %%mm0, %%mm6)                                             \
00437             PSUMBW(%%mm1, %%mm0, %%mm6)                                             \
00438             "psllq $32, %%mm2\n\t"                                             \
00439             "psllq $48, %%mm1\n\t"                                             \
00440             "paddusw %%mm2, %%mm7\n\t"                                             \
00441             "paddusw %%mm1, %%mm7\n\t"                                             \
00442             : "=r" (a), "=r" (b)                                             \
00443             : "r"((x86_reg)as), "r"((x86_reg)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
00444             );                                                                     \
00445     } while (--lines);
00446 
00447 static inline struct metrics
00448 block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
00449                     int lines, struct vf_priv_s *p, struct frame_stats *s)
00450 {
00451     struct metrics tm;
00452 #if !HAVE_AMD3DNOW
00453     mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");
00454 #else
00455     static const unsigned long long ones = 0x0101010101010101ull;
00456 
00457     BLOCK_METRICS_TEMPLATE();
00458     __asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
00459     get_block_stats(&tm, p, s);
00460 #endif
00461     return tm;
00462 }
00463 
/*
 * From here on the helper macros map to single MMX2 instructions, so the
 * next expansion of BLOCK_METRICS_TEMPLATE() uses them.  The scratch (T)
 * arguments are ignored.  PSUMBW still requires Z to hold zero: psadbw
 * against zero yields the byte sum.  PDIFFUB, PDIFFUBT and PMAXUW keep
 * their earlier definitions.
 */
00464 #undef PSUMBW
00465 #undef PSADBW
00466 #undef PMAXUB
00467 #undef PMINUBT
00468 #undef PAVGB
00469 
00470 #define PSUMBW(X,T,Z)        "psadbw " #Z "," #X "\n\t"
00471 #define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t"
00472 #define PMAXUB(X,Y)        "pmaxub " #X "," #Y "\n\t"
00473 #define PMINUBT(X,Y,T)        "pminub " #X "," #Y "\n\t"
00474 #define PAVGB(X,Y)        "pavgb "  #X "," #Y "\n\t"
00475 
00476 static inline struct metrics
00477 block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
00478                    int lines, struct vf_priv_s *p, struct frame_stats *s)
00479 {
00480     struct metrics tm;
00481 #if !HAVE_MMX
00482     mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n");
00483 #else
00484     static const unsigned long long ones = 0x0101010101010101ull;
00485     x86_reg interlaced;
00486     x86_reg prefetch_line = (((long)a>>3) & 7) + 10;
00487 #ifdef DEBUG
00488     struct frame_stats ts = *s;
00489 #endif
00490     __asm__ volatile("prefetcht0 (%0,%2)\n\t"
00491                  "prefetcht0 (%1,%3)\n\t" :
00492                  : "r" (a), "r" (b),
00493                  "r" (prefetch_line * as), "r" (prefetch_line * bs));
00494 
00495     BLOCK_METRICS_TEMPLATE();
00496 
00497     s->num_blocks++;
00498     __asm__ volatile(
00499         "movq %3, %%mm0\n\t"
00500         "movq %%mm7, %%mm1\n\t"
00501         "psubusw %%mm0, %%mm1\n\t"
00502         "movq %%mm1, %%mm2\n\t"
00503         "paddusw %%mm0, %%mm2\n\t"
00504         "paddusw %%mm7, %%mm2\n\t"
00505         "pshufw $0xb1, %%mm2, %%mm3\n\t"
00506         "pavgw %%mm7, %%mm2\n\t"
00507         "pshufw $0xb1, %%mm2, %%mm2\n\t"
00508         "psubusw %%mm7, %%mm2\n\t"
00509         "pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */
00510         "psubusw %%mm7, %%mm3\n\t"
00511         "pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */
00512         "movq %1, %%mm4\n\t"
00513         "movq %2, %%mm5\n\t"
00514         "psubw %%mm2, %%mm4\n\t"
00515         "psubw %%mm3, %%mm5\n\t"
00516         "movq %%mm4, %1\n\t"
00517         "movq %%mm5, %2\n\t"
00518         "pxor %%mm4, %%mm4\n\t"
00519         "pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */
00520         "psubusw %%mm0, %%mm1\n\t"
00521         "pxor %%mm5, %%mm5\n\t"
00522         "pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */
00523         "psubusw %%mm0, %%mm1\n\t"
00524         "psubusw %%mm0, %%mm1\n\t"
00525         "pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */
00526         "pshufw $0xb1, %%mm2, %%mm0\n\t"
00527         "por %%mm2, %%mm0\n\t"     /* 1 if not close */
00528         "punpckhdq %%mm0, %%mm0\n\t"
00529         "movq %%mm4, %%mm2\n\t"      /* tttt */
00530         "punpckhdq %%mm5, %%mm2\n\t" /* ttll */
00531         "por %%mm2, %%mm0\n\t"
00532         "pcmpeqd %%mm6, %%mm0\n\t" /* close && big */
00533         "psrlq $16, %%mm0\n\t"
00534         "psrlw $15, %%mm0\n\t"
00535         "movd %%mm0, %0\n\t"
00536         : "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox)
00537         : "m" (p->thres)
00538         );
00539 
00540     if (interlaced) {
00541         s->interlaced_high += interlaced >> 16;
00542         s->interlaced_low += interlaced;
00543     } else {
00544         __asm__ volatile(
00545             "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */
00546             "psubw         %%mm0, %%mm4\n\t"
00547             "psubw         %%mm0, %%mm5\n\t"
00548             "psubw         %%mm0, %%mm1\n\t"
00549             "paddw %0, %%mm4\n\t"
00550             "paddw %1, %%mm5\n\t"
00551             "paddw %2, %%mm1\n\t"
00552             "movq %%mm4, %0\n\t"
00553             "movq %%mm5, %1\n\t"
00554             "movq %%mm1, %2\n\t"
00555             : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)
00556             );
00557 
00558         __asm__ volatile(
00559             "pshufw $0, %2, %%mm0\n\t"
00560             "psubusw %%mm7, %%mm0\n\t"
00561             "pcmpeqw %%mm6, %%mm0\n\t"   /* 0 if below sad_thres */
00562             "pand %%mm7, %%mm0\n\t"
00563             "movq %%mm0, %%mm1\n\t"
00564             "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */
00565             "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */
00566             "paddd %0, %%mm0\n\t"
00567             "paddd %1, %%mm1\n\t"
00568             "movq %%mm0, %0\n\t"
00569             "movq %%mm1, %1\n\t"
00570             : "=m" (s->sad.even), "=m" (s->sad.noise)
00571             : "m" (p->sad_thres)
00572             );
00573     }
00574 
00575     __asm__ volatile(
00576         "movq %%mm7, (%1)\n\t"
00577         PMAXUW((%0), %%mm7)
00578         "movq %%mm7, (%0)\n\t"
00579         "emms"
00580         : : "r" (&s->max), "r" (&tm), "X" (s->max)
00581         : "memory"
00582         );
00583 #ifdef DEBUG
00584     if (1) {
00585         struct metrics cm;
00586         a -= 7*as;
00587         b -= 7*bs;
00588         cm = block_metrics_c(a, b, as, bs, 4, p, &ts);
00589         if (!MEQ(tm, cm))
00590             mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n");
00591         if (s) {
00592 #           define CHECK(X) if (!MEQ(s->X, ts.X)) \
00593                 mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n");
00594             CHECK(tiny);
00595             CHECK(low);
00596             CHECK(high);
00597             CHECK(sad);
00598             CHECK(max);
00599         }
00600     }
00601 #endif
00602 #endif
00603     return tm;
00604 }
00605 
00606 static inline int
00607 dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
00608                     long cos, int ds, int ss, int w, int t)
00609 {
00610 #if !HAVE_MMX
00611     mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n");
00612     return 0;
00613 #else
00614     unsigned long len = (w+7) >> 3;
00615     int ret;
00616     __asm__ volatile (
00617         "pxor %%mm6, %%mm6 \n\t"       /* deinterlaced pixel counter */
00618         "movd %0, %%mm7 \n\t"
00619         "punpcklbw %%mm7, %%mm7 \n\t"
00620         "punpcklwd %%mm7, %%mm7 \n\t"
00621         "punpckldq %%mm7, %%mm7 \n\t"  /* mm7 = threshold */
00622         : /* no output */
00623         : "rm" (t)
00624         );
00625     do {
00626         __asm__ volatile (
00627             "movq (%0), %%mm0\n\t"
00628             "movq (%0,%3,2), %%mm1\n\t"
00629             "movq %%mm0, (%2)\n\t"
00630             "pmaxub %%mm1, %%mm0\n\t"
00631             "pavgb (%0), %%mm1\n\t"
00632             "psubusb %%mm1, %%mm0\n\t"
00633             "paddusb %%mm7, %%mm0\n\t"  /* mm0 = max-avg+thr */
00634             "movq (%0,%1), %%mm2\n\t"
00635             "movq (%0,%5), %%mm3\n\t"
00636             "movq %%mm2, %%mm4\n\t"
00637             PDIFFUBT(%%mm1, %%mm2, %%mm5)
00638             PDIFFUBT(%%mm1, %%mm3, %%mm5)
00639             "pminub %%mm2, %%mm3\n\t"
00640             "pcmpeqb %%mm3, %%mm2\n\t"  /* b = min */
00641             "pand %%mm2, %%mm4\n\t"
00642             "pandn (%0,%5), %%mm2\n\t"
00643             "por %%mm4, %%mm2\n\t"
00644             "pminub %%mm0, %%mm3\n\t"
00645             "pcmpeqb %%mm0, %%mm3\n\t"  /* set to 1s if >= threshold */
00646             "psubb %%mm3, %%mm6\n\t"    /* count pixels above thr. */
00647             "pand %%mm3, %%mm1 \n\t"
00648             "pandn %%mm2, %%mm3 \n\t"
00649             "por %%mm3, %%mm1 \n\t"     /* avg if >= threshold */
00650             "movq %%mm1, (%2,%4) \n\t"
00651             : /* no output */
00652             : "r" (a), "r" ((x86_reg)bos), "r" ((x86_reg)dst), "r" ((x86_reg)ss), "r" ((x86_reg)ds), "r" ((x86_reg)cos)
00653             );
00654         a += 8;
00655         dst += 8;
00656     } while (--len);
00657 
00658     __asm__ volatile ("pxor %%mm7, %%mm7 \n\t"
00659                   "psadbw %%mm6, %%mm7 \n\t"
00660                   "movd %%mm7, %0 \n\t"
00661                   "emms \n\t"
00662                   : "=r" (ret)
00663         );
00664     return ret;
00665 #endif
00666 }
00667 
00668 static inline int
00669 dint_copy_line(unsigned char *dst, unsigned char *a, long bos,
00670                long cos, int ds, int ss, int w, int t)
00671 {
00672     unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t);
00673     cmmx_t dint_count = 0;
00674     cmmx_t thr;
00675     t |= t <<  8;
00676     thr = t | (t << 16);
00677     if (sizeof(cmmx_t) > 4)
00678         thr |= thr << (sizeof(cmmx_t)*4);
00679     do {
00680         cmmx_t e = *(cmmx_t*)a;
00681         cmmx_t ne = *(cmmx_t*)(a+2*ss);
00682         cmmx_t o = *(cmmx_t*)(a+bos);
00683         cmmx_t oo = *(cmmx_t*)(a+cos);
00684         cmmx_t maxe = pmaxub(e, ne);
00685         cmmx_t avge = pavgb(e, ne);
00686         cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */
00687         cmmx_t diffo  = pdiffub(avge, o);
00688         cmmx_t diffoo = pdiffub(avge, oo);
00689         cmmx_t diffcmp = pcmpgtub(diffo, diffoo);
00690         cmmx_t bo = ((oo ^ o) & diffcmp) ^ o;
00691         cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo;
00692         cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo);
00693         cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo;
00694         dint_count += above_thr & ONE_BYTES;
00695         *(cmmx_t*)(dst) = e;
00696         *(cmmx_t*)(dst+ds) = bo_or_avg;
00697         a += sizeof(cmmx_t);
00698         dst += sizeof(cmmx_t);
00699     } while (--len);
00700     return psumbw(dint_count);
00701 }
00702 
/*
 * Copy one plane to d, two rows at a time, optionally interpolating every
 * second row.
 *
 * d / ds     - destination and its stride
 * a, b, c    - source planes; even output rows come from a, the in-between
 *              rows from b or c or an interpolation (see dint_copy_line())
 * ss         - stride shared by the three source planes
 * w, h       - width in bytes and height in rows
 * threshold  - 128 or more means plain copying from a and b
 * field      - when non-zero, the first output row is taken from b and the
 *              row pairs start one row further down
 * mmx2       - 1 selects dint_copy_line_mmx2(), anything else the C version
 *
 * Returns the number of interpolated pixels reported by the line routines.
 */
static int
dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b,
                unsigned char *c, unsigned long w, unsigned long h,
                unsigned long ds, unsigned long ss, unsigned long threshold,
                long field, long mmx2)
{
    unsigned long dint_pixels = 0;
    /* offsets from a row of a to the following row of b and of c */
    long bos = b - a;
    long cos = c - a;

    bos += ss;
    cos += ss;

    if (field) {
        fast_memcpy(d, b, w);
        h--;
        d += ds;
        a += ss;
    }

    for (; h > 2; h -= 2, d += 2*ds, a += 2*ss) {
        if (threshold >= 128) {
            fast_memcpy(d, a, w);
            fast_memcpy(d+ds, a+bos, w);
            continue;
        }
        dint_pixels += (mmx2 == 1)
            ? dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold)
            : dint_copy_line(d, a, bos, cos, ds, ss, w, threshold);
    }

    /* the last one or two rows are always copied verbatim */
    fast_memcpy(d, a, w);
    if (h == 2)
        fast_memcpy(d+ds, a+bos, w);

    return dint_pixels;
}
00737 
00738 static void
00739 copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi,
00740                   unsigned char **old, unsigned char **new, unsigned long show)
00741 {
00742     unsigned long threshold = 256;
00743     unsigned long field = p->swapped;
00744     unsigned long dint_pixels = 0;
00745     unsigned char **other = old;
00746     if (show >= 12 || !(show & 3))
00747         show >>= 2, other = new, new = old;
00748     if (show <= 2) {  /* Single field: de-interlace */
00749         threshold = p->dint_thres;
00750         field ^= show & 1;
00751         old = new;
00752     } else if (show == 3)
00753         old = new;
00754     else
00755         field ^= 1;
00756     dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0],
00757                                   other[0], p->w, p->h, dmpi->stride[0],
00758                                   p->stride, threshold, field, p->mmx2);
00759     if (dmpi->flags & MP_IMGFLAG_PLANAR) {
00760         if (p->luma_only)
00761             old = new, other = new;
00762         else
00763             threshold = threshold/2 + 1;
00764         field ^= p->chroma_swapped;
00765         dint_copy_plane(dmpi->planes[1], old[1], new[1],
00766                         other[1], p->cw, p->ch,        dmpi->stride[1],
00767                         p->chroma_stride, threshold, field, p->mmx2);
00768         dint_copy_plane(dmpi->planes[2], old[2], new[2],
00769                         other[2], p->cw, p->ch, dmpi->stride[2],
00770                         p->chroma_stride, threshold, field, p->mmx2);
00771     }
00772     if (dint_pixels > 0 && p->verbose)
00773         mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels);
00774 }
00775 
00776 static void diff_planes(struct vf_priv_s *p, struct frame_stats *s,
00777                         unsigned char *of, unsigned char *nf,
00778                         int w, int h, int os, int ns, int swapped)
00779 {
00780     int i, y;
00781     int align = -(long)nf & 7;
00782     of += align;
00783     nf += align;
00784     w -= align;
00785     if (swapped)
00786         of -= os, nf -= ns;
00787     i = (h*3 >> 7) & ~1;
00788     of += i*os + 8;
00789     nf += i*ns + 8;
00790     h -= i;
00791     w -= 16;
00792 
00793     memset(s, 0, sizeof(*s));
00794 
00795     for (y = (h-8) >> 3; y; y--) {
00796         if (p->mmx2 == 1) {
00797             for (i = 0; i < w; i += 8)
00798                 block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);
00799         } else if (p->mmx2 == 2) {
00800             for (i = 0; i < w; i += 8)
00801                 block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);
00802         } else if (p->fast > 3) {
00803             for (i = 0; i < w; i += 8)
00804                 block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);
00805         } else if (p->fast > 1) {
00806             for (i = 0; i < w; i += 8)
00807                 block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s);
00808         } else {
00809             for (i = 0; i < w; i += 8)
00810                 block_metrics_c(of+i, nf+i, os, ns, 4, p, s);
00811         }
00812         of += 8*os;
00813         nf += 8*ns;
00814     }
00815 }
00816 
00817 #define METRICS(X) (X).even, (X).odd, (X).noise, (X).temp
00818 
00819 static void diff_fields(struct vf_priv_s *p, struct frame_stats *s,
00820                         unsigned char **old, unsigned char **new)
00821 {
00822     diff_planes(p, s, old[0], new[0], p->w, p->h,
00823                 p->stride, p->stride, p->swapped);
00824     s->sad.even  = (s->sad.even  * 16ul) / s->num_blocks;
00825     s->sad.odd   = (s->sad.odd   * 16ul) / s->num_blocks;
00826     s->sad.noise = (s->sad.noise * 16ul) / s->num_blocks;
00827     s->sad.temp  = (s->sad.temp  * 16ul) / s->num_blocks;
00828     if (p->verbose)
00829         mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d - %d, "
00830                "t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, "
00831                "2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n",
00832                p->inframes, p->chflag, METRICS(s->max), s->num_blocks,
00833                METRICS(s->tiny), METRICS(s->low), METRICS(s->high),
00834                METRICS(s->bigger), METRICS(s->twox), METRICS(s->sad),
00835                s->interlaced_low, s->interlaced_high,
00836                p->iosync / (double) p->in_inc);
00837 }
00838 
00839 static const char *parse_args(struct vf_priv_s *p, const char *args)
00840 {
00841     args--;
00842     while (args && *++args &&
00843            (sscanf(args, "io=%lu:%lu", &p->out_dec, &p->in_inc) == 2 ||
00844             sscanf(args, "diff_thres=%hu", &p->thres.even ) == 1 ||
00845             sscanf(args, "comb_thres=%hu", &p->thres.noise) == 1 ||
00846             sscanf(args, "sad_thres=%lu",  &p->sad_thres  ) == 1 ||
00847             sscanf(args, "dint_thres=%lu", &p->dint_thres ) == 1 ||
00848             sscanf(args, "fast=%u",        &p->fast       ) == 1 ||
00849             sscanf(args, "mmx2=%lu",       &p->mmx2       ) == 1 ||
00850             sscanf(args, "luma_only=%u",   &p->luma_only  ) == 1 ||
00851             sscanf(args, "verbose=%u",     &p->verbose    ) == 1 ||
00852             sscanf(args, "crop=%lu:%lu:%lu:%lu", &p->w,
00853                    &p->h, &p->crop_x, &p->crop_y) == 4))
00854         args = strchr(args, '/');
00855     return args;
00856 }
00857 
/*
 * Greatest common divisor of x and y (Euclid's algorithm).
 * If one argument is zero the other one is returned; gcd(0, 0) is 0.
 */
static unsigned long gcd(unsigned long x, unsigned long y)
{
    unsigned long lo = x < y ? x : y;
    unsigned long hi = x < y ? y : x;

    while (lo != 0) {
        unsigned long rem = hi % lo;
        hi = lo;
        lo = rem;
    }
    return hi;
}
00871 
/*
 * Set up the stored-frame buffers and the rate-conversion counters.
 *
 * One heap block is allocated that holds NUM_STORED luma planes followed,
 * for planar formats, by NUM_STORED pairs of chroma planes.
 * p->planes[0 .. NUM_STORED-1] receive the plane base pointers and
 * p->planes[NUM_STORED .. 2*NUM_STORED-1] the same pointers advanced by
 * the crop origin.  Luma is cleared to 0 and chroma to 0x80.
 * Finally out_dec is multiplied by four and the in_inc/out_dec pair is
 * reduced by its greatest common divisor.
 *
 * NOTE(review): the malloc() result is used without a NULL check.
 * NOTE(review): gcd() returns 0 when in_inc and out_dec are both 0, which
 * would make the two divisions below divide by zero - confirm that the
 * caller rules this out.
 */
00872 static void init(struct vf_priv_s *p, mp_image_t *mpi)
00873 {
00874     unsigned long i;
00875     unsigned long plane_size, chroma_plane_size;
00876     unsigned char *plane;
00877     unsigned long cos, los;
          /* crop origin expressed in chroma samples */
00878     p->crop_cx = p->crop_x >> mpi->chroma_x_shift;
00879     p->crop_cy = p->crop_y >> mpi->chroma_y_shift;
00880     if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) {
              /* round the luma stride up to a multiple of 16 */
00881         p->stride = (mpi->w + 15) & ~15;
00882         p->chroma_stride = p->stride >> mpi->chroma_x_shift;
00883     } else {
00884         p->stride = mpi->width;
00885         p->chroma_stride = mpi->chroma_width;
00886     }
00887     p->cw = p->w >> mpi->chroma_x_shift;
00888     p->ch = p->h >> mpi->chroma_y_shift;
00889     p->nplanes = 1;
00890     p->static_idx = 0;
00891     p->temp_idx = 0;
00892     p->old_planes = p->planes[0];
00893     plane_size = mpi->h * p->stride;
00894     chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ?
00895         mpi->chroma_height * p->chroma_stride : 0;
          /* extra 4096 bytes leave room for the alignment step below */
00896     p->memory_allocated =
00897         malloc(NUM_STORED * (plane_size+2*chroma_plane_size) +
00898                8*p->chroma_stride + 4096);
00899     /* align to page boundary */
00900     plane = p->memory_allocated + (-(long)p->memory_allocated & 4095);
00901     memset(plane, 0, NUM_STORED * plane_size);
          /* byte offsets of the crop origin inside a luma / chroma plane */
00902     los = p->crop_x  + p->crop_y  * p->stride;
00903     cos = p->crop_cx + p->crop_cy * p->chroma_stride;
00904     for (i = 0; i != NUM_STORED; i++, plane += plane_size) {
00905         p->planes[i][0] = plane;
00906         p->planes[NUM_STORED + i][0] = plane + los;
00907     }
00908     if (mpi->flags & MP_IMGFLAG_PLANAR) {
00909         p->nplanes = 3;
              /* plane now points just past the last luma plane */
00910         memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size);
00911         for (i = 0; i != NUM_STORED; i++) {
00912             p->planes[i][1] = plane;
00913             p->planes[NUM_STORED + i][1] = plane + cos;
00914             plane += chroma_plane_size;
00915             p->planes[i][2] = plane;
00916             p->planes[NUM_STORED + i][2] = plane + cos;
00917             plane += chroma_plane_size;
00918         }
00919     }
          /* scale out_dec by 4, then reduce the ratio to lowest terms */
00920     p->out_dec <<= 2;
00921     i = gcd(p->in_inc, p->out_dec);
00922     p->in_inc /= i;
00923     p->out_dec /= i;
00924     p->iosync = 0;
00925     p->num_fields = 3;
00926 }
00927 
/* Current time of day from gettimeofday(), in seconds as a double. */
static inline double get_time(void)
{
    struct timeval now;
    double seconds;
    double fraction;

    gettimeofday(&now, 0);
    seconds  = now.tv_sec;
    fraction = now.tv_usec * 1e-6;
    return seconds + fraction;
}
00934 
00935 static void get_image(struct vf_instance *vf, mp_image_t *mpi)
00936 {
00937     struct vf_priv_s *p = vf->priv;
00938     static unsigned char **planes, planes_idx;
00939 
00940     if (mpi->type == MP_IMGTYPE_STATIC) return;
00941 
00942     if (!p->planes[0][0]) init(p, mpi);
00943 
00944     if (mpi->type == MP_IMGTYPE_TEMP ||
00945         (mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE)))
00946         planes_idx = NUM_STORED/2 + (++p->temp_idx % (NUM_STORED/2));
00947     else
00948         planes_idx = ++p->static_idx % (NUM_STORED/2);
00949     planes = p->planes[planes_idx];
00950     mpi->priv = p->planes[NUM_STORED + planes_idx];
00951     if (mpi->priv == p->old_planes) {
00952         unsigned char **old_planes =
00953             p->planes[NUM_STORED + 2 + (++p->temp_idx & 1)];
00954         my_memcpy_pic(old_planes[0], p->old_planes[0],
00955                       p->w, p->h, p->stride, p->stride);
00956         if (mpi->flags & MP_IMGFLAG_PLANAR) {
00957             my_memcpy_pic(old_planes[1], p->old_planes[1],
00958                           p->cw, p->ch, p->chroma_stride, p->chroma_stride);
00959             my_memcpy_pic(old_planes[2], p->old_planes[2],
00960                           p->cw, p->ch, p->chroma_stride, p->chroma_stride);
00961         }
00962         p->old_planes = old_planes;
00963         p->num_copies++;
00964     }
00965     mpi->planes[0] = planes[0];
00966     mpi->stride[0] = p->stride;
00967     if (mpi->flags & MP_IMGFLAG_PLANAR) {
00968         mpi->planes[1] = planes[1];
00969         mpi->planes[2] = planes[2];
00970         mpi->stride[1] = mpi->stride[2] = p->chroma_stride;
00971     }
00972     mpi->width = p->stride;
00973 
00974     mpi->flags |= MP_IMGFLAG_DIRECT;
00975     mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK;
00976 }
00977 
/*
 * Compare x and y with a tolerance that grows with their magnitude.
 *
 * The tolerance is (x + y + err) >> e.  The result is
 *    +2 / -2  if x - y exceeds the full tolerance (above / below),
 *    +1 / -1  if it only exceeds half of it,
 *     0       otherwise.
 */
static inline long
cmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e)
{
    const long delta  = x - y;
    const long wide   = (x + y + err) >> e;
    const long narrow = wide >> 1;
    long score = 0;

    if (delta > wide)
        score++;
    if (delta < -wide)
        score--;
    if (delta > narrow)
        score++;
    if (delta < -narrow)
        score--;
    return score;
}
00987 
/*
 * Classify the current frame from its statistics *s, the statistics of
 * the previous frame and the field counters kept in *p.
 *
 * The returned code is what put_image() switches on: the function yields
 * 1, 2, 3 or 4 from the early tests, and 8, 9 or 10 (8 plus an optional
 * 1 << n) when it reaches the 'still' label without a more specific
 * decision.  The in-line comments mark the scene-change and still-frame
 * cases; the exact meaning of each code is defined by its use in
 * put_image().
 *
 * Thresholds are expressed relative to n = num_blocks / 256.
 */
00988 static unsigned long
00989 find_breaks(struct vf_priv_s *p, struct frame_stats *s)
00990 {
          /* statistics of the previous input frame (two-entry ring) */
00991     struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
00992     long notfilm = 5*p->in_inc - p->out_dec;
00993     unsigned long n = s->num_blocks >> 8;
          /* cmpe() returns -2..2; the value is stored as unsigned long, so
             the comparisons with -1 and -2 below rely on the negative
             constants being converted to unsigned long as well */
00994     unsigned long sad_comb_cmp = cmpe(s->sad.temp, s->sad.noise, 512, 1);
00995     unsigned long ret = 8;
00996 
00997     if (cmpe(s->sad.temp, s->sad.even, 512, 1) > 0)
00998         mp_msg(MSGT_VFILTER, MSGL_WARN,
00999                "@@@@@@@@ Bottom-first field??? @@@@@@@@\n");
01000     if (s->sad.temp > 1000 && s->sad.noise > 1000)
01001         return 3;
01002     if (s->interlaced_high >= 2*n && s->sad.temp > 256 && s->sad.noise > 256)
01003         return 3;
01004     if (s->high.noise > s->num_blocks/4 && s->sad.noise > 10000 &&
01005         s->sad.noise > 2*s->sad.even && s->sad.noise > 2*ps->sad.odd) {
01006         // Mid-frame scene change
01007         if (s->tiny.temp + s->interlaced_low  < n   ||
01008             s->low.temp  + s->interlaced_high < n/4 ||
01009             s->high.temp + s->interlaced_high < n/8 ||
01010             s->sad.temp < 160)
01011             return 1;
01012         return 3;
01013     }
01014     if (s->high.temp > s->num_blocks/4 && s->sad.temp > 10000 &&
01015         s->sad.temp > 2*ps->sad.odd && s->sad.temp > 2*ps->sad.even) {
01016         // Start frame scene change
01017         if (s->tiny.noise + s->interlaced_low  < n   ||
01018             s->low.noise  + s->interlaced_high < n/4 ||
01019             s->high.noise + s->interlaced_high < n/8 ||
01020             s->sad.noise < 160)
01021             return 2;
01022         return 3;
01023     }
01024     if (sad_comb_cmp == 2)
01025         return 2;
01026     if (sad_comb_cmp == -2)
01027         return 1;
01028 
          /* the same family of tests is repeated on the tiny, low, bigger
             and twox counters in turn */
01029     if (s->tiny.odd > 3*MAX(n,s->tiny.even) + s->interlaced_low)
01030         return 1;
01031     if (s->tiny.even > 3*MAX(n,s->tiny.odd)+s->interlaced_low &&
01032         (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
01033         return 4;
01034 
01035     if (s->sad.noise < 64 && s->sad.temp < 64 &&
01036         s->low.noise <= n/2 && s->high.noise <= n/4 &&
01037         s->low.temp  <= n/2 && s->high.temp  <= n/4)
01038         goto still;
01039 
01040     if (s->tiny.temp > 3*MAX(n,s->tiny.noise) + s->interlaced_low)
01041         return 2;
01042     if (s->tiny.noise > 3*MAX(n,s->tiny.temp) + s->interlaced_low)
01043         return 1;
01044 
01045     if (s->low.odd > 3*MAX(n/4,s->low.even) + s->interlaced_high)
01046         return 1;
01047     if (s->low.even > 3*MAX(n/4,s->low.odd)+s->interlaced_high &&
01048         s->sad.even > 2*s->sad.odd &&
01049         (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
01050         return 4;
01051 
01052     if (s->low.temp > 3*MAX(n/4,s->low.noise) + s->interlaced_high)
01053         return 2;
01054     if (s->low.noise > 3*MAX(n/4,s->low.temp) + s->interlaced_high)
01055         return 1;
01056 
01057     if (sad_comb_cmp == 1 && s->sad.noise < 64)
01058         return 2;
01059     if (sad_comb_cmp == -1 && s->sad.temp < 64)
01060         return 1;
01061 
01062     if (s->tiny.odd <= n || (s->tiny.noise <= n/2 && s->tiny.temp <= n/2)) {
01063         if (s->interlaced_low <= n) {
01064             if (p->num_fields == 1)
01065                 goto still;
01066             if (s->tiny.even <= n || ps->tiny.noise <= n/2)
01067                 /* Still frame */
01068                 goto still;
01069             if (s->bigger.even >= 2*MAX(n,s->bigger.odd) + s->interlaced_low)
01070                 return 4;
01071             if (s->low.even >= 2*n + s->interlaced_low)
01072                 return 4;
01073             goto still;
01074         }
01075     }
01076     if (s->low.odd <= n/4) {
01077         if (s->interlaced_high <= n/4) {
01078             if (p->num_fields == 1)
01079                 goto still;
01080             if (s->low.even <= n/4)
01081                 /* Still frame */
01082                 goto still;
01083             if (s->bigger.even >= 2*MAX(n/4,s->bigger.odd)+s->interlaced_high)
01084                 return 4;
01085             if (s->low.even >= n/2 + s->interlaced_high)
01086                 return 4;
01087             goto still;
01088         }
01089     }
01090     if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_low)
01091         return 2;
01092     if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_low)
01093         return 1;
01094     if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_high)
01095         return 2;
01096     if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_high)
01097         return 1;
01098     if (s->twox.temp > 2*MAX(n,s->twox.noise) + s->interlaced_high)
01099         return 2;
01100     if (s->twox.noise > 2*MAX(n,s->twox.temp) + s->interlaced_high)
01101         return 1;
01102     if (s->bigger.even > 2*MAX(n,s->bigger.odd) + s->interlaced_low &&
01103         s->bigger.temp < n && s->bigger.noise < n)
01104         return 4;
01105     if (s->interlaced_low > MIN(2*n, s->tiny.odd))
01106         return 3;
          /* no clear decision: 9 when sad.temp <= sad.noise, otherwise 10 */
01107     ret = 8 + (1 << (s->sad.temp > s->sad.noise));
01108   still:
          /* from here on the decision depends on the field counters and on
             notfilm = 5*in_inc - out_dec */
01109     if (p->num_fields == 1 && p->prev_fields == 3 && notfilm >= 0 &&
01110         (s->tiny.temp <= s->tiny.noise || s->sad.temp < s->sad.noise+16))
01111         return 1;
01112     if (p->notout < p->num_fields && p->iosync > 2*p->in_inc && notfilm < 0)
01113         notfilm = 0;
01114     if (p->num_fields < 2 ||
01115         (p->num_fields == 2 && p->prev_fields == 2 && notfilm < 0))
01116         return ret;
01117     if (!notfilm && (p->prev_fields&~1) == 2) {
01118         if (p->prev_fields + p->num_fields == 5) {
01119             if (s->tiny.noise <= s->tiny.temp ||
01120                 s->low.noise == 0 || s->low.noise < s->low.temp ||
01121                 s->sad.noise < s->sad.temp+16)
01122                 return 2;
01123         }
01124         if (p->prev_fields + p->num_fields == 4) {
01125             if (s->tiny.temp <= s->tiny.noise ||
01126                 s->low.temp == 0 || s->low.temp < s->low.noise ||
01127                 s->sad.temp < s->sad.noise+16)
01128                 return 1;
01129         }
01130     }
01131     if (p->num_fields > 2 &&
01132         ps->sad.noise > s->sad.noise && ps->sad.noise > s->sad.temp)
01133         return 4;
          /* 1 when sad.noise > sad.temp, otherwise 2 */
01134     return 2 >> (s->sad.noise > s->sad.temp);
01135 }
01136 
/* Maps 0 to a blank and 1..15 to the hex digit characters '1'..'9', 'a'..'f'. */
01137 #define ITOC(X) (!(X) ? ' ' : (X) + ((X)>9 ? 'a'-10 : '0'))
01138 
/*
 * Per-frame entry point of the filter.
 *
 * Steps, in order:
 *   1. make the incoming picture available in one of the stored plane
 *      sets (already there for direct-rendered images, copied otherwise);
 *   2. compute or reuse the frame statistics and let find_breaks()
 *      classify the frame;
 *   3. turn that classification plus the iosync/notout counters into the
 *      field mask show_fields and update the field counters;
 *   4. if show_fields is non-zero, emit a picture: either the stored
 *      planes are exported as they are, or copy_merge_fields() builds the
 *      output from the old and new planes.
 *
 * Returns the result of vf_next_put_image() when a picture was emitted
 * and 0 otherwise.  The pts argument is not used; output is passed on
 * with MP_NOPTS_VALUE.
 */
01139 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
01140 {
01141     mp_image_t *dmpi;
01142     struct vf_priv_s *p = vf->priv;
01143     unsigned char **planes, **old_planes;
          /* statistics slots of the current and the previous frame */
01144     struct frame_stats *s  = &p->stats[p->inframes & 1];
01145     struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
01146     int swapped = 0;
01147     const int flags = mpi->fields;
01148     int breaks, prev;
01149     int show_fields = 0;
01150     int dropped_fields = 0;
01151     double start_time, diff_time;
01152     char prev_chflag = p->chflag;
01153     int keep_rate;
01154 
01155     if (!p->planes[0][0]) init(p, mpi);
01156 
01157     old_planes = p->old_planes;
01158 
01159     if ((mpi->flags & MP_IMGFLAG_DIRECT) && mpi->priv) {
              /* get_image() already placed the picture in our buffers */
01160         planes = mpi->priv;
01161         mpi->priv = 0;
01162     } else {
              /* copy the cropped picture into one of two spare plane sets */
01163         planes = p->planes[2 + (++p->temp_idx & 1)];
01164         my_memcpy_pic(planes[0],
01165                       mpi->planes[0] + p->crop_x + p->crop_y * mpi->stride[0],
01166                       p->w, p->h, p->stride, mpi->stride[0]);
01167         if (mpi->flags & MP_IMGFLAG_PLANAR) {
01168             my_memcpy_pic(planes[1],
01169                           mpi->planes[1] + p->crop_cx + p->crop_cy * mpi->stride[1],
01170                           p->cw, p->ch, p->chroma_stride, mpi->stride[1]);
01171             my_memcpy_pic(planes[2],
01172                           mpi->planes[2] + p->crop_cx + p->crop_cy * mpi->stride[2],
01173                           p->cw, p->ch, p->chroma_stride, mpi->stride[2]);
                  /* NOTE(review): the copy counter is only bumped for
                     planar input - confirm this is intended */
01174             p->num_copies++;
01175         }
01176     }
01177 
01178     p->old_planes = planes;
          /* chflag is a one-character tag for the field flags, used in the
             verbose log and to detect repeat-first-field frames ('|') */
01179     p->chflag = ';';
01180     if (flags & MP_IMGFIELD_ORDERED) {
01181         swapped = !(flags & MP_IMGFIELD_TOP_FIRST);
01182         p->chflag = (flags & MP_IMGFIELD_REPEAT_FIRST ? '|' :
01183                      flags & MP_IMGFIELD_TOP_FIRST ? ':' : '.');
01184     }
01185     p->swapped = swapped;
01186 
01187     start_time = get_time();
01188     if (p->chflag == '|') {
              /* repeat-first-field frame: use canned statistics */
01189         *s = ppzs;
01190         p->iosync += p->in_inc;
01191     } else if ((p->fast & 1) && prev_chflag == '|')
01192         *s = pprs;
01193     else
01194         diff_fields(p, s, old_planes, planes);
01195     diff_time = get_time();
01196     p->diff_time += diff_time - start_time;
          /* the very first frame has nothing to compare against */
01197     breaks = p->inframes ? find_breaks(p, s) : 2;
01198     p->inframes++;
01199     keep_rate = 4*p->in_inc == p->out_dec;
01200 
01201     switch (breaks) {
01202       case 0:
01203       case 8:
01204       case 9:
01205       case 10:
01206         if (!keep_rate && p->notout < p->num_fields && p->iosync < 2*p->in_inc)
01207             break;
01208         if (p->notout < p->num_fields)
01209             dropped_fields = -2;
              /* no break: continues into case 4 */
01210       case 4:
01211         if (keep_rate || p->iosync >= -2*p->in_inc)
01212             show_fields = (4<<p->num_fields)-1;
01213         break;
01214       case 3:
01215         if (keep_rate)
01216             show_fields = 2;
01217         else if (p->iosync > 0) {
01218             if (p->notout >= p->num_fields && p->iosync > 2*p->in_inc) {
01219                 show_fields = 4; /* prev odd only */
01220                 if (p->num_fields > 1)
01221                     show_fields |= 8; /* + prev even */
01222             } else {
01223                 show_fields = 2; /* even only */
01224                 if (p->notout >= p->num_fields)
01225                     dropped_fields += p->num_fields;
01226             }
01227         }
01228         break;
01229       case 2:
01230         if (p->iosync <= -3*p->in_inc) {
01231             if (p->notout >= p->num_fields)
01232                 dropped_fields = p->num_fields;
01233             break;
01234         }
01235         if (p->num_fields == 1) {
01236             int prevbreak = ps->sad.noise >= 128;
01237             if (p->iosync < 4*p->in_inc) {
01238                 show_fields = 3;
01239                 dropped_fields = prevbreak;
01240             } else {
01241                 show_fields = 4 | (!prevbreak << 3);
01242                 if (p->notout < 1 + p->prev_fields)
01243                     dropped_fields = -!prevbreak;
01244             }
01245             break;
01246         }
              /* no break: with more than one field pending, case 2 shares
                 the default handling */
01247       default:
01248         if (keep_rate)
01249             show_fields = 3 << (breaks & 1);
01250         else if (p->notout >= p->num_fields &&
01251             p->iosync >= (breaks == 1 ? -p->in_inc :
01252                           p->in_inc << (p->num_fields == 1))) {
01253             show_fields = (1 << (2 + p->num_fields)) - (1<<breaks);
01254         } else {
01255             if (p->notout >= p->num_fields)
01256                 dropped_fields += p->num_fields + 2 - breaks;
01257             if (breaks == 1) {
01258                 if (p->iosync >= 4*p->in_inc)
01259                     show_fields = 6;
01260             } else if (p->iosync > -3*p->in_inc)
01261                 show_fields = 3;  /* odd+even */
01262         }
01263         break;
01264     }
01265 
          /* only the four low bits select fields */
01266     show_fields &= 15;
01267     prev = p->prev_fields;
          /* update the pending-field counters for the next call */
01268     if (breaks < 8) {
01269         if (p->num_fields == 1)
01270             breaks &= ~4;
01271         if (breaks)
01272             p->num_breaks++;
01273         if (breaks == 3)
01274             p->prev_fields = p->num_fields = 1;
01275         else if (breaks) {
01276             p->prev_fields = p->num_fields + (breaks==1) - (breaks==4);
01277             p->num_fields = breaks - (breaks == 4) + (p->chflag == '|');
01278         } else
01279             p->num_fields += 2;
01280     } else
01281         p->num_fields += 2;
01282 
          /* every input frame adds 4*in_inc (5*in_inc with a repeated
             field); every output frame subtracts out_dec below */
01283     p->iosync += 4 * p->in_inc;
01284     if (p->chflag == '|')
01285         p->iosync += p->in_inc;
01286 
01287     if (show_fields) {
01288         p->iosync -= p->out_dec;
01289         p->notout = !(show_fields & 1) + !(show_fields & 3);
              /* export the stored planes untouched when both fields of the
                 current (or previous) frame are shown and its noise
                 figures are low; otherwise build a merged picture */
01290         if (((show_fields &  3) ==  3 &&
01291              (s->low.noise + s->interlaced_low < (s->num_blocks>>8) ||
01292               s->sad.noise < 160)) ||
01293             ((show_fields & 12) == 12 &&
01294              (ps->low.noise + ps->interlaced_low < (s->num_blocks>>8) ||
01295               ps->sad.noise < 160))) {
01296             p->export_count++;
                  /* NOTE(review): the vf_get_image() result is used
                     without a NULL check, here and in the else branch */
01297             dmpi = vf_get_image(vf->next, mpi->imgfmt, MP_IMGTYPE_EXPORT,
01298                                 MP_IMGFLAG_PRESERVE|MP_IMGFLAG_READABLE,
01299                                 p->w, p->h);
01300             if ((show_fields & 3) != 3) planes = old_planes;
01301             dmpi->planes[0] = planes[0];
01302             dmpi->stride[0] = p->stride;
01303             dmpi->width = mpi->width;
01304             if (mpi->flags & MP_IMGFLAG_PLANAR) {
01305                 dmpi->planes[1] = planes[1];
01306                 dmpi->planes[2] = planes[2];
01307                 dmpi->stride[1] = p->chroma_stride;
01308                 dmpi->stride[2] = p->chroma_stride;
01309             }
01310         } else {
01311             p->merge_count++;
01312             dmpi = vf_get_image(vf->next, mpi->imgfmt,
01313                                 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
01314                                 p->w, p->h);
01315             copy_merge_fields(p, dmpi, old_planes, planes, show_fields);
01316         }
01317         p->outframes++;
01318     } else
01319         p->notout += 2;
01320 
01321     if (p->verbose)
01322         mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu %lu: %x %c %c %lu%s%s%c%s\n",
01323                p->inframes, p->outframes,
01324                breaks, breaks<8 && breaks>0 ? (int) p->prev_fields+'0' : ' ',
01325                ITOC(show_fields),
01326                p->num_breaks, 5*p->in_inc == p->out_dec && breaks<8 &&
01327                breaks>0 && ((prev&~1)!=2 || prev+p->prev_fields!=5) ?
01328                " ######## bad telecine ########" : "",
01329                dropped_fields ? " ======== dropped ":"", ITOC(dropped_fields),
01330                !show_fields || (show_fields & (show_fields-1)) ?
01331                "" : " @@@@@@@@@@@@@@@@@");
01332 
01333     p->merge_time += get_time() - diff_time;
          /* dmpi is only assigned, and only read, when show_fields != 0 */
01334     return show_fields ? vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE) : 0;
01335 }
01336 
01337 static int query_format(struct vf_instance *vf, unsigned int fmt)
01338 {
01339     /* FIXME - support more formats */
01340     switch (fmt) {
01341       case IMGFMT_YV12:
01342       case IMGFMT_IYUV:
01343       case IMGFMT_I420:
01344       case IMGFMT_411P:
01345       case IMGFMT_422P:
01346       case IMGFMT_444P:
01347         return vf_next_query_format(vf, fmt);
01348     }
01349     return 0;
01350 }
01351 
01352 static int config(struct vf_instance *vf,
01353                   int width, int height, int d_width, int d_height,
01354                   unsigned int flags, unsigned int outfmt)
01355 {
01356     unsigned long cxm = 0;
01357     unsigned long cym = 0;
01358     struct vf_priv_s *p = vf->priv;
01359     // rounding:
01360     if(!IMGFMT_IS_RGB(outfmt) && !IMGFMT_IS_BGR(outfmt)){
01361         switch(outfmt){
01362           case IMGFMT_444P:
01363           case IMGFMT_Y800:
01364           case IMGFMT_Y8:
01365             break;
01366           case IMGFMT_YVU9:
01367           case IMGFMT_IF09:
01368             cym = 3;
01369           case IMGFMT_411P:
01370             cxm = 3;
01371             break;
01372           case IMGFMT_YV12:
01373           case IMGFMT_I420:
01374           case IMGFMT_IYUV:
01375             cym = 1;
01376           default:
01377             cxm = 1;
01378         }
01379     }
01380     p->chroma_swapped = !!(p->crop_y & (cym+1));
01381     if (p->w) p->w += p->crop_x & cxm;
01382     if (p->h) p->h += p->crop_y & cym;
01383     p->crop_x &= ~cxm;
01384     p->crop_y &= ~cym;
01385     if (!p->w || p->w > width ) p->w = width;
01386     if (!p->h || p->h > height) p->h = height;
01387     if (p->crop_x + p->w > width ) p->crop_x = 0;
01388     if (p->crop_y + p->h > height) p->crop_y = 0;
01389 
01390     if(!opt_screen_size_x && !opt_screen_size_y){
01391         d_width = d_width * p->w/width;
01392         d_height = d_height * p->h/height;
01393     }
01394     return vf_next_config(vf, p->w, p->h, d_width, d_height, flags, outfmt);
01395 }
01396 
01397 static void uninit(struct vf_instance *vf)
01398 {
01399     struct vf_priv_s *p = vf->priv;
01400     mp_msg(MSGT_VFILTER, MSGL_INFO, "diff_time: %.3f, merge_time: %.3f, "
01401            "export: %lu, merge: %lu, copy: %lu\n", p->diff_time, p->merge_time,
01402            p->export_count, p->merge_count, p->num_copies);
01403     free(p->memory_allocated);
01404     free(p);
01405 }
01406 
01407 static int vf_open(vf_instance_t *vf, char *args)
01408 {
01409     struct vf_priv_s *p;
01410     vf->get_image = get_image;
01411     vf->put_image = put_image;
01412     vf->config = config;
01413     vf->query_format = query_format;
01414     vf->uninit = uninit;
01415     vf->default_reqs = VFCAP_ACCEPT_STRIDE;
01416     vf->priv = p = calloc(1, sizeof(struct vf_priv_s));
01417     p->out_dec = 5;
01418     p->in_inc = 4;
01419     p->thres.noise = 128;
01420     p->thres.even  = 128;
01421     p->sad_thres = 64;
01422     p->dint_thres = 4;
01423     p->luma_only = 0;
01424     p->fast = 3;
01425     p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0;
01426     if (args) {
01427         const char *args_remain = parse_args(p, args);
01428         if (args_remain) {
01429             mp_msg(MSGT_VFILTER, MSGL_FATAL,
01430                    "filmdint: unknown suboption: %s\n", args_remain);
01431             return 0;
01432         }
01433         if (p->out_dec < p->in_inc) {
01434             mp_msg(MSGT_VFILTER, MSGL_FATAL,
01435                    "filmdint: increasing the frame rate is not supported\n");
01436             return 0;
01437         }
01438     }
01439     if (p->mmx2 > 2)
01440         p->mmx2 = 0;
01441 #if !HAVE_MMX
01442     p->mmx2 = 0;
01443 #endif
01444 #if !HAVE_AMD3DNOW
01445     p->mmx2 &= 1;
01446 #endif
01447     p->thres.odd  = p->thres.even;
01448     p->thres.temp = p->thres.noise;
01449     p->diff_time = 0;
01450     p->merge_time = 0;
01451     return 1;
01452 }
01453 
01454 const vf_info_t vf_info_filmdint = {
01455     "Advanced inverse telecine filer",
01456     "filmdint",
01457     "Zoltan Hidvegi",
01458     "",
01459     vf_open,
01460     NULL
01461 };

Generated on Wed Apr 11 2012 07:31:36 for FFmpeg by  doxygen 1.7.1