libpostproc/postprocess.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029                         C       MMX     MMX2    3DNow   AltiVec
00030 isVertDC                Ec      Ec                      Ec
00031 isVertMinMaxOk          Ec      Ec                      Ec
00032 doVertLowPass           E               e       e       Ec
00033 doVertDefFilter         Ec      Ec      e       e       Ec
00034 isHorizDC               Ec      Ec                      Ec
00035 isHorizMinMaxOk         a       E                       Ec
00036 doHorizLowPass          E               e       e       Ec
00037 doHorizDefFilter        Ec      Ec      e       e       Ec
00038 do_a_deblock            Ec      E       Ec      E
00039 deRing                  E               e       e*      Ecp
00040 Vertical RKAlgo1        E               a       a
00041 Horizontal RKAlgo1                      a       a
00042 Vertical X1#            a               E       E
00043 Horizontal X1#          a               E       E
00044 LinIpolDeinterlace      e               E       E*
00045 CubicIpolDeinterlace    a               e       e*
00046 LinBlendDeinterlace     e               E       E*
00047 MedianDeinterlace#      E       Ec      Ec
00048 TempDeNoiser#           E               e       e       Ec
00049 
00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
00051 # more or less self-invented filters, so exactness is not very meaningful for them
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the main loop more flexible (variable number of blocks at once;
00066         the if/else stuff per block is slowing things down)
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use git log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include "libavutil/avassert.h"
00079 #include <inttypes.h>
00080 #include <stdio.h>
00081 #include <stdlib.h>
00082 #include <string.h>
00083 //#undef HAVE_MMX2
00084 //#define HAVE_AMD3DNOW
00085 //#undef HAVE_MMX
00086 //#undef ARCH_X86
00087 //#define DEBUG_BRIGHTNESS
00088 #include "postprocess.h"
00089 #include "postprocess_internal.h"
00090 #include "libavutil/avstring.h"
00091 
00092 unsigned postproc_version(void)
00093 {
00094     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
00095     return LIBPOSTPROC_VERSION_INT;
00096 }
00097 
00098 const char *postproc_configuration(void)
00099 {
00100     return FFMPEG_CONFIGURATION;
00101 }
00102 
00103 const char *postproc_license(void)
00104 {
00105 #define LICENSE_PREFIX "libpostproc license: "
00106     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00107 }
00108 
00109 #if HAVE_ALTIVEC_H
00110 #include <altivec.h>
00111 #endif
00112 
00113 #define GET_MODE_BUFFER_SIZE 500
00114 #define OPTIONS_ARRAY_SIZE 10
00115 #define BLOCK_SIZE 8
00116 #define TEMP_STRIDE 8
00117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00118 
00119 #if ARCH_X86
00120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00128 #endif
00129 
00130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00131 
00132 
00133 static struct PPFilter filters[]=
00134 {
00135     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
00136     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
00137 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
00138     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
00139     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
00140     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
00141     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
00142     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
00143     {"dr", "dering",                1, 5, 6, DERING},
00144     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
00145     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00146     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00147     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00148     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
00149     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
00150     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
00151     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
00152     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
00153     {NULL, NULL,0,0,0,0} //End Marker
00154 };
00155 
/*
 * Alias table for mode strings, stored as consecutive (alias, expansion)
 * pairs: entry 2*i is the alias, entry 2*i+1 the filter list it stands for.
 * A single NULL terminates the table.
 */
static const char *replaceTable[]=
{
    /* alias */     /* expansion */
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL            /* end marker */
};
00165 
00166 
00167 #if ARCH_X86
/** Issue an x86 "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
00174 
/** Issue an x86 "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
00181 
/** Issue an x86 "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
00188 
/** Issue an x86 "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
00195 #endif
00196 
00197 /* The horizontal functions exist only in C because the MMX
00198  * code is faster with vertical filters and transposing. */
00199 
00203 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00204 {
00205     int numEq= 0;
00206     int y;
00207     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00208     const int dcThreshold= dcOffset*2 + 1;
00209 
00210     for(y=0; y<BLOCK_SIZE; y++){
00211         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00212         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00213         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00214         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00215         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00216         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00217         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00218         src+= stride;
00219     }
00220     return numEq > c->ppMode.flatnessThreshold;
00221 }
00222 
00226 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00227 {
00228     int numEq= 0;
00229     int y;
00230     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00231     const int dcThreshold= dcOffset*2 + 1;
00232 
00233     src+= stride*4; // src points to begin of the 8x8 Block
00234     for(y=0; y<BLOCK_SIZE-1; y++){
00235         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00236         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00237         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00238         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00239         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00240         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00241         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00242         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00243         src+= stride;
00244     }
00245     return numEq > c->ppMode.flatnessThreshold;
00246 }
00247 
/**
 * Approximate horizontal min/max test.
 *
 * On each of 8 consecutive rows one pixel pair is sampled; the sampled
 * columns cycle through (0,5), (2,7), (4,1), (6,3). The test fails as soon
 * as a sampled difference lies outside [-2*QP, 2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride distance between rows, in bytes
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int pairs[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const int bias  = 2*QP;
    const int limit = 4*QP;
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row*stride;
        const int lhs = pairs[row & 3][0];
        const int rhs = pairs[row & 3][1];

        // unsigned compare rejects both diff < -2*QP and diff > 2*QP
        if ((unsigned)(line[lhs] - line[rhs] + bias) > limit)
            return 0;
    }
    return 1;
}
00263 
00264 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00265 {
00266     int x;
00267     src+= stride*4;
00268     for(x=0; x<BLOCK_SIZE; x+=4){
00269         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
00270         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00271         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00272         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00273     }
00274     return 1;
00275 }
00276 
00277 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00278 {
00279     if( isHorizDC_C(src, stride, c) ){
00280         if( isHorizMinMaxOk_C(src, stride, c->QP) )
00281             return 1;
00282         else
00283             return 0;
00284     }else{
00285         return 2;
00286     }
00287 }
00288 
00289 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00290 {
00291     if( isVertDC_C(src, stride, c) ){
00292         if( isVertMinMaxOk_C(src, stride, c->QP) )
00293             return 1;
00294         else
00295             return 0;
00296     }else{
00297         return 2;
00298     }
00299 }
00300 
00301 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00302 {
00303     int y;
00304     for(y=0; y<BLOCK_SIZE; y++){
00305         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00306 
00307         if(FFABS(middleEnergy) < 8*c->QP){
00308             const int q=(dst[3] - dst[4])/2;
00309             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00310             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00311 
00312             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00313             d= FFMAX(d, 0);
00314 
00315             d= (5*d + 32) >> 6;
00316             d*= FFSIGN(-middleEnergy);
00317 
00318             if(q>0)
00319             {
00320                 d= d<0 ? 0 : d;
00321                 d= d>q ? q : d;
00322             }
00323             else
00324             {
00325                 d= d>0 ? 0 : d;
00326                 d= d<q ? q : d;
00327             }
00328 
00329             dst[3]-= d;
00330             dst[4]+= d;
00331         }
00332         dst+= stride;
00333     }
00334 }
00335 
00340 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00341 {
00342     int y;
00343     for(y=0; y<BLOCK_SIZE; y++){
00344         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00345         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00346 
00347         int sums[10];
00348         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00349         sums[1] = sums[0] - first  + dst[3];
00350         sums[2] = sums[1] - first  + dst[4];
00351         sums[3] = sums[2] - first  + dst[5];
00352         sums[4] = sums[3] - first  + dst[6];
00353         sums[5] = sums[4] - dst[0] + dst[7];
00354         sums[6] = sums[5] - dst[1] + last;
00355         sums[7] = sums[6] - dst[2] + last;
00356         sums[8] = sums[7] - dst[3] + last;
00357         sums[9] = sums[8] - dst[4] + last;
00358 
00359         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00360         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00361         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00362         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00363         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00364         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00365         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00366         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00367 
00368         dst+= stride;
00369     }
00370 }
00371 
00380 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00381 {
00382     int y;
00383     static uint64_t *lut= NULL;
00384     if(lut==NULL)
00385     {
00386         int i;
00387         lut = av_malloc(256*8);
00388         for(i=0; i<256; i++)
00389         {
00390             int v= i < 128 ? 2*i : 2*(i-256);
00391 /*
00392 //Simulate 112242211 9-Tap filter
00393             uint64_t a= (v/16)  & 0xFF;
00394             uint64_t b= (v/8)   & 0xFF;
00395             uint64_t c= (v/4)   & 0xFF;
00396             uint64_t d= (3*v/8) & 0xFF;
00397 */
00398 //Simulate piecewise linear interpolation
00399             uint64_t a= (v/16)   & 0xFF;
00400             uint64_t b= (v*3/16) & 0xFF;
00401             uint64_t c= (v*5/16) & 0xFF;
00402             uint64_t d= (7*v/16) & 0xFF;
00403             uint64_t A= (0x100 - a)&0xFF;
00404             uint64_t B= (0x100 - b)&0xFF;
00405             uint64_t C= (0x100 - c)&0xFF;
00406             uint64_t D= (0x100 - c)&0xFF;
00407 
00408             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00409                        (D<<24) | (C<<16) | (B<<8)  | (A);
00410             //lut[i] = (v<<32) | (v<<24);
00411         }
00412     }
00413 
00414     for(y=0; y<BLOCK_SIZE; y++){
00415         int a= src[1] - src[2];
00416         int b= src[3] - src[4];
00417         int c= src[5] - src[6];
00418 
00419         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00420 
00421         if(d < QP){
00422             int v = d * FFSIGN(-b);
00423 
00424             src[1] +=v/8;
00425             src[2] +=v/4;
00426             src[3] +=3*v/8;
00427             src[4] -=3*v/8;
00428             src[5] -=v/4;
00429             src[6] -=v/8;
00430         }
00431         src+=stride;
00432     }
00433 }
00434 
00438 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00439     int y;
00440     const int QP= c->QP;
00441     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00442     const int dcThreshold= dcOffset*2 + 1;
00443 //START_TIMER
00444     src+= step*4; // src points to begin of the 8x8 Block
00445     for(y=0; y<8; y++){
00446         int numEq= 0;
00447 
00448         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00449         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00450         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00451         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00452         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00453         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00454         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00455         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00456         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00457         if(numEq > c->ppMode.flatnessThreshold){
00458             int min, max, x;
00459 
00460             if(src[0] > src[step]){
00461                 max= src[0];
00462                 min= src[step];
00463             }else{
00464                 max= src[step];
00465                 min= src[0];
00466             }
00467             for(x=2; x<8; x+=2){
00468                 if(src[x*step] > src[(x+1)*step]){
00469                         if(src[x    *step] > max) max= src[ x   *step];
00470                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
00471                 }else{
00472                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
00473                         if(src[ x   *step] < min) min= src[ x   *step];
00474                 }
00475             }
00476             if(max-min < 2*QP){
00477                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00478                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00479 
00480                 int sums[10];
00481                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00482                 sums[1] = sums[0] - first       + src[3*step];
00483                 sums[2] = sums[1] - first       + src[4*step];
00484                 sums[3] = sums[2] - first       + src[5*step];
00485                 sums[4] = sums[3] - first       + src[6*step];
00486                 sums[5] = sums[4] - src[0*step] + src[7*step];
00487                 sums[6] = sums[5] - src[1*step] + last;
00488                 sums[7] = sums[6] - src[2*step] + last;
00489                 sums[8] = sums[7] - src[3*step] + last;
00490                 sums[9] = sums[8] - src[4*step] + last;
00491 
00492                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00493                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00494                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00495                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00496                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00497                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00498                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00499                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00500             }
00501         }else{
00502             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00503 
00504             if(FFABS(middleEnergy) < 8*QP){
00505                 const int q=(src[3*step] - src[4*step])/2;
00506                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00507                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00508 
00509                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00510                 d= FFMAX(d, 0);
00511 
00512                 d= (5*d + 32) >> 6;
00513                 d*= FFSIGN(-middleEnergy);
00514 
00515                 if(q>0){
00516                     d= d<0 ? 0 : d;
00517                     d= d>q ? q : d;
00518                 }else{
00519                     d= d>0 ? 0 : d;
00520                     d= d<q ? q : d;
00521                 }
00522 
00523                 src[3*step]-= d;
00524                 src[4*step]+= d;
00525             }
00526         }
00527 
00528         src += stride;
00529     }
00530 /*if(step==16){
00531     STOP_TIMER("step16")
00532 }else{
00533     STOP_TIMER("stepX")
00534 }*/
00535 }
00536 
00537 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
00538 //Plain C versions
00539 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00540 #define COMPILE_C
00541 #endif
00542 
00543 #if HAVE_ALTIVEC
00544 #define COMPILE_ALTIVEC
00545 #endif //HAVE_ALTIVEC
00546 
00547 #if ARCH_X86
00548 
00549 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00550 #define COMPILE_MMX
00551 #endif
00552 
00553 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00554 #define COMPILE_MMX2
00555 #endif
00556 
00557 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00558 #define COMPILE_3DNOW
00559 #endif
00560 #endif /* ARCH_X86 */
00561 
00562 #undef HAVE_MMX
00563 #define HAVE_MMX 0
00564 #undef HAVE_MMX2
00565 #define HAVE_MMX2 0
00566 #undef HAVE_AMD3DNOW
00567 #define HAVE_AMD3DNOW 0
00568 #undef HAVE_ALTIVEC
00569 #define HAVE_ALTIVEC 0
00570 
00571 #ifdef COMPILE_C
00572 #define RENAME(a) a ## _C
00573 #include "postprocess_template.c"
00574 #endif
00575 
00576 #ifdef COMPILE_ALTIVEC
00577 #undef RENAME
00578 #undef HAVE_ALTIVEC
00579 #define HAVE_ALTIVEC 1
00580 #define RENAME(a) a ## _altivec
00581 #include "postprocess_altivec_template.c"
00582 #include "postprocess_template.c"
00583 #endif
00584 
00585 //MMX versions
00586 #ifdef COMPILE_MMX
00587 #undef RENAME
00588 #undef HAVE_MMX
00589 #define HAVE_MMX 1
00590 #define RENAME(a) a ## _MMX
00591 #include "postprocess_template.c"
00592 #endif
00593 
00594 //MMX2 versions
00595 #ifdef COMPILE_MMX2
00596 #undef RENAME
00597 #undef HAVE_MMX
00598 #undef HAVE_MMX2
00599 #define HAVE_MMX 1
00600 #define HAVE_MMX2 1
00601 #define RENAME(a) a ## _MMX2
00602 #include "postprocess_template.c"
00603 #endif
00604 
00605 //3DNOW versions
00606 #ifdef COMPILE_3DNOW
00607 #undef RENAME
00608 #undef HAVE_MMX
00609 #undef HAVE_MMX2
00610 #undef HAVE_AMD3DNOW
00611 #define HAVE_MMX 1
00612 #define HAVE_MMX2 0
00613 #define HAVE_AMD3DNOW 1
00614 #define RENAME(a) a ## _3DNow
00615 #include "postprocess_template.c"
00616 #endif
00617 
00618 // minor note: the HAVE_xyz is messed up after that line so do not use it.
00619 
/*
 * Dispatch one plane to the postProcess_<variant>() implementation generated
 * from the template includes.
 *
 * vc is used as a PPContext and vm as a PPMode; *vm is copied into
 * c->ppMode before dispatching. All other arguments are forwarded unchanged.
 *
 * With CONFIG_RUNTIME_CPUDETECT the variant is picked from c->cpuCaps on
 * every call (MMX2, then 3DNow, then MMX, then C on x86; AltiVec or C
 * otherwise). Without it the variant is fixed at compile time by the HAVE_*
 * macros as they stand at this point in the file.
 */
00620 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00621         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00622 {
00623     PPContext *c= (PPContext *)vc;
00624     PPMode *ppMode= (PPMode *)vm;
00625     c->ppMode= *ppMode; //FIXME
00626 
00627     // Plain ifs are used here instead of function pointers. The speed
00628     // difference is not measurable at this level, but checking cpuCaps on
00629     // every call still works if someone swaps the CPU without restarting MPlayer ;)
00630 #if CONFIG_RUNTIME_CPUDETECT
00631 #if ARCH_X86
00632     // ordered by speed, fastest first
00633     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00634         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00635     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00636         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00637     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00638         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00639     else
00640         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00641 #else
00642 #if HAVE_ALTIVEC
00643     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00644             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00645     else
00646 #endif
00647             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00648 #endif
00649 #else /* CONFIG_RUNTIME_CPUDETECT */
00650 #if   HAVE_MMX2
00651             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00652 #elif HAVE_AMD3DNOW
00653             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00654 #elif HAVE_MMX
00655             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00656 #elif HAVE_ALTIVEC
00657             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00658 #else
00659             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00660 #endif
00661 #endif /* !CONFIG_RUNTIME_CPUDETECT */
00662 }
00663 
00664 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00665 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
00666 
/*
 * Help text for the -pp command line option.
 *
 * The filter names printed here must match the shortName/longName entries
 * of filters[] exactly, because the mode parser compares them with strcmp().
 * The long names of "ha", "va" and "fq" are therefore spelled "ahdeblock",
 * "avdeblock" and "forcequant".
 *
 * The declaration form (pointer vs. array) depends on LIBPOSTPROC_VERSION_INT.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00716 
00717 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00718 {
00719     char temp[GET_MODE_BUFFER_SIZE];
00720     char *p= temp;
00721     static const char filterDelimiters[] = ",/";
00722     static const char optionDelimiters[] = ":";
00723     struct PPMode *ppMode;
00724     char *filterToken;
00725 
00726     ppMode= av_malloc(sizeof(PPMode));
00727 
00728     ppMode->lumMode= 0;
00729     ppMode->chromMode= 0;
00730     ppMode->maxTmpNoise[0]= 700;
00731     ppMode->maxTmpNoise[1]= 1500;
00732     ppMode->maxTmpNoise[2]= 3000;
00733     ppMode->maxAllowedY= 234;
00734     ppMode->minAllowedY= 16;
00735     ppMode->baseDcDiff= 256/8;
00736     ppMode->flatnessThreshold= 56-16-1;
00737     ppMode->maxClippedThreshold= 0.01;
00738     ppMode->error=0;
00739 
00740     memset(temp, 0, GET_MODE_BUFFER_SIZE);
00741     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00742 
00743     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00744 
00745     for(;;){
00746         char *filterName;
00747         int q= 1000000; //PP_QUALITY_MAX;
00748         int chrom=-1;
00749         int luma=-1;
00750         char *option;
00751         char *options[OPTIONS_ARRAY_SIZE];
00752         int i;
00753         int filterNameOk=0;
00754         int numOfUnknownOptions=0;
00755         int enable=1; //does the user want us to enabled or disabled the filter
00756 
00757         filterToken= strtok(p, filterDelimiters);
00758         if(filterToken == NULL) break;
00759         p+= strlen(filterToken) + 1; // p points to next filterToken
00760         filterName= strtok(filterToken, optionDelimiters);
00761         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00762 
00763         if(*filterName == '-'){
00764             enable=0;
00765             filterName++;
00766         }
00767 
00768         for(;;){ //for all options
00769             option= strtok(NULL, optionDelimiters);
00770             if(option == NULL) break;
00771 
00772             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00773             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00774             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00775             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00776             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00777             else{
00778                 options[numOfUnknownOptions] = option;
00779                 numOfUnknownOptions++;
00780             }
00781             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00782         }
00783         options[numOfUnknownOptions] = NULL;
00784 
00785         /* replace stuff from the replace Table */
00786         for(i=0; replaceTable[2*i]!=NULL; i++){
00787             if(!strcmp(replaceTable[2*i], filterName)){
00788                 int newlen= strlen(replaceTable[2*i + 1]);
00789                 int plen;
00790                 int spaceLeft;
00791 
00792                 p--, *p=',';
00793 
00794                 plen= strlen(p);
00795                 spaceLeft= p - temp + plen;
00796                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
00797                     ppMode->error++;
00798                     break;
00799                 }
00800                 memmove(p + newlen, p, plen+1);
00801                 memcpy(p, replaceTable[2*i + 1], newlen);
00802                 filterNameOk=1;
00803             }
00804         }
00805 
00806         for(i=0; filters[i].shortName!=NULL; i++){
00807             if(   !strcmp(filters[i].longName, filterName)
00808                || !strcmp(filters[i].shortName, filterName)){
00809                 ppMode->lumMode &= ~filters[i].mask;
00810                 ppMode->chromMode &= ~filters[i].mask;
00811 
00812                 filterNameOk=1;
00813                 if(!enable) break; // user wants to disable it
00814 
00815                 if(q >= filters[i].minLumQuality && luma)
00816                     ppMode->lumMode|= filters[i].mask;
00817                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00818                     if(q >= filters[i].minChromQuality)
00819                             ppMode->chromMode|= filters[i].mask;
00820 
00821                 if(filters[i].mask == LEVEL_FIX){
00822                     int o;
00823                     ppMode->minAllowedY= 16;
00824                     ppMode->maxAllowedY= 234;
00825                     for(o=0; options[o]!=NULL; o++){
00826                         if(  !strcmp(options[o],"fullyrange")
00827                            ||!strcmp(options[o],"f")){
00828                             ppMode->minAllowedY= 0;
00829                             ppMode->maxAllowedY= 255;
00830                             numOfUnknownOptions--;
00831                         }
00832                     }
00833                 }
00834                 else if(filters[i].mask == TEMP_NOISE_FILTER)
00835                 {
00836                     int o;
00837                     int numOfNoises=0;
00838 
00839                     for(o=0; options[o]!=NULL; o++){
00840                         char *tail;
00841                         ppMode->maxTmpNoise[numOfNoises]=
00842                             strtol(options[o], &tail, 0);
00843                         if(tail!=options[o]){
00844                             numOfNoises++;
00845                             numOfUnknownOptions--;
00846                             if(numOfNoises >= 3) break;
00847                         }
00848                     }
00849                 }
00850                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
00851                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00852                     int o;
00853 
00854                     for(o=0; options[o]!=NULL && o<2; o++){
00855                         char *tail;
00856                         int val= strtol(options[o], &tail, 0);
00857                         if(tail==options[o]) break;
00858 
00859                         numOfUnknownOptions--;
00860                         if(o==0) ppMode->baseDcDiff= val;
00861                         else ppMode->flatnessThreshold= val;
00862                     }
00863                 }
00864                 else if(filters[i].mask == FORCE_QUANT){
00865                     int o;
00866                     ppMode->forcedQuant= 15;
00867 
00868                     for(o=0; options[o]!=NULL && o<1; o++){
00869                         char *tail;
00870                         int val= strtol(options[o], &tail, 0);
00871                         if(tail==options[o]) break;
00872 
00873                         numOfUnknownOptions--;
00874                         ppMode->forcedQuant= val;
00875                     }
00876                 }
00877             }
00878         }
00879         if(!filterNameOk) ppMode->error++;
00880         ppMode->error += numOfUnknownOptions;
00881     }
00882 
00883     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00884     if(ppMode->error){
00885         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00886         av_free(ppMode);
00887         return NULL;
00888     }
00889     return ppMode;
00890 }
00891 
00892 void pp_free_mode(pp_mode *mode){
00893     av_free(mode);
00894 }
00895 
/**
 * Replace the buffer behind *p with a new, zero-filled buffer.
 *
 * The previous buffer is released with av_free(); its contents are not
 * carried over. Afterwards *p holds whatever av_mallocz() returned.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted for the callers' benefit but not used here
 * @param size      number of bytes requested from av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
00900 
00901 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00902     int mbWidth = (width+15)>>4;
00903     int mbHeight= (height+15)>>4;
00904     int i;
00905 
00906     c->stride= stride;
00907     c->qpStride= qpStride;
00908 
00909     reallocAlign((void **)&c->tempDst, 8, stride*24);
00910     reallocAlign((void **)&c->tempSrc, 8, stride*24);
00911     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00912     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00913     for(i=0; i<256; i++)
00914             c->yHistogram[i]= width*height/64*15/256;
00915 
00916     for(i=0; i<3; i++){
00917         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
00918         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00919         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00920     }
00921 
00922     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00923     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00924     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00925     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00926 }
00927 
/**
 * Name callback installed in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    const char *const label = "postproc";

    (void)ptr;
    return label;
}
00931 
00932 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00933 
00934 pp_context *pp_get_context(int width, int height, int cpuCaps){
00935     PPContext *c= av_malloc(sizeof(PPContext));
00936     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
00937     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00938 
00939     memset(c, 0, sizeof(PPContext));
00940     c->av_class = &av_codec_context_class;
00941     c->cpuCaps= cpuCaps;
00942     if(cpuCaps&PP_FORMAT){
00943         c->hChromaSubSample= cpuCaps&0x3;
00944         c->vChromaSubSample= (cpuCaps>>4)&0x3;
00945     }else{
00946         c->hChromaSubSample= 1;
00947         c->vChromaSubSample= 1;
00948     }
00949 
00950     reallocBuffers(c, width, height, stride, qpStride);
00951 
00952     c->frameNum=-1;
00953 
00954     return c;
00955 }
00956 
00957 void pp_free_context(void *vc){
00958     PPContext *c = (PPContext*)vc;
00959     int i;
00960 
00961     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00962     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00963 
00964     av_free(c->tempBlocks);
00965     av_free(c->yHistogram);
00966     av_free(c->tempDst);
00967     av_free(c->tempSrc);
00968     av_free(c->deintTemp);
00969     av_free(c->stdQPTable);
00970     av_free(c->nonBQPTable);
00971     av_free(c->forcedQPTable);
00972 
00973     memset(c, 0, sizeof(PPContext));
00974 
00975     av_free(c);
00976 }
00977 
00978 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00979                      uint8_t * dst[3], const int dstStride[3],
00980                      int width, int height,
00981                      const QP_STORE_T *QP_store,  int QPStride,
00982                      pp_mode *vm,  void *vc, int pict_type)
00983 {
00984     int mbWidth = (width+15)>>4;
00985     int mbHeight= (height+15)>>4;
00986     PPMode *mode = (PPMode*)vm;
00987     PPContext *c = (PPContext*)vc;
00988     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
00989     int absQPStride = FFABS(QPStride);
00990 
00991     // c->stride and c->QPStride are always positive
00992     if(c->stride < minStride || c->qpStride < absQPStride)
00993         reallocBuffers(c, width, height,
00994                        FFMAX(minStride, c->stride),
00995                        FFMAX(c->qpStride, absQPStride));
00996 
00997     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
00998         int i;
00999         QP_store= c->forcedQPTable;
01000         absQPStride = QPStride = 0;
01001         if(mode->lumMode & FORCE_QUANT)
01002             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01003         else
01004             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01005     }
01006 
01007     if(pict_type & PP_PICT_TYPE_QP2){
01008         int i;
01009         const int count= mbHeight * absQPStride;
01010         for(i=0; i<(count>>2); i++){
01011             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01012         }
01013         for(i<<=2; i<count; i++){
01014             c->stdQPTable[i] = QP_store[i]>>1;
01015         }
01016         QP_store= c->stdQPTable;
01017         QPStride= absQPStride;
01018     }
01019 
01020     if(0){
01021         int x,y;
01022         for(y=0; y<mbHeight; y++){
01023             for(x=0; x<mbWidth; x++){
01024                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01025             }
01026             av_log(c, AV_LOG_INFO, "\n");
01027         }
01028         av_log(c, AV_LOG_INFO, "\n");
01029     }
01030 
01031     if((pict_type&7)!=3){
01032         if (QPStride >= 0){
01033             int i;
01034             const int count= mbHeight * QPStride;
01035             for(i=0; i<(count>>2); i++){
01036                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01037             }
01038             for(i<<=2; i<count; i++){
01039                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01040             }
01041         } else {
01042             int i,j;
01043             for(i=0; i<mbHeight; i++) {
01044                 for(j=0; j<absQPStride; j++) {
01045                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01046                 }
01047             }
01048         }
01049     }
01050 
01051     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01052            mode->lumMode, mode->chromMode);
01053 
01054     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01055                 width, height, QP_store, QPStride, 0, mode, c);
01056 
01057     width  = (width )>>c->hChromaSubSample;
01058     height = (height)>>c->vChromaSubSample;
01059 
01060     if(mode->chromMode){
01061         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01062                     width, height, QP_store, QPStride, 1, mode, c);
01063         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01064                     width, height, QP_store, QPStride, 2, mode, c);
01065     }
01066     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01067         linecpy(dst[1], src[1], height, srcStride[1]);
01068         linecpy(dst[2], src[2], height, srcStride[2]);
01069     }else{
01070         int y;
01071         for(y=0; y<height; y++){
01072             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01073             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01074         }
01075     }
01076 }