libavfilter/x86/yadif_template.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License along
00017  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
00018  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00019  */
00020 
/*
 * Instruction-set selection for this template.
 *
 * With COMPILE_TEMPLATE_SSE defined the kernel uses the %xmm registers and
 * handles STEP = 8 pixels per loop iteration; otherwise it uses the %mm
 * registers and handles STEP = 4 pixels.
 *
 *   MM     register name prefix ("%%" because it is spliced into extended asm)
 *   MOV    move of STEP bytes, i.e. the low half of a register
 *   MOVQ   full-register move; in the SSE build this is movdqa, which requires
 *          a 16-byte-aligned memory operand
 *   MOVQU  full-register move without that alignment requirement
 *   LOAD   fetch STEP bytes from mem and widen them to 16-bit words by
 *          interleaving with MM7, which therefore has to be zero
 *   PSRL1  shift a whole register right by one byte
 *   PSRL2  shift a whole register right by two bytes
 *   PSHUF  put a copy of the SECOND argument, moved down by two bytes, into
 *          the FIRST argument (the parameter names src/dst are the reverse of
 *          the data flow); in the MMX build only the low half of the result
 *          follows that description
 */
00021 #ifdef COMPILE_TEMPLATE_SSE
00022 #define MM "%%xmm"
00023 #define MOV  "movq"
00024 #define MOVQ "movdqa"
00025 #define MOVQU "movdqu"
00026 #define STEP 8
00027 #define LOAD(mem,dst) \
00028             MOV"       "mem", "dst" \n\t"\
00029             "punpcklbw "MM"7, "dst" \n\t"
00030 #define PSRL1(reg) "psrldq $1, "reg" \n\t"
00031 #define PSRL2(reg) "psrldq $2, "reg" \n\t"
00032 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
00033                        "psrldq $2, "src"     \n\t"
00034 #else
00035 #define MM "%%mm"
00036 #define MOV  "movd"
00037 #define MOVQ "movq"
00038 #define MOVQU "movq"
00039 #define STEP 4
00040 #define LOAD(mem,dst) \
00041             MOV"       "mem", "dst" \n\t"\
00042             "punpcklbw "MM"7, "dst" \n\t"
00043 #define PSRL1(reg) "psrlq $8, "reg" \n\t"
00044 #define PSRL2(reg) "psrlq $16, "reg" \n\t"
00045 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
00046 #endif
00047 
/*
 * PABS(tmp, dst): replace every signed 16-bit lane of dst by its absolute
 * value. The SSSE3 build uses pabsw and never touches tmp; the fallback
 * computes max(dst, 0 - dst) and overwrites tmp while doing so.
 */
00048 #ifdef COMPILE_TEMPLATE_SSSE3
00049 #define PABS(tmp,dst) \
00050             "pabsw     "dst", "dst" \n\t"
00051 #else
00052 #define PABS(tmp,dst) \
00053             "pxor     "tmp", "tmp" \n\t"\
00054             "psubw    "dst", "tmp" \n\t"\
00055             "pmaxsw   "tmp", "dst" \n\t"
00056 #endif
00057 
/*
 * CHECK(pj, mj): evaluate one diagonal interpolation candidate.
 *
 * pj and mj are the byte displacements applied to the line at cur+mrefs and
 * to the line at cur+prefs respectively; for direction j the callers pass
 * pj = -1+j and mj = -1-j.
 *
 * Results:
 *   MM5 = (upper neighbour + lower neighbour) >> 1 along the diagonal, as words
 *   MM2 = score: the sum of three horizontally adjacent absolute differences
 * Requires MM7 == 0 and overwrites MM3 and MM4.
 *
 * pavgb rounds up, so the low bit of (a ^ b) is subtracted afterwards to
 * obtain the rounded-down average. This presumes pb_1 is a vector whose bytes
 * are all 1 -- its definition is not part of this file.
 */
00058 #define CHECK(pj,mj) \
00059             MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
00060             MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
00061             MOVQ"      "MM"2, "MM"4 \n\t"\
00062             MOVQ"      "MM"2, "MM"5 \n\t"\
00063             "pxor      "MM"3, "MM"4 \n\t"\
00064             "pavgb     "MM"3, "MM"5 \n\t"\
00065             "pand     "MANGLE(pb_1)", "MM"4 \n\t"\
00066             "psubusb   "MM"4, "MM"5 \n\t"\
00067             PSRL1(MM"5")                 \
00068             "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
00069             MOVQ"      "MM"2, "MM"4 \n\t"\
00070             "psubusb   "MM"3, "MM"2 \n\t"\
00071             "psubusb   "MM"4, "MM"3 \n\t"\
00072             "pmaxub    "MM"3, "MM"2 \n\t"\
00073             MOVQ"      "MM"2, "MM"3 \n\t"\
00074             MOVQ"      "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
00075             PSRL1(MM"3")                  /* ABS(cur[x-refs  +j] - cur[x+refs  -j]) */\
00076             PSRL2(MM"4")                  /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
00077             "punpcklbw "MM"7, "MM"2 \n\t"\
00078             "punpcklbw "MM"7, "MM"3 \n\t"\
00079             "punpcklbw "MM"7, "MM"4 \n\t"\
00080             "paddw     "MM"3, "MM"2 \n\t"\
00081             "paddw     "MM"4, "MM"2 \n\t" /* score */
00082 
/*
 * CHECK1: accept the candidate produced by CHECK wherever it scores lower.
 *
 * In every word lane where score (MM2) < spatial_score (MM0), MM0 takes the
 * score and MM1 (spatial_pred) takes the candidate from MM5; the other lanes
 * keep their previous MM0/MM1 values. The comparison mask is left in MM6 for
 * a following CHECK2. MM3 is overwritten and MM5 is masked in place.
 */
00083 #define CHECK1 \
00084             MOVQ"      "MM"0, "MM"3 \n\t"\
00085             "pcmpgtw   "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
00086             "pminsw    "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
00087             MOVQ"      "MM"3, "MM"6 \n\t"\
00088             "pand      "MM"3, "MM"5 \n\t"\
00089             "pandn     "MM"1, "MM"3 \n\t"\
00090             "por       "MM"5, "MM"3 \n\t"\
00091             MOVQ"      "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
00092 
/*
 * CHECK2: the same compare-and-select as CHECK1, gated on the CHECK1 result.
 *
 * MM6 holds the mask left by the preceding CHECK1 (all ones in lanes where
 * that candidate was accepted). Adding pw_1 and shifting left by 14 turns the
 * mask into a penalty of 0 for accepted lanes and 0x4000 for rejected ones,
 * presuming pw_1 is a vector of 16-bit ones (defined outside this file). The
 * penalty is added to the score in MM2 with signed saturation, after which
 * MM0 and MM1 are updated exactly as in CHECK1. MM6 is not refreshed here.
 */
00093 #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
00094                   hurts both quality and speed, but matches the C version. */\
00095             "paddw    "MANGLE(pw_1)", "MM"6 \n\t"\
00096             "psllw     $14,   "MM"6 \n\t"\
00097             "paddsw    "MM"6, "MM"2 \n\t"\
00098             MOVQ"      "MM"0, "MM"3 \n\t"\
00099             "pcmpgtw   "MM"2, "MM"3 \n\t"\
00100             "pminsw    "MM"2, "MM"0 \n\t"\
00101             "pand      "MM"3, "MM"5 \n\t"\
00102             "pandn     "MM"1, "MM"3 \n\t"\
00103             "por       "MM"5, "MM"3 \n\t"\
00104             MOVQ"      "MM"3, "MM"1 \n\t"
00105 
/**
 * Filter one line with the yadif kernel, STEP pixels per loop iteration.
 *
 * @param dst    output line; every iteration stores STEP bytes, so when w is
 *               not a multiple of STEP the last store runs past dst[w-1]
 * @param prev   line read at the offsets mrefs and prefs (and, depending on
 *               parity, at offset 0 and at twice those offsets)
 * @param cur    line read at the offsets mrefs and prefs with byte
 *               displacements from -3 to +1 using full-register loads
 * @param next   line read like prev
 * @param w      number of pixels to produce
 * @param prefs  byte offset added to the line pointers to reach "x+refs"
 *               (presumably the next line of the same frame -- confirm with
 *               the caller)
 * @param mrefs  byte offset added to the line pointers to reach "x-refs"
 * @param parity non-zero: prev2 = prev and next2 = cur;
 *               zero:     prev2 = cur  and next2 = next
 * @param mode   values below 2 enable the additional check against the lines
 *               at twice prefs/mrefs before the final clip
 *
 * The pixel result is left in MM1 by the main asm statement and written out
 * by a second, separate asm statement.
 * NOTE(review): this relies on the compiler not touching MM1 between the two
 * statements; they are presumably kept apart to limit the number of operands
 * of the main statement -- confirm before merging them.
 */
00106 void RENAME(ff_yadif_filter_line)(uint8_t *dst,
00107                                   uint8_t *prev, uint8_t *cur, uint8_t *next,
00108                                   int w, int prefs, int mrefs, int parity, int mode)
00109 {
    /* Scratch area for the asm, rounded up to a 16-byte boundary because it
     * is accessed with MOVQ. Slots: +0 c, +16 d, +32 e, +48 diff.
     * The pointer is converted through uintptr_t so that the round trip has
     * the pointer's own width on both 32-bit and 64-bit targets. */
00110     uint8_t tmp[5*16];
00111     uint8_t *tmpA= (uint8_t*)(((uintptr_t)(tmp+15)) & ~(uintptr_t)15);
00112     int x;
00113 
/* FILTER expands to the complete per-line loop. prev2 and next2 must be
 * defined as the operand names ("prev", "cur" or "next") to use for the two
 * temporally adjacent lines before FILTER is expanded. */
00114 #define FILTER\
00115     for(x=0; x<w; x+=STEP){\
00116         __asm__ volatile(\
00117             "pxor      "MM"7, "MM"7 \n\t"\
00118             LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
00119             LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
00120             LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
00121             LOAD("(%["next2"])", MM"3") /* next2[x] */\
00122             MOVQ"      "MM"3, "MM"4 \n\t"\
00123             "paddw     "MM"2, "MM"3 \n\t"\
00124             "psraw     $1,    "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
00125             MOVQ"      "MM"0, (%[tmpA]) \n\t" /* c */\
00126             MOVQ"      "MM"3, 16(%[tmpA]) \n\t" /* d */\
00127             MOVQ"      "MM"1, 32(%[tmpA]) \n\t" /* e */\
00128             "psubw     "MM"4, "MM"2 \n\t"\
00129             PABS(      MM"4", MM"2") /* temporal_diff0 */\
00130             LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
00131             LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
00132             "psubw     "MM"0, "MM"3 \n\t"\
00133             "psubw     "MM"1, "MM"4 \n\t"\
00134             PABS(      MM"5", MM"3")\
00135             PABS(      MM"5", MM"4")\
00136             "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
00137             "psrlw     $1,    "MM"2 \n\t"\
00138             "psrlw     $1,    "MM"3 \n\t"\
00139             "pmaxsw    "MM"3, "MM"2 \n\t"\
00140             LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
00141             LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
00142             "psubw     "MM"0, "MM"3 \n\t"\
00143             "psubw     "MM"1, "MM"4 \n\t"\
00144             PABS(      MM"5", MM"3")\
00145             PABS(      MM"5", MM"4")\
00146             "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
00147             "psrlw     $1,    "MM"3 \n\t"\
00148             "pmaxsw    "MM"3, "MM"2 \n\t"\
00149             MOVQ"      "MM"2, 48(%[tmpA]) \n\t" /* diff */\
00150 \
00151             "paddw     "MM"0, "MM"1 \n\t"\
00152             "paddw     "MM"0, "MM"0 \n\t"\
00153             "psubw     "MM"1, "MM"0 \n\t"\
00154             "psrlw     $1,    "MM"1 \n\t" /* spatial_pred */\
00155             PABS(      MM"2", MM"0")      /* ABS(c-e) */\
00156 \
00157             MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
00158             MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
00159             MOVQ"      "MM"2, "MM"4 \n\t"\
00160             "psubusb   "MM"3, "MM"2 \n\t"\
00161             "psubusb   "MM"4, "MM"3 \n\t"\
00162             "pmaxub    "MM"3, "MM"2 \n\t"\
00163             PSHUF(MM"3", MM"2") \
00164             "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
00165             "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
00166             "paddw     "MM"2, "MM"0 \n\t"\
00167             "paddw     "MM"3, "MM"0 \n\t"\
00168             "psubw    "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
00169 \
00170             CHECK(-2,0)\
00171             CHECK1\
00172             CHECK(-3,1)\
00173             CHECK2\
00174             CHECK(0,-2)\
00175             CHECK1\
00176             CHECK(1,-3)\
00177             CHECK2\
00178 \
00179             /* if(p->mode<2) ... */\
00180             MOVQ"    48(%[tmpA]), "MM"6 \n\t" /* diff */\
00181             "cmpl      $2, %[mode] \n\t"\
00182             "jge       1f \n\t"\
00183             LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
00184             LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
00185             LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
00186             LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
00187             "paddw     "MM"4, "MM"2 \n\t"\
00188             "paddw     "MM"5, "MM"3 \n\t"\
00189             "psrlw     $1,    "MM"2 \n\t" /* b */\
00190             "psrlw     $1,    "MM"3 \n\t" /* f */\
00191             MOVQ"    (%[tmpA]), "MM"4 \n\t" /* c */\
00192             MOVQ"    16(%[tmpA]), "MM"5 \n\t" /* d */\
00193             MOVQ"    32(%[tmpA]), "MM"7 \n\t" /* e */\
00194             "psubw     "MM"4, "MM"2 \n\t" /* b-c */\
00195             "psubw     "MM"7, "MM"3 \n\t" /* f-e */\
00196             MOVQ"      "MM"5, "MM"0 \n\t"\
00197             "psubw     "MM"4, "MM"5 \n\t" /* d-c */\
00198             "psubw     "MM"7, "MM"0 \n\t" /* d-e */\
00199             MOVQ"      "MM"2, "MM"4 \n\t"\
00200             "pminsw    "MM"3, "MM"2 \n\t"\
00201             "pmaxsw    "MM"4, "MM"3 \n\t"\
00202             "pmaxsw    "MM"5, "MM"2 \n\t"\
00203             "pminsw    "MM"5, "MM"3 \n\t"\
00204             "pmaxsw    "MM"0, "MM"2 \n\t" /* max */\
00205             "pminsw    "MM"0, "MM"3 \n\t" /* min */\
00206             "pxor      "MM"4, "MM"4 \n\t"\
00207             "pmaxsw    "MM"3, "MM"6 \n\t"\
00208             "psubw     "MM"2, "MM"4 \n\t" /* -max */\
00209             "pmaxsw    "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
00210             "1: \n\t"\
00211 \
00212             MOVQ"    16(%[tmpA]), "MM"2 \n\t" /* d */\
00213             MOVQ"      "MM"2, "MM"3 \n\t"\
00214             "psubw     "MM"6, "MM"2 \n\t" /* d-diff */\
00215             "paddw     "MM"6, "MM"3 \n\t" /* d+diff */\
00216             "pmaxsw    "MM"2, "MM"1 \n\t"\
00217             "pminsw    "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
00218             "packuswb  "MM"1, "MM"1 \n\t"\
00219 \
00220             :\
00221             :[tmpA] "r"(tmpA),\
00222              [prev] "r"(prev),\
00223              [cur]  "r"(cur),\
00224              [next] "r"(next),\
00225              [prefs]"r"((x86_reg)prefs),\
00226              [mrefs]"r"((x86_reg)mrefs),\
00227              [mode] "g"(mode)\
            :"memory" /* the asm stores into the scratch area behind tmpA */\
00228         );\
00229         __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
00230         dst += STEP;\
00231         prev+= STEP;\
00232         cur += STEP;\
00233         next+= STEP;\
00234     }
00235 
00236     if (parity) {
00237 #define prev2 "prev"
00238 #define next2 "cur"
00239         FILTER
00240 #undef prev2
00241 #undef next2
00242     } else {
00243 #define prev2 "cur"
00244 #define next2 "next"
00245         FILTER
00246 #undef prev2
00247 #undef next2
00248     }
00249 }
/* Remove every macro defined above, presumably so that this template can be
 * included again with a different COMPILE_TEMPLATE_* configuration. */
00250 #undef STEP
00251 #undef MM
00252 #undef MOV
00253 #undef MOVQ
00254 #undef MOVQU
00255 #undef PSHUF
00256 #undef PSRL1
00257 #undef PSRL2
00258 #undef LOAD
00259 #undef PABS
00260 #undef CHECK
00261 #undef CHECK1
00262 #undef CHECK2
00263 #undef FILTER
00264