libswscale/swscale.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include <inttypes.h>
00022 #include <string.h>
00023 #include <math.h>
00024 #include <stdio.h>
00025 #include "config.h"
00026 #include <assert.h>
00027 #include "swscale.h"
00028 #include "swscale_internal.h"
00029 #include "rgb2rgb.h"
00030 #include "libavutil/avassert.h"
00031 #include "libavutil/intreadwrite.h"
00032 #include "libavutil/cpu.h"
00033 #include "libavutil/avutil.h"
00034 #include "libavutil/mathematics.h"
00035 #include "libavutil/bswap.h"
00036 #include "libavutil/pixdesc.h"
00037 
00038 
/*
 * Fixed-point RGB -> YUV coefficients. Each constant is the real-valued
 * weight multiplied by (1 << RGB2YUV_SHIFT) and rounded to the nearest
 * integer. They are grouped below by the output component they feed.
 */
#define RGB2YUV_SHIFT 15

/* Y = RY*R + GY*G + BY*B */
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))

/* U = RU*R + GU*G + BU*B */
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))

/* V = RV*R + GV*G + BV*B */
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
00049 
00050 /*
00051 NOTES
00052 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
00053 
00054 TODO
00055 more intelligent misalignment avoidance for the horizontal scaler
00056 write special vertical cubic upscale version
00057 optimize C code (YV12 / minmax)
00058 add support for packed pixel YUV input & output
00059 add support for Y8 output
00060 optimize BGR24 & BGR32
00061 add BGR4 output support
00062 write special BGR->BGR scaler
00063 */
00064 
00065 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
00066 {  1,   3,   1,   3,   1,   3,   1,   3, },
00067 {  2,   0,   2,   0,   2,   0,   2,   0, },
00068 };
00069 
00070 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
00071 {  6,   2,   6,   2,   6,   2,   6,   2, },
00072 {  0,   4,   0,   4,   0,   4,   0,   4, },
00073 };
00074 
00075 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
00076 {  8,   4,  11,   7,   8,   4,  11,   7, },
00077 {  2,  14,   1,  13,   2,  14,   1,  13, },
00078 { 10,   6,   9,   5,  10,   6,   9,   5, },
00079 {  0,  12,   3,  15,   0,  12,   3,  15, },
00080 };
00081 
00082 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
00083 { 17,   9,  23,  15,  16,   8,  22,  14, },
00084 {  5,  29,   3,  27,   4,  28,   2,  26, },
00085 { 21,  13,  19,  11,  20,  12,  18,  10, },
00086 {  0,  24,   6,  30,   1,  25,   7,  31, },
00087 { 16,   8,  22,  14,  17,   9,  23,  15, },
00088 {  4,  28,   2,  26,   5,  29,   3,  27, },
00089 { 20,  12,  18,  10,  21,  13,  19,  11, },
00090 {  1,  25,   7,  31,   0,  24,   6,  30, },
00091 };
00092 
00093 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
00094 {  0,  55,  14,  68,   3,  58,  17,  72, },
00095 { 37,  18,  50,  32,  40,  22,  54,  35, },
00096 {  9,  64,   5,  59,  13,  67,   8,  63, },
00097 { 46,  27,  41,  23,  49,  31,  44,  26, },
00098 {  2,  57,  16,  71,   1,  56,  15,  70, },
00099 { 39,  21,  52,  34,  38,  19,  51,  33, },
00100 { 11,  66,   7,  62,  10,  65,   6,  60, },
00101 { 48,  30,  43,  25,  47,  29,  42,  24, },
00102 };
00103 
00104 #if 1
00105 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00106 {117,  62, 158, 103, 113,  58, 155, 100, },
00107 { 34, 199,  21, 186,  31, 196,  17, 182, },
00108 {144,  89, 131,  76, 141,  86, 127,  72, },
00109 {  0, 165,  41, 206,  10, 175,  52, 217, },
00110 {110,  55, 151,  96, 120,  65, 162, 107, },
00111 { 28, 193,  14, 179,  38, 203,  24, 189, },
00112 {138,  83, 124,  69, 148,  93, 134,  79, },
00113 {  7, 172,  48, 213,   3, 168,  45, 210, },
00114 };
00115 #elif 1
00116 // tries to correct a gamma of 1.5
00117 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00118 {  0, 143,  18, 200,   2, 156,  25, 215, },
00119 { 78,  28, 125,  64,  89,  36, 138,  74, },
00120 { 10, 180,   3, 161,  16, 195,   8, 175, },
00121 {109,  51,  93,  38, 121,  60, 105,  47, },
00122 {  1, 152,  23, 210,   0, 147,  20, 205, },
00123 { 85,  33, 134,  71,  81,  30, 130,  67, },
00124 { 14, 190,   6, 171,  12, 185,   5, 166, },
00125 {117,  57, 101,  44, 113,  54,  97,  41, },
00126 };
00127 #elif 1
00128 // tries to correct a gamma of 2.0
00129 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00130 {  0, 124,   8, 193,   0, 140,  12, 213, },
00131 { 55,  14, 104,  42,  66,  19, 119,  52, },
00132 {  3, 168,   1, 145,   6, 187,   3, 162, },
00133 { 86,  31,  70,  21,  99,  39,  82,  28, },
00134 {  0, 134,  11, 206,   0, 129,   9, 200, },
00135 { 62,  17, 114,  48,  58,  16, 109,  45, },
00136 {  5, 181,   2, 157,   4, 175,   1, 151, },
00137 { 95,  36,  78,  26,  90,  34,  74,  24, },
00138 };
00139 #else
00140 // tries to correct a gamma of 2.5
00141 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00142 {  0, 107,   3, 187,   0, 125,   6, 212, },
00143 { 39,   7,  86,  28,  49,  11, 102,  36, },
00144 {  1, 158,   0, 131,   3, 180,   1, 151, },
00145 { 68,  19,  52,  12,  81,  25,  64,  17, },
00146 {  0, 119,   5, 203,   0, 113,   4, 195, },
00147 { 45,   9,  96,  33,  42,   8,  91,  30, },
00148 {  2, 172,   1, 144,   2, 165,   0, 137, },
00149 { 77,  23,  60,  15,  72,  21,  56,  14, },
00150 };
00151 #endif
00152 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
00153 {  36, 68, 60, 92, 34, 66, 58, 90,},
00154 { 100,  4,124, 28, 98,  2,122, 26,},
00155 {  52, 84, 44, 76, 50, 82, 42, 74,},
00156 { 116, 20,108, 12,114, 18,106, 10,},
00157 {  32, 64, 56, 88, 38, 70, 62, 94,},
00158 {  96,  0,120, 24,102,  6,126, 30,},
00159 {  48, 80, 40, 72, 54, 86, 46, 78,},
00160 { 112, 16,104,  8,118, 22,110, 14,},
00161 };
00162 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
00163 {  64, 64, 64, 64, 64, 64, 64, 64 };
00164 
00165 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
00166 {
00167   {   0,  1,  0,  1,  0,  1,  0,  1,},
00168   {   1,  0,  1,  0,  1,  0,  1,  0,},
00169   {   0,  1,  0,  1,  0,  1,  0,  1,},
00170   {   1,  0,  1,  0,  1,  0,  1,  0,},
00171   {   0,  1,  0,  1,  0,  1,  0,  1,},
00172   {   1,  0,  1,  0,  1,  0,  1,  0,},
00173   {   0,  1,  0,  1,  0,  1,  0,  1,},
00174   {   1,  0,  1,  0,  1,  0,  1,  0,},
00175 },{
00176   {   1,  2,  1,  2,  1,  2,  1,  2,},
00177   {   3,  0,  3,  0,  3,  0,  3,  0,},
00178   {   1,  2,  1,  2,  1,  2,  1,  2,},
00179   {   3,  0,  3,  0,  3,  0,  3,  0,},
00180   {   1,  2,  1,  2,  1,  2,  1,  2,},
00181   {   3,  0,  3,  0,  3,  0,  3,  0,},
00182   {   1,  2,  1,  2,  1,  2,  1,  2,},
00183   {   3,  0,  3,  0,  3,  0,  3,  0,},
00184 },{
00185   {   2,  4,  3,  5,  2,  4,  3,  5,},
00186   {   6,  0,  7,  1,  6,  0,  7,  1,},
00187   {   3,  5,  2,  4,  3,  5,  2,  4,},
00188   {   7,  1,  6,  0,  7,  1,  6,  0,},
00189   {   2,  4,  3,  5,  2,  4,  3,  5,},
00190   {   6,  0,  7,  1,  6,  0,  7,  1,},
00191   {   3,  5,  2,  4,  3,  5,  2,  4,},
00192   {   7,  1,  6,  0,  7,  1,  6,  0,},
00193 },{
00194   {   4,  8,  7, 11,  4,  8,  7, 11,},
00195   {  12,  0, 15,  3, 12,  0, 15,  3,},
00196   {   6, 10,  5,  9,  6, 10,  5,  9,},
00197   {  14,  2, 13,  1, 14,  2, 13,  1,},
00198   {   4,  8,  7, 11,  4,  8,  7, 11,},
00199   {  12,  0, 15,  3, 12,  0, 15,  3,},
00200   {   6, 10,  5,  9,  6, 10,  5,  9,},
00201   {  14,  2, 13,  1, 14,  2, 13,  1,},
00202 },{
00203   {   9, 17, 15, 23,  8, 16, 14, 22,},
00204   {  25,  1, 31,  7, 24,  0, 30,  6,},
00205   {  13, 21, 11, 19, 12, 20, 10, 18,},
00206   {  29,  5, 27,  3, 28,  4, 26,  2,},
00207   {   8, 16, 14, 22,  9, 17, 15, 23,},
00208   {  24,  0, 30,  6, 25,  1, 31,  7,},
00209   {  12, 20, 10, 18, 13, 21, 11, 19,},
00210   {  28,  4, 26,  2, 29,  5, 27,  3,},
00211 },{
00212   {  18, 34, 30, 46, 17, 33, 29, 45,},
00213   {  50,  2, 62, 14, 49,  1, 61, 13,},
00214   {  26, 42, 22, 38, 25, 41, 21, 37,},
00215   {  58, 10, 54,  6, 57,  9, 53,  5,},
00216   {  16, 32, 28, 44, 19, 35, 31, 47,},
00217   {  48,  0, 60, 12, 51,  3, 63, 15,},
00218   {  24, 40, 20, 36, 27, 43, 23, 39,},
00219   {  56,  8, 52,  4, 59, 11, 55,  7,},
00220 },{
00221   {  18, 34, 30, 46, 17, 33, 29, 45,},
00222   {  50,  2, 62, 14, 49,  1, 61, 13,},
00223   {  26, 42, 22, 38, 25, 41, 21, 37,},
00224   {  58, 10, 54,  6, 57,  9, 53,  5,},
00225   {  16, 32, 28, 44, 19, 35, 31, 47,},
00226   {  48,  0, 60, 12, 51,  3, 63, 15,},
00227   {  24, 40, 20, 36, 27, 43, 23, 39,},
00228   {  56,  8, 52,  4, 59, 11, 55,  7,},
00229 },{
00230   {  36, 68, 60, 92, 34, 66, 58, 90,},
00231   { 100,  4,124, 28, 98,  2,122, 26,},
00232   {  52, 84, 44, 76, 50, 82, 42, 74,},
00233   { 116, 20,108, 12,114, 18,106, 10,},
00234   {  32, 64, 56, 88, 38, 70, 62, 94,},
00235   {  96,  0,120, 24,102,  6,126, 30,},
00236   {  48, 80, 40, 72, 54, 86, 46, 78,},
00237   { 112, 16,104,  8,118, 22,110, 14,},
00238 }};
00239 
/* File-local row of eight bytes, all equal to 64. */
static const uint8_t flat64[8] = {
    64, 64, 64, 64, 64, 64, 64, 64
};
00241 
/* 15x16 lookup table of dither scale factors. How the two indices are
 * chosen is decided by code outside this part of the file. */
const uint16_t dither_scale[15][16] = {
    {     2,     3,     3,     5,     5,     5,     5,     5,     5,     5,     5,     5,     5,     5,     5,     5 },
    {     2,     3,     7,     7,    13,    13,    25,    25,    25,    25,    25,    25,    25,    25,    25,    25 },
    {     3,     3,     4,    15,    15,    29,    57,    57,    57,   113,   113,   113,   113,   113,   113,   113 },
    {     3,     4,     4,     5,    31,    31,    61,   121,   241,   241,   241,   241,   481,   481,   481,   481 },
    {     3,     4,     5,     5,     6,    63,    63,   125,   249,   497,   993,   993,   993,   993,   993,  1985 },
    {     3,     5,     6,     6,     6,     7,   127,   127,   253,   505,  1009,  2017,  4033,  4033,  4033,  4033 },
    {     3,     5,     6,     7,     7,     7,     8,   255,   255,   509,  1017,  2033,  4065,  8129, 16257, 16257 },
    {     3,     5,     6,     8,     8,     8,     8,     9,   511,   511,  1021,  2041,  4081,  8161, 16321, 32641 },
    {     3,     5,     7,     8,     9,     9,     9,     9,    10,  1023,  1023,  2045,  4089,  8177, 16353, 32705 },
    {     3,     5,     7,     8,    10,    10,    10,    10,    10,    11,  2047,  2047,  4093,  8185, 16369, 32737 },
    {     3,     5,     7,     8,    10,    11,    11,    11,    11,    11,    12,  4095,  4095,  8189, 16377, 32753 },
    {     3,     5,     7,     9,    10,    12,    12,    12,    12,    12,    12,    13,  8191,  8191, 16381, 32761 },
    {     3,     5,     7,     9,    10,    12,    13,    13,    13,    13,    13,    13,    14, 16383, 16383, 32765 },
    {     3,     5,     7,     9,    10,    12,    14,    14,    14,    14,    14,    14,    14,    15, 32767, 32767 },
    {     3,     5,     7,     9,    11,    12,    14,    15,    15,    15,    15,    15,    15,    15,    16, 65535 },
};
00259 
/*
 * Store one 16-bit sample at pos: (val >> shift) is clipped with
 * av_clip_<signedness>16(), bias is added, and the result is written
 * big- or little-endian. Relies on `big_endian` and `shift` being in scope
 * at the point of use.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else); val and bias are parenthesized so
 * compound expressions are shifted/added as a whole.
 */
#define output_pixel(pos, val, bias, signedness) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } else { \
            AV_WL16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } \
    } while (0)
00266 
00267 static av_always_inline void
00268 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
00269                          int big_endian, int output_bits)
00270 {
00271     int i;
00272     int shift = 3;
00273     av_assert0(output_bits == 16);
00274 
00275     for (i = 0; i < dstW; i++) {
00276         int val = src[i] + (1 << (shift - 1));
00277         output_pixel(&dest[i], val, 0, uint);
00278     }
00279 }
00280 
00281 static av_always_inline void
00282 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
00283                          const int32_t **src, uint16_t *dest, int dstW,
00284                          int big_endian, int output_bits)
00285 {
00286     int i;
00287     int shift = 15;
00288     av_assert0(output_bits == 16);
00289 
00290     for (i = 0; i < dstW; i++) {
00291         int val = 1 << (shift - 1);
00292         int j;
00293 
00294         /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
00295          * filters (or anything with negative coeffs, the range can be slightly
00296          * wider in both directions. To account for this overflow, we subtract
00297          * a constant so it always fits in the signed range (assuming a
00298          * reasonable filterSize), and re-add that at the end. */
00299         val -= 0x40000000;
00300         for (j = 0; j < filterSize; j++)
00301             val += src[j][i] * filter[j];
00302 
00303         output_pixel(&dest[i], val, 0x8000, int);
00304     }
00305 }
00306 
00307 #undef output_pixel
00308 
/*
 * Store one sample at pos as a 16-bit word: (val >> shift) is clipped to
 * output_bits unsigned bits and written big- or little-endian. Relies on
 * `big_endian`, `shift` and `output_bits` being in scope at the point of use.
 *
 * Wrapped in do { } while (0) so the macro is a single statement, and val is
 * parenthesized so a compound expression is shifted as a whole.
 */
#define output_pixel(pos, val) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
        } else { \
            AV_WL16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
        } \
    } while (0)
00315 
00316 static av_always_inline void
00317 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
00318                          int big_endian, int output_bits)
00319 {
00320     int i;
00321     int shift = 15 - output_bits;
00322 
00323     for (i = 0; i < dstW; i++) {
00324         int val = src[i] + (1 << (shift - 1));
00325         output_pixel(&dest[i], val);
00326     }
00327 }
00328 
00329 static av_always_inline void
00330 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
00331                          const int16_t **src, uint16_t *dest, int dstW,
00332                          int big_endian, int output_bits)
00333 {
00334     int i;
00335     int shift = 11 + 16 - output_bits;
00336 
00337     for (i = 0; i < dstW; i++) {
00338         int val = 1 << (shift - 1);
00339         int j;
00340 
00341         for (j = 0; j < filterSize; j++)
00342             val += src[j][i] * filter[j];
00343 
00344         output_pixel(&dest[i], val);
00345     }
00346 }
00347 
00348 #undef output_pixel
00349 
/*
 * Generate the yuv2plane1_<bits><BE_LE>_c() and yuv2planeX_<bits><BE_LE>_c()
 * entry points. Both simply reinterpret their generic int16_t/uint8_t
 * pointers as the types the selected template works on and forward to it;
 * the dither and offset arguments are accepted but not used.
 *
 *   bits           output bit depth, passed on as output_bits
 *   BE_LE          name suffix (BE or LE)
 *   is_be          1 for big-endian stores, 0 for little-endian
 *   template_size  which *_c_template family to call (10 or 16)
 *   typeX_t        element type of the intermediate lines for that template
 */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset) \
{ \
    const typeX_t *in  = (const typeX_t *) src; \
    uint16_t      *out = (uint16_t *) dest; \
 \
    yuv2plane1_ ## template_size ## _c_template(in, out, dstW, is_be, bits); \
} \
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset) \
{ \
    const typeX_t **in  = (const typeX_t **) src; \
    uint16_t       *out = (uint16_t *) dest; \
 \
    yuv2planeX_ ## template_size ## _c_template(filter, filterSize, in, out, \
                                                dstW, is_be, bits); \
}

yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
00372 
/**
 * Vertically filter filterSize lines of 16-bit intermediate samples into one
 * line of 8-bit output, adding an 8-entry dither row before the final shift.
 *
 * @param filter      vertical filter coefficients, one per source line
 * @param filterSize  number of coefficients / source lines
 * @param src         filterSize pointers to the source lines
 * @param dest        output line
 * @param dstW        number of samples to write
 * @param dither      8 dither values, indexed by (x + offset) & 7
 * @param offset      phase added to x when picking the dither value
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x;

    for (x = 0; x < dstW; x++) {
        /* start from the dither value, scaled up to the accumulator's precision */
        int sum = dither[(x + offset) & 7] << 12;
        int tap = 0;

        while (tap < filterSize) {
            sum += src[tap][x] * filter[tap];
            tap++;
        }

        dest[x] = av_clip_uint8(sum >> 19);
    }
}
00387 
/**
 * Convert one line of 16-bit intermediate samples to 8-bit output without
 * vertical filtering: add the dither value, shift right by 7 and clip.
 *
 * @param src     input line (dstW samples)
 * @param dest    output line
 * @param dstW    number of samples to write
 * @param dither  8 dither values, indexed by (x + offset) & 7
 * @param offset  phase added to x when picking the dither value
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x = 0;

    while (x < dstW) {
        const int dithered = src[x] + dither[(x + offset) & 7];

        dest[x] = av_clip_uint8(dithered >> 7);
        x++;
    }
}
00397 
00398 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
00399                         const int16_t **chrUSrc, const int16_t **chrVSrc,
00400                         uint8_t *dest, int chrDstW)
00401 {
00402     enum PixelFormat dstFormat = c->dstFormat;
00403     const uint8_t *chrDither = c->chrDither8;
00404     int i;
00405 
00406     if (dstFormat == PIX_FMT_NV12)
00407         for (i=0; i<chrDstW; i++) {
00408             int u = chrDither[i & 7] << 12;
00409             int v = chrDither[(i + 3) & 7] << 12;
00410             int j;
00411             for (j=0; j<chrFilterSize; j++) {
00412                 u += chrUSrc[j][i] * chrFilter[j];
00413                 v += chrVSrc[j][i] * chrFilter[j];
00414             }
00415 
00416             dest[2*i]= av_clip_uint8(u>>19);
00417             dest[2*i+1]= av_clip_uint8(v>>19);
00418         }
00419     else
00420         for (i=0; i<chrDstW; i++) {
00421             int u = chrDither[i & 7] << 12;
00422             int v = chrDither[(i + 3) & 7] << 12;
00423             int j;
00424             for (j=0; j<chrFilterSize; j++) {
00425                 u += chrUSrc[j][i] * chrFilter[j];
00426                 v += chrVSrc[j][i] * chrFilter[j];
00427             }
00428 
00429             dest[2*i]= av_clip_uint8(v>>19);
00430             dest[2*i+1]= av_clip_uint8(u>>19);
00431         }
00432 }
00433 
/*
 * Store val at pos as a 16-bit word, big-endian when `target` is
 * PIX_FMT_GRAY16BE and little-endian otherwise. Relies on `target` being in
 * scope at the point of use.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement.
 */
#define output_pixel(pos, val) \
    do { \
        if (target == PIX_FMT_GRAY16BE) { \
            AV_WB16(pos, (val)); \
        } else { \
            AV_WL16(pos, (val)); \
        } \
    } while (0)
00440 
00441 static av_always_inline void
00442 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
00443                         const int32_t **lumSrc, int lumFilterSize,
00444                         const int16_t *chrFilter, const int32_t **chrUSrc,
00445                         const int32_t **chrVSrc, int chrFilterSize,
00446                         const int32_t **alpSrc, uint16_t *dest, int dstW,
00447                         int y, enum PixelFormat target)
00448 {
00449     int i;
00450 
00451     for (i = 0; i < (dstW >> 1); i++) {
00452         int j;
00453         int Y1 = (1 << 14) - 0x40000000;
00454         int Y2 = (1 << 14) - 0x40000000;
00455 
00456         for (j = 0; j < lumFilterSize; j++) {
00457             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00458             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00459         }
00460         Y1 >>= 15;
00461         Y2 >>= 15;
00462         Y1 = av_clip_int16(Y1);
00463         Y2 = av_clip_int16(Y2);
00464         output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
00465         output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
00466     }
00467 }
00468 
00469 static av_always_inline void
00470 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
00471                         const int32_t *ubuf[2], const int32_t *vbuf[2],
00472                         const int32_t *abuf[2], uint16_t *dest, int dstW,
00473                         int yalpha, int uvalpha, int y,
00474                         enum PixelFormat target)
00475 {
00476     int  yalpha1 = 4095 - yalpha;
00477     int i;
00478     const int32_t *buf0 = buf[0], *buf1 = buf[1];
00479 
00480     for (i = 0; i < (dstW >> 1); i++) {
00481         int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
00482         int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
00483 
00484         output_pixel(&dest[i * 2 + 0], Y1);
00485         output_pixel(&dest[i * 2 + 1], Y2);
00486     }
00487 }
00488 
00489 static av_always_inline void
00490 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
00491                         const int32_t *ubuf[2], const int32_t *vbuf[2],
00492                         const int32_t *abuf0, uint16_t *dest, int dstW,
00493                         int uvalpha, int y, enum PixelFormat target)
00494 {
00495     int i;
00496 
00497     for (i = 0; i < (dstW >> 1); i++) {
00498         int Y1 = (buf0[i * 2    ]+4)>>3;
00499         int Y2 = (buf0[i * 2 + 1]+4)>>3;
00500 
00501         output_pixel(&dest[i * 2 + 0], Y1);
00502         output_pixel(&dest[i * 2 + 1], Y2);
00503     }
00504 }
00505 
00506 #undef output_pixel
00507 
00508 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
00509 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
00510                         const int16_t **_lumSrc, int lumFilterSize, \
00511                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
00512                         const int16_t **_chrVSrc, int chrFilterSize, \
00513                         const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
00514                         int y) \
00515 { \
00516     const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
00517                   **chrUSrc = (const int32_t **) _chrUSrc, \
00518                   **chrVSrc = (const int32_t **) _chrVSrc, \
00519                   **alpSrc  = (const int32_t **) _alpSrc; \
00520     uint16_t *dest = (uint16_t *) _dest; \
00521     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
00522                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00523                           alpSrc, dest, dstW, y, fmt); \
00524 } \
00525  \
00526 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
00527                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
00528                         const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
00529                         int yalpha, int uvalpha, int y) \
00530 { \
00531     const int32_t **buf  = (const int32_t **) _buf, \
00532                   **ubuf = (const int32_t **) _ubuf, \
00533                   **vbuf = (const int32_t **) _vbuf, \
00534                   **abuf = (const int32_t **) _abuf; \
00535     uint16_t *dest = (uint16_t *) _dest; \
00536     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
00537                           dest, dstW, yalpha, uvalpha, y, fmt); \
00538 } \
00539  \
00540 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
00541                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
00542                         const int16_t *_abuf0, uint8_t *_dest, int dstW, \
00543                         int uvalpha, int y) \
00544 { \
00545     const int32_t *buf0  = (const int32_t *)  _buf0, \
00546                  **ubuf  = (const int32_t **) _ubuf, \
00547                  **vbuf  = (const int32_t **) _vbuf, \
00548                   *abuf0 = (const int32_t *)  _abuf0; \
00549     uint16_t *dest = (uint16_t *) _dest; \
00550     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
00551                                   dstW, uvalpha, y, fmt); \
00552 }
00553 
00554 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
00555 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
00556 
/*
 * Store the accumulated bits in pos: as-is when `target` is
 * PIX_FMT_MONOBLACK, bitwise inverted otherwise. pos is evaluated exactly
 * once, so an argument such as *dest++ advances a single time. Relies on
 * `target` being in scope at the point of use.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement;
 * acc is parenthesized so a compound expression is inverted as a whole.
 */
#define output_pixel(pos, acc) \
    do { \
        if (target == PIX_FMT_MONOBLACK) { \
            pos = (acc); \
        } else { \
            pos = ~(acc); \
        } \
    } while (0)
00563 
00564 static av_always_inline void
00565 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
00566                       const int16_t **lumSrc, int lumFilterSize,
00567                       const int16_t *chrFilter, const int16_t **chrUSrc,
00568                       const int16_t **chrVSrc, int chrFilterSize,
00569                       const int16_t **alpSrc, uint8_t *dest, int dstW,
00570                       int y, enum PixelFormat target)
00571 {
00572     const uint8_t * const d128=dither_8x8_220[y&7];
00573     uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
00574     int i;
00575     unsigned acc = 0;
00576 
00577     for (i = 0; i < dstW - 1; i += 2) {
00578         int j;
00579         int Y1 = 1 << 18;
00580         int Y2 = 1 << 18;
00581 
00582         for (j = 0; j < lumFilterSize; j++) {
00583             Y1 += lumSrc[j][i]   * lumFilter[j];
00584             Y2 += lumSrc[j][i+1] * lumFilter[j];
00585         }
00586         Y1 >>= 19;
00587         Y2 >>= 19;
00588         if ((Y1 | Y2) & 0x100) {
00589             Y1 = av_clip_uint8(Y1);
00590             Y2 = av_clip_uint8(Y2);
00591         }
00592         acc += acc + g[Y1 + d128[(i + 0) & 7]];
00593         acc += acc + g[Y2 + d128[(i + 1) & 7]];
00594         if ((i & 7) == 6) {
00595             output_pixel(*dest++, acc);
00596         }
00597     }
00598 }
00599 
00600 static av_always_inline void
00601 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
00602                       const int16_t *ubuf[2], const int16_t *vbuf[2],
00603                       const int16_t *abuf[2], uint8_t *dest, int dstW,
00604                       int yalpha, int uvalpha, int y,
00605                       enum PixelFormat target)
00606 {
00607     const int16_t *buf0  = buf[0],  *buf1  = buf[1];
00608     const uint8_t * const d128 = dither_8x8_220[y & 7];
00609     uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
00610     int  yalpha1 = 4095 - yalpha;
00611     int i;
00612 
00613     for (i = 0; i < dstW - 7; i += 8) {
00614         int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
00615         acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
00616         acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
00617         acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
00618         acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
00619         acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
00620         acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
00621         acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
00622         output_pixel(*dest++, acc);
00623     }
00624 }
00625 
00626 static av_always_inline void
00627 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
00628                       const int16_t *ubuf[2], const int16_t *vbuf[2],
00629                       const int16_t *abuf0, uint8_t *dest, int dstW,
00630                       int uvalpha, int y, enum PixelFormat target)
00631 {
00632     const uint8_t * const d128 = dither_8x8_220[y & 7];
00633     uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
00634     int i;
00635 
00636     for (i = 0; i < dstW - 7; i += 8) {
00637         int acc =    g[(buf0[i    ] >> 7) + d128[0]];
00638         acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
00639         acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
00640         acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
00641         acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
00642         acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
00643         acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
00644         acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
00645         output_pixel(*dest++, acc);
00646     }
00647 }
00648 
00649 #undef output_pixel
00650 
00651 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
00652 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
00653                                 const int16_t **lumSrc, int lumFilterSize, \
00654                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
00655                                 const int16_t **chrVSrc, int chrFilterSize, \
00656                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
00657                                 int y) \
00658 { \
00659     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
00660                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00661                                   alpSrc, dest, dstW, y, fmt); \
00662 } \
00663  \
00664 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
00665                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
00666                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
00667                                 int yalpha, int uvalpha, int y) \
00668 { \
00669     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
00670                                   dest, dstW, yalpha, uvalpha, y, fmt); \
00671 } \
00672  \
00673 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
00674                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
00675                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
00676                                 int uvalpha, int y) \
00677 { \
00678     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
00679                                   abuf0, dest, dstW, uvalpha, \
00680                                   y, fmt); \
00681 }
00682 
00683 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
00684 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
00685 
/*
 * Write one pair of pixels (two luma samples sharing one U and one V) as
 * four bytes starting at dest[pos]. The byte order is Y1 U Y2 V when
 * `target` is PIX_FMT_YUYV422 and U Y1 V Y2 otherwise. Relies on `dest` and
 * `target` being in scope at the point of use.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement;
 * the arguments are parenthesized so compound expressions are used as a
 * whole.
 */
#define output_pixels(pos, Y1, U, Y2, V) \
    do { \
        if (target == PIX_FMT_YUYV422) { \
            dest[(pos) + 0] = (Y1); \
            dest[(pos) + 1] = (U);  \
            dest[(pos) + 2] = (Y2); \
            dest[(pos) + 3] = (V);  \
        } else { \
            dest[(pos) + 0] = (U);  \
            dest[(pos) + 1] = (Y1); \
            dest[(pos) + 2] = (V);  \
            dest[(pos) + 3] = (Y2); \
        } \
    } while (0)
00698 
00699 static av_always_inline void
00700 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
00701                      const int16_t **lumSrc, int lumFilterSize,
00702                      const int16_t *chrFilter, const int16_t **chrUSrc,
00703                      const int16_t **chrVSrc, int chrFilterSize,
00704                      const int16_t **alpSrc, uint8_t *dest, int dstW,
00705                      int y, enum PixelFormat target)
00706 {
00707     int i;
00708 
00709     for (i = 0; i < (dstW >> 1); i++) {
00710         int j;
00711         int Y1 = 1 << 18;
00712         int Y2 = 1 << 18;
00713         int U  = 1 << 18;
00714         int V  = 1 << 18;
00715 
00716         for (j = 0; j < lumFilterSize; j++) {
00717             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00718             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00719         }
00720         for (j = 0; j < chrFilterSize; j++) {
00721             U += chrUSrc[j][i] * chrFilter[j];
00722             V += chrVSrc[j][i] * chrFilter[j];
00723         }
00724         Y1 >>= 19;
00725         Y2 >>= 19;
00726         U  >>= 19;
00727         V  >>= 19;
00728         if ((Y1 | Y2 | U | V) & 0x100) {
00729             Y1 = av_clip_uint8(Y1);
00730             Y2 = av_clip_uint8(Y2);
00731             U  = av_clip_uint8(U);
00732             V  = av_clip_uint8(V);
00733         }
00734         output_pixels(4*i, Y1, U, Y2, V);
00735     }
00736 }
00737 
00738 static av_always_inline void
00739 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
00740                      const int16_t *ubuf[2], const int16_t *vbuf[2],
00741                      const int16_t *abuf[2], uint8_t *dest, int dstW,
00742                      int yalpha, int uvalpha, int y,
00743                      enum PixelFormat target)
00744 {
00745     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
00746                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00747                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00748     int  yalpha1 = 4095 - yalpha;
00749     int uvalpha1 = 4095 - uvalpha;
00750     int i;
00751 
00752     for (i = 0; i < (dstW >> 1); i++) {
00753         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
00754         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
00755         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
00756         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
00757 
00758         output_pixels(i * 4, Y1, U, Y2, V);
00759     }
00760 }
00761 
00762 static av_always_inline void
00763 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
00764                      const int16_t *ubuf[2], const int16_t *vbuf[2],
00765                      const int16_t *abuf0, uint8_t *dest, int dstW,
00766                      int uvalpha, int y, enum PixelFormat target)
00767 {
00768     const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00769                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00770     int i;
00771 
00772     if (uvalpha < 2048) {
00773         for (i = 0; i < (dstW >> 1); i++) {
00774             int Y1 = buf0[i * 2]     >> 7;
00775             int Y2 = buf0[i * 2 + 1] >> 7;
00776             int U  = ubuf1[i]        >> 7;
00777             int V  = vbuf1[i]        >> 7;
00778 
00779             output_pixels(i * 4, Y1, U, Y2, V);
00780         }
00781     } else {
00782         for (i = 0; i < (dstW >> 1); i++) {
00783             int Y1 =  buf0[i * 2]          >> 7;
00784             int Y2 =  buf0[i * 2 + 1]      >> 7;
00785             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
00786             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
00787 
00788             output_pixels(i * 4, Y1, U, Y2, V);
00789         }
00790     }
00791 }
00792 
00793 #undef output_pixels
00794 
/* Instantiate the packed 4:2:2 output wrappers for both byte orders
 * (PIX_FMT_YUYV422 and PIX_FMT_UYVY422).
 * NOTE(review): YUV2PACKEDWRAPPER is defined outside this excerpt;
 * presumably it forwards to the yuv2422_*_c_template functions with the
 * given pixel format -- confirm against the macro. */
00795 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
00796 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
00797 
/*
 * R_B / B_R select the first and third colour component of an output
 * pixel: R then B when target is PIX_FMT_RGB48LE/BE, B then R for every
 * other target. Both rely on `target`, `R` and `B` being in scope.
 */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
/*
 * Store the 16-bit value val at pos, using AV_WB16 when isBE(target) is
 * true and AV_WL16 otherwise. Wrapped in do { } while (0) so that the
 * expansion is a single statement and stays safe inside an unbraced
 * if/else.
 */
#define output_pixel(pos, val) \
    do { \
        if (isBE(target)) { \
            AV_WB16(pos, val); \
        } else { \
            AV_WL16(pos, val); \
        } \
    } while (0)
00806 
00807 static av_always_inline void
00808 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
00809                        const int32_t **lumSrc, int lumFilterSize,
00810                        const int16_t *chrFilter, const int32_t **chrUSrc,
00811                        const int32_t **chrVSrc, int chrFilterSize,
00812                        const int32_t **alpSrc, uint16_t *dest, int dstW,
00813                        int y, enum PixelFormat target)
00814 {
00815     int i;
00816 
00817     for (i = 0; i < (dstW >> 1); i++) {
00818         int j;
00819         int Y1 = -0x40000000;
00820         int Y2 = -0x40000000;
00821         int U  = -128 << 23; // 19
00822         int V  = -128 << 23;
00823         int R, G, B;
00824 
00825         for (j = 0; j < lumFilterSize; j++) {
00826             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00827             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00828         }
00829         for (j = 0; j < chrFilterSize; j++) {
00830             U += chrUSrc[j][i] * chrFilter[j];
00831             V += chrVSrc[j][i] * chrFilter[j];
00832         }
00833 
00834         // 8bit: 12+15=27; 16-bit: 12+19=31
00835         Y1 >>= 14; // 10
00836         Y1 += 0x10000;
00837         Y2 >>= 14;
00838         Y2 += 0x10000;
00839         U  >>= 14;
00840         V  >>= 14;
00841 
00842         // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
00843         Y1 -= c->yuv2rgb_y_offset;
00844         Y2 -= c->yuv2rgb_y_offset;
00845         Y1 *= c->yuv2rgb_y_coeff;
00846         Y2 *= c->yuv2rgb_y_coeff;
00847         Y1 += 1 << 13; // 21
00848         Y2 += 1 << 13;
00849         // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
00850 
00851         R = V * c->yuv2rgb_v2r_coeff;
00852         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00853         B =                            U * c->yuv2rgb_u2b_coeff;
00854 
00855         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
00856         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00857         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00858         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00859         output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00860         output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00861         output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00862         dest += 6;
00863     }
00864 }
00865 
00866 static av_always_inline void
00867 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
00868                        const int32_t *ubuf[2], const int32_t *vbuf[2],
00869                        const int32_t *abuf[2], uint16_t *dest, int dstW,
00870                        int yalpha, int uvalpha, int y,
00871                        enum PixelFormat target)
00872 {
00873     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
00874                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00875                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00876     int  yalpha1 = 4095 - yalpha;
00877     int uvalpha1 = 4095 - uvalpha;
00878     int i;
00879 
00880     for (i = 0; i < (dstW >> 1); i++) {
00881         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
00882         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
00883         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
00884         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
00885         int R, G, B;
00886 
00887         Y1 -= c->yuv2rgb_y_offset;
00888         Y2 -= c->yuv2rgb_y_offset;
00889         Y1 *= c->yuv2rgb_y_coeff;
00890         Y2 *= c->yuv2rgb_y_coeff;
00891         Y1 += 1 << 13;
00892         Y2 += 1 << 13;
00893 
00894         R = V * c->yuv2rgb_v2r_coeff;
00895         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00896         B =                            U * c->yuv2rgb_u2b_coeff;
00897 
00898         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00899         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00900         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00901         output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00902         output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00903         output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00904         dest += 6;
00905     }
00906 }
00907 
00908 static av_always_inline void
00909 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
00910                        const int32_t *ubuf[2], const int32_t *vbuf[2],
00911                        const int32_t *abuf0, uint16_t *dest, int dstW,
00912                        int uvalpha, int y, enum PixelFormat target)
00913 {
00914     const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00915                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00916     int i;
00917 
00918     if (uvalpha < 2048) {
00919         for (i = 0; i < (dstW >> 1); i++) {
00920             int Y1 = (buf0[i * 2]    ) >> 2;
00921             int Y2 = (buf0[i * 2 + 1]) >> 2;
00922             int U  = (ubuf0[i] + (-128 << 11)) >> 2;
00923             int V  = (vbuf0[i] + (-128 << 11)) >> 2;
00924             int R, G, B;
00925 
00926             Y1 -= c->yuv2rgb_y_offset;
00927             Y2 -= c->yuv2rgb_y_offset;
00928             Y1 *= c->yuv2rgb_y_coeff;
00929             Y2 *= c->yuv2rgb_y_coeff;
00930             Y1 += 1 << 13;
00931             Y2 += 1 << 13;
00932 
00933             R = V * c->yuv2rgb_v2r_coeff;
00934             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00935             B =                            U * c->yuv2rgb_u2b_coeff;
00936 
00937             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00938             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00939             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00940             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00941             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00942             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00943             dest += 6;
00944         }
00945     } else {
00946         for (i = 0; i < (dstW >> 1); i++) {
00947             int Y1 = (buf0[i * 2]    ) >> 2;
00948             int Y2 = (buf0[i * 2 + 1]) >> 2;
00949             int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
00950             int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
00951             int R, G, B;
00952 
00953             Y1 -= c->yuv2rgb_y_offset;
00954             Y2 -= c->yuv2rgb_y_offset;
00955             Y1 *= c->yuv2rgb_y_coeff;
00956             Y2 *= c->yuv2rgb_y_coeff;
00957             Y1 += 1 << 13;
00958             Y2 += 1 << 13;
00959 
00960             R = V * c->yuv2rgb_v2r_coeff;
00961             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00962             B =                            U * c->yuv2rgb_u2b_coeff;
00963 
00964             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00965             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00966             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00967             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00968             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00969             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00970             dest += 6;
00971         }
00972     }
00973 }
00974 
/* Drop the helper macros of the 48-bit RGB writers. The component
 * selectors are spelled R_B / B_R (upper case); undefining the lower-case
 * names would leave them defined for the rest of the file. */
#undef output_pixel
#undef R_B
#undef B_R
00978 
/* Instantiate the 48-bit output wrappers for RGB and BGR component order
 * in both endiannesses.
 * NOTE(review): YUV2PACKED16WRAPPER is defined outside this excerpt;
 * presumably it forwards to the yuv2rgb48_*_c_template functions with the
 * given pixel format -- confirm against the macro. */
00979 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
00980 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
00981 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
00982 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
00983 
00984 /*
00985  * Write out 2 RGB pixels in the target pixel format. This function takes a
00986  * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
00987  * things like endianness conversion and shifting. The caller takes care of
00988  * setting the correct offset in these tables from the chroma (U/V) values.
00989  * This function then uses the luminance (Y1/Y2) values to write out the
00990  * correct RGB values into the destination buffer.
00991  */
00992 static av_always_inline void
00993 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
00994               unsigned A1, unsigned A2,
00995               const void *_r, const void *_g, const void *_b, int y,
00996               enum PixelFormat target, int hasAlpha)
00997 {
00998     if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
00999         target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
01000         uint32_t *dest = (uint32_t *) _dest;
01001         const uint32_t *r = (const uint32_t *) _r;
01002         const uint32_t *g = (const uint32_t *) _g;
01003         const uint32_t *b = (const uint32_t *) _b;
01004 
01005 #if CONFIG_SMALL
01006         int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
01007 
01008         dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
01009         dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
01010 #else
01011         if (hasAlpha) {
01012             int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
01013 
01014             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
01015             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
01016         } else {
01017             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
01018             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
01019         }
01020 #endif
01021     } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
01022         uint8_t *dest = (uint8_t *) _dest;
01023         const uint8_t *r = (const uint8_t *) _r;
01024         const uint8_t *g = (const uint8_t *) _g;
01025         const uint8_t *b = (const uint8_t *) _b;
01026 
01027 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
01028 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
01029 
01030         dest[i * 6 + 0] = r_b[Y1];
01031         dest[i * 6 + 1] =   g[Y1];
01032         dest[i * 6 + 2] = b_r[Y1];
01033         dest[i * 6 + 3] = r_b[Y2];
01034         dest[i * 6 + 4] =   g[Y2];
01035         dest[i * 6 + 5] = b_r[Y2];
01036 #undef r_b
01037 #undef b_r
01038     } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
01039                target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
01040                target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
01041         uint16_t *dest = (uint16_t *) _dest;
01042         const uint16_t *r = (const uint16_t *) _r;
01043         const uint16_t *g = (const uint16_t *) _g;
01044         const uint16_t *b = (const uint16_t *) _b;
01045         int dr1, dg1, db1, dr2, dg2, db2;
01046 
01047         if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
01048             dr1 = dither_2x2_8[ y & 1     ][0];
01049             dg1 = dither_2x2_4[ y & 1     ][0];
01050             db1 = dither_2x2_8[(y & 1) ^ 1][0];
01051             dr2 = dither_2x2_8[ y & 1     ][1];
01052             dg2 = dither_2x2_4[ y & 1     ][1];
01053             db2 = dither_2x2_8[(y & 1) ^ 1][1];
01054         } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
01055             dr1 = dither_2x2_8[ y & 1     ][0];
01056             dg1 = dither_2x2_8[ y & 1     ][1];
01057             db1 = dither_2x2_8[(y & 1) ^ 1][0];
01058             dr2 = dither_2x2_8[ y & 1     ][1];
01059             dg2 = dither_2x2_8[ y & 1     ][0];
01060             db2 = dither_2x2_8[(y & 1) ^ 1][1];
01061         } else {
01062             dr1 = dither_4x4_16[ y & 3     ][0];
01063             dg1 = dither_4x4_16[ y & 3     ][1];
01064             db1 = dither_4x4_16[(y & 3) ^ 3][0];
01065             dr2 = dither_4x4_16[ y & 3     ][1];
01066             dg2 = dither_4x4_16[ y & 3     ][0];
01067             db2 = dither_4x4_16[(y & 3) ^ 3][1];
01068         }
01069 
01070         dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
01071         dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
01072     } else /* 8/4-bit */ {
01073         uint8_t *dest = (uint8_t *) _dest;
01074         const uint8_t *r = (const uint8_t *) _r;
01075         const uint8_t *g = (const uint8_t *) _g;
01076         const uint8_t *b = (const uint8_t *) _b;
01077         int dr1, dg1, db1, dr2, dg2, db2;
01078 
01079         if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
01080             const uint8_t * const d64 = dither_8x8_73[y & 7];
01081             const uint8_t * const d32 = dither_8x8_32[y & 7];
01082             dr1 = dg1 = d32[(i * 2 + 0) & 7];
01083             db1 =       d64[(i * 2 + 0) & 7];
01084             dr2 = dg2 = d32[(i * 2 + 1) & 7];
01085             db2 =       d64[(i * 2 + 1) & 7];
01086         } else {
01087             const uint8_t * const d64  = dither_8x8_73 [y & 7];
01088             const uint8_t * const d128 = dither_8x8_220[y & 7];
01089             dr1 = db1 = d128[(i * 2 + 0) & 7];
01090             dg1 =        d64[(i * 2 + 0) & 7];
01091             dr2 = db2 = d128[(i * 2 + 1) & 7];
01092             dg2 =        d64[(i * 2 + 1) & 7];
01093         }
01094 
01095         if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
01096             dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
01097                     ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
01098         } else {
01099             dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
01100             dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
01101         }
01102     }
01103 }
01104 
01105 static av_always_inline void
01106 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
01107                      const int16_t **lumSrc, int lumFilterSize,
01108                      const int16_t *chrFilter, const int16_t **chrUSrc,
01109                      const int16_t **chrVSrc, int chrFilterSize,
01110                      const int16_t **alpSrc, uint8_t *dest, int dstW,
01111                      int y, enum PixelFormat target, int hasAlpha)
01112 {
01113     int i;
01114 
01115     for (i = 0; i < (dstW >> 1); i++) {
01116         int j;
01117         int Y1 = 1 << 18;
01118         int Y2 = 1 << 18;
01119         int U  = 1 << 18;
01120         int V  = 1 << 18;
01121         int av_unused A1, A2;
01122         const void *r, *g, *b;
01123 
01124         for (j = 0; j < lumFilterSize; j++) {
01125             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
01126             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
01127         }
01128         for (j = 0; j < chrFilterSize; j++) {
01129             U += chrUSrc[j][i] * chrFilter[j];
01130             V += chrVSrc[j][i] * chrFilter[j];
01131         }
01132         Y1 >>= 19;
01133         Y2 >>= 19;
01134         U  >>= 19;
01135         V  >>= 19;
01136         if (hasAlpha) {
01137             A1 = 1 << 18;
01138             A2 = 1 << 18;
01139             for (j = 0; j < lumFilterSize; j++) {
01140                 A1 += alpSrc[j][i * 2    ] * lumFilter[j];
01141                 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
01142             }
01143             A1 >>= 19;
01144             A2 >>= 19;
01145             if ((A1 | A2) & 0x100) {
01146                 A1 = av_clip_uint8(A1);
01147                 A2 = av_clip_uint8(A2);
01148             }
01149         }
01150 
01151         r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM];
01152         g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
01153         b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
01154 
01155         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01156                       r, g, b, y, target, hasAlpha);
01157     }
01158 }
01159 
01160 static av_always_inline void
01161 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
01162                      const int16_t *ubuf[2], const int16_t *vbuf[2],
01163                      const int16_t *abuf[2], uint8_t *dest, int dstW,
01164                      int yalpha, int uvalpha, int y,
01165                      enum PixelFormat target, int hasAlpha)
01166 {
01167     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
01168                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
01169                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
01170                   *abuf0 = hasAlpha ? abuf[0] : NULL,
01171                   *abuf1 = hasAlpha ? abuf[1] : NULL;
01172     int  yalpha1 = 4095 - yalpha;
01173     int uvalpha1 = 4095 - uvalpha;
01174     int i;
01175 
01176     for (i = 0; i < (dstW >> 1); i++) {
01177         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
01178         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
01179         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
01180         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
01181         int A1, A2;
01182         const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
01183                    *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
01184                    *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
01185 
01186         if (hasAlpha) {
01187             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
01188             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
01189         }
01190 
01191         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01192                       r, g, b, y, target, hasAlpha);
01193     }
01194 }
01195 
01196 static av_always_inline void
01197 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
01198                      const int16_t *ubuf[2], const int16_t *vbuf[2],
01199                      const int16_t *abuf0, uint8_t *dest, int dstW,
01200                      int uvalpha, int y, enum PixelFormat target,
01201                      int hasAlpha)
01202 {
01203     const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
01204                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
01205     int i;
01206 
01207     if (uvalpha < 2048) {
01208         for (i = 0; i < (dstW >> 1); i++) {
01209             int Y1 = buf0[i * 2]     >> 7;
01210             int Y2 = buf0[i * 2 + 1] >> 7;
01211             int U  = ubuf1[i]        >> 7;
01212             int V  = vbuf1[i]        >> 7;
01213             int A1, A2;
01214             const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
01215                        *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
01216                        *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
01217 
01218             if (hasAlpha) {
01219                 A1 = abuf0[i * 2    ] >> 7;
01220                 A2 = abuf0[i * 2 + 1] >> 7;
01221             }
01222 
01223             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01224                           r, g, b, y, target, hasAlpha);
01225         }
01226     } else {
01227         for (i = 0; i < (dstW >> 1); i++) {
01228             int Y1 =  buf0[i * 2]          >> 7;
01229             int Y2 =  buf0[i * 2 + 1]      >> 7;
01230             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
01231             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
01232             int A1, A2;
01233             const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
01234                        *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
01235                        *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
01236 
01237             if (hasAlpha) {
01238                 A1 = abuf0[i * 2    ] >> 7;
01239                 A2 = abuf0[i * 2 + 1] >> 7;
01240             }
01241 
01242             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01243                           r, g, b, y, target, hasAlpha);
01244         }
01245     }
01246 }
01247 
/*
 * YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) defines the static
 * function <name><ext>_X_c, which forwards all of its arguments to
 * <name><base>_X_c_template and appends fmt and hasAlpha as the last two
 * arguments. hasAlpha is pasted as an expression into the wrapper body, so
 * it may refer to the wrapper's SwsContext parameter `c`.
 */
01248 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
01249 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
01250                                 const int16_t **lumSrc, int lumFilterSize, \
01251                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
01252                                 const int16_t **chrVSrc, int chrFilterSize, \
01253                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
01254                                 int y) \
01255 { \
01256     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
01257                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
01258                                   alpSrc, dest, dstW, y, fmt, hasAlpha); \
01259 }
/*
 * YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) expands YUV2RGBWRAPPERX
 * and additionally defines <name><ext>_2_c and <name><ext>_1_c, which
 * forward to <name><base>_2_c_template and <name><base>_1_c_template in
 * the same way.
 */
01260 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
01261 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
01262 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
01263                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
01264                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
01265                                 int yalpha, int uvalpha, int y) \
01266 { \
01267     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
01268                                   dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
01269 } \
01270  \
01271 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
01272                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
01273                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
01274                                 int uvalpha, int y) \
01275 { \
01276     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
01277                                   dstW, uvalpha, y, fmt, hasAlpha); \
01278 }
01279 
/*
 * Instantiate the _X_c / _2_c / _1_c output functions of the table based
 * RGB writers (yuv2rgb_*_c_template).
 *
 * With CONFIG_SMALL there is one set of functions per 32-bit format and
 * hasAlpha is the run-time expression "CONFIG_SWSCALE_ALPHA &&
 * c->alpPixBuf". Otherwise separate sets are generated with a constant
 * hasAlpha: the a32 variants (1, only if CONFIG_SWSCALE_ALPHA) and the x32
 * variants (0).
 */
01280 #if CONFIG_SMALL
01281 YUV2RGBWRAPPER(yuv2rgb,,  32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01282 YUV2RGBWRAPPER(yuv2rgb,,  32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01283 #else
01284 #if CONFIG_SWSCALE_ALPHA
01285 YUV2RGBWRAPPER(yuv2rgb,, a32_1,  PIX_FMT_RGB32_1,   1)
01286 YUV2RGBWRAPPER(yuv2rgb,, a32,    PIX_FMT_RGB32,     1)
01287 #endif
01288 YUV2RGBWRAPPER(yuv2rgb,, x32_1,  PIX_FMT_RGB32_1,   0)
01289 YUV2RGBWRAPPER(yuv2rgb,, x32,    PIX_FMT_RGB32,     0)
01290 #endif
/* The remaining formats never carry alpha (hasAlpha is the constant 0). */
01291 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0)
01292 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0)
01293 YUV2RGBWRAPPER(yuv2rgb,,  16,    PIX_FMT_RGB565,    0)
01294 YUV2RGBWRAPPER(yuv2rgb,,  15,    PIX_FMT_RGB555,    0)
01295 YUV2RGBWRAPPER(yuv2rgb,,  12,    PIX_FMT_RGB444,    0)
01296 YUV2RGBWRAPPER(yuv2rgb,,   8,    PIX_FMT_RGB8,      0)
01297 YUV2RGBWRAPPER(yuv2rgb,,   4,    PIX_FMT_RGB4,      0)
01298 YUV2RGBWRAPPER(yuv2rgb,,   4b,   PIX_FMT_RGB4_BYTE, 0)
01299 
01300 static av_always_inline void
01301 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
01302                           const int16_t **lumSrc, int lumFilterSize,
01303                           const int16_t *chrFilter, const int16_t **chrUSrc,
01304                           const int16_t **chrVSrc, int chrFilterSize,
01305                           const int16_t **alpSrc, uint8_t *dest,
01306                           int dstW, int y, enum PixelFormat target, int hasAlpha)
01307 {
01308     int i;
01309     int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
01310 
01311     for (i = 0; i < dstW; i++) {
01312         int j;
01313         int Y = 1<<9;
01314         int U = (1<<9)-(128 << 19);
01315         int V = (1<<9)-(128 << 19);
01316         int av_unused A;
01317         int R, G, B;
01318 
01319         for (j = 0; j < lumFilterSize; j++) {
01320             Y += lumSrc[j][i] * lumFilter[j];
01321         }
01322         for (j = 0; j < chrFilterSize; j++) {
01323             U += chrUSrc[j][i] * chrFilter[j];
01324             V += chrVSrc[j][i] * chrFilter[j];
01325         }
01326         Y >>= 10;
01327         U >>= 10;
01328         V >>= 10;
01329         if (hasAlpha) {
01330             A = 1 << 18;
01331             for (j = 0; j < lumFilterSize; j++) {
01332                 A += alpSrc[j][i] * lumFilter[j];
01333             }
01334             A >>= 19;
01335             if (A & 0x100)
01336                 A = av_clip_uint8(A);
01337         }
01338         Y -= c->yuv2rgb_y_offset;
01339         Y *= c->yuv2rgb_y_coeff;
01340         Y += 1 << 21;
01341         R = Y + V*c->yuv2rgb_v2r_coeff;
01342         G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
01343         B = Y +                          U*c->yuv2rgb_u2b_coeff;
01344         if ((R | G | B) & 0xC0000000) {
01345             R = av_clip_uintp2(R, 30);
01346             G = av_clip_uintp2(G, 30);
01347             B = av_clip_uintp2(B, 30);
01348         }
01349 
01350         switch(target) {
01351         case PIX_FMT_ARGB:
01352             dest[0] = hasAlpha ? A : 255;
01353             dest[1] = R >> 22;
01354             dest[2] = G >> 22;
01355             dest[3] = B >> 22;
01356             break;
01357         case PIX_FMT_RGB24:
01358             dest[0] = R >> 22;
01359             dest[1] = G >> 22;
01360             dest[2] = B >> 22;
01361             break;
01362         case PIX_FMT_RGBA:
01363             dest[0] = R >> 22;
01364             dest[1] = G >> 22;
01365             dest[2] = B >> 22;
01366             dest[3] = hasAlpha ? A : 255;
01367             break;
01368         case PIX_FMT_ABGR:
01369             dest[0] = hasAlpha ? A : 255;
01370             dest[1] = B >> 22;
01371             dest[2] = G >> 22;
01372             dest[3] = R >> 22;
01373             break;
01374         case PIX_FMT_BGR24:
01375             dest[0] = B >> 22;
01376             dest[1] = G >> 22;
01377             dest[2] = R >> 22;
01378             break;
01379         case PIX_FMT_BGRA:
01380             dest[0] = B >> 22;
01381             dest[1] = G >> 22;
01382             dest[2] = R >> 22;
01383             dest[3] = hasAlpha ? A : 255;
01384             break;
01385         }
01386         dest += step;
01387     }
01388 }
01389 
/*
 * Instantiate the _X_c output functions of the full-chroma writer
 * (yuv2rgb_full_X_c_template); only YUV2RGBWRAPPERX is used, so no
 * _2_c / _1_c variants are generated.
 *
 * With CONFIG_SMALL there is one function per 32-bit format and hasAlpha
 * is the run-time expression "CONFIG_SWSCALE_ALPHA && c->alpPixBuf".
 * Otherwise separate functions are generated with a constant hasAlpha:
 * the *a32_full / a*32_full names with 1 (only if CONFIG_SWSCALE_ALPHA)
 * and the *x32_full / x*32_full names with 0.
 */
01390 #if CONFIG_SMALL
01391 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01392 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01393 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01394 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01395 #else
01396 #if CONFIG_SWSCALE_ALPHA
01397 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1)
01398 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1)
01399 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1)
01400 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1)
01401 #endif
01402 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0)
01403 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0)
01404 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0)
01405 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0)
01406 #endif
/* The 24-bit formats have no alpha byte (hasAlpha is the constant 0). */
01407 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0)
01408 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0)
01409 
01410 static av_always_inline void fillPlane(uint8_t* plane, int stride,
01411                                        int width, int height,
01412                                        int y, uint8_t val)
01413 {
01414     int i;
01415     uint8_t *ptr = plane + stride*y;
01416     for (i=0; i<height; i++) {
01417         memset(ptr, val, width);
01418         ptr += stride;
01419     }
01420 }
01421 
/* Read the 16-bit sample at pos, with AV_RB16 when isBE(origin) is true
 * and with AV_RL16 otherwise. Relies on `origin` being in scope. */
01422 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01423 
/* r and b map the red and blue component onto the locals r_b (first
 * sample of a pixel) and b_r (third sample): for the BGR48 origins red is
 * b_r and blue is r_b, for all other origins it is the other way round.
 * While these macros are defined, the identifiers r and b cannot be used
 * as ordinary names. */
01424 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
01425 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
01426 
01427 static av_always_inline void
01428 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
01429                     enum PixelFormat origin)
01430 {
01431     int i;
01432     for (i = 0; i < width; i++) {
01433         unsigned int r_b = input_pixel(&src[i*3+0]);
01434         unsigned int   g = input_pixel(&src[i*3+1]);
01435         unsigned int b_r = input_pixel(&src[i*3+2]);
01436 
01437         dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01438     }
01439 }
01440 
01441 static av_always_inline void
01442 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
01443                     const uint16_t *src1, const uint16_t *src2,
01444                     int width, enum PixelFormat origin)
01445 {
01446     int i;
01447     assert(src1==src2);
01448     for (i = 0; i < width; i++) {
01449         int r_b = input_pixel(&src1[i*3+0]);
01450         int   g = input_pixel(&src1[i*3+1]);
01451         int b_r = input_pixel(&src1[i*3+2]);
01452 
01453         dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01454         dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01455     }
01456 }
01457 
01458 static av_always_inline void
01459 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
01460                           const uint16_t *src1, const uint16_t *src2,
01461                           int width, enum PixelFormat origin)
01462 {
01463     int i;
01464     assert(src1==src2);
01465     for (i = 0; i < width; i++) {
01466         int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
01467         int   g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
01468         int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
01469 
01470         dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01471         dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01472     }
01473 }
01474 
01475 #undef r
01476 #undef b
01477 #undef input_pixel
01478 
/*
 * Instantiate the byte-pointer entry points for one 48-bit packed format:
 *   <pattern>48<BE_LE>ToY_c, <pattern>48<BE_LE>ToUV_c and
 *   <pattern>48<BE_LE>ToUV_half_c.
 * Each one reinterprets its uint8_t buffers as uint16_t and forwards to the
 * matching rgb48To*_c_template with the given origin pixel format.
 */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
                                    int width, uint32_t *unused) \
{ \
    rgb48ToY_c_template((uint16_t *) _dst, (const uint16_t *) _src, \
                        width, origin); \
} \
 \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                    const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
                                    int width, uint32_t *unused) \
{ \
    rgb48ToUV_c_template((uint16_t *) _dstU, (uint16_t *) _dstV, \
                         (const uint16_t *) _src1, (const uint16_t *) _src2, \
                         width, origin); \
} \
 \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                    const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
                                    int width, uint32_t *unused) \
{ \
    rgb48ToUV_half_c_template((uint16_t *) _dstU, (uint16_t *) _dstV, \
                              (const uint16_t *) _src1, (const uint16_t *) _src2, \
                              width, origin); \
}
01507 
01508 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
01509 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
01510 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
01511 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
01512 
01513 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
01514                          origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
01515                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
01516 
01517 static av_always_inline void
01518 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
01519                        int width, enum PixelFormat origin,
01520                        int shr,   int shg,   int shb, int shp,
01521                        int maskr, int maskg, int maskb,
01522                        int rsh,   int gsh,   int bsh, int S)
01523 {
01524     const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
01525     const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
01526     int i;
01527 
01528     for (i = 0; i < width; i++) {
01529         int px = input_pixel(i) >> shp;
01530         int b = (px & maskb) >> shb;
01531         int g = (px & maskg) >> shg;
01532         int r = (px & maskr) >> shr;
01533 
01534         dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
01535     }
01536 }
01537 
01538 static av_always_inline void
01539 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
01540                         const uint8_t *src, int width,
01541                         enum PixelFormat origin,
01542                         int shr,   int shg,   int shb, int shp,
01543                         int maskr, int maskg, int maskb,
01544                         int rsh,   int gsh,   int bsh, int S)
01545 {
01546     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01547               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
01548     const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
01549     int i;
01550 
01551     for (i = 0; i < width; i++) {
01552         int px = input_pixel(i) >> shp;
01553         int b = (px & maskb) >> shb;
01554         int g = (px & maskg) >> shg;
01555         int r = (px & maskr) >> shr;
01556 
01557         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
01558         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
01559     }
01560 }
01561 
01562 static av_always_inline void
01563 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
01564                              const uint8_t *src, int width,
01565                              enum PixelFormat origin,
01566                              int shr,   int shg,   int shb, int shp,
01567                              int maskr, int maskg, int maskb,
01568                              int rsh,   int gsh,   int bsh, int S)
01569 {
01570     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01571               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
01572               maskgx = ~(maskr | maskb);
01573     const unsigned rnd = (256U<<(S)) + (1<<(S-6));
01574     int i;
01575 
01576     maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
01577     for (i = 0; i < width; i++) {
01578         int px0 = input_pixel(2 * i + 0) >> shp;
01579         int px1 = input_pixel(2 * i + 1) >> shp;
01580         int b, r, g = (px0 & maskgx) + (px1 & maskgx);
01581         int rb = px0 + px1 - g;
01582 
01583         b = (rb & maskb) >> shb;
01584         if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
01585             origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
01586             g >>= shg;
01587         } else {
01588             g = (g  & maskg) >> shg;
01589         }
01590         r = (rb & maskr) >> shr;
01591 
01592         dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
01593         dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
01594     }
01595 }
01596 
01597 #undef input_pixel
01598 
/*
 * Instantiate <name>ToY_c, <name>ToUV_c and <name>ToUV_half_c for one
 * packed RGB format. The generated functions view their byte output
 * buffers as int16_t and forward to the rgb16_32To*_c_template functions
 * with the per-format shift/mask constants supplied here.
 */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
                          int width, uint32_t *unused) \
{ \
    int16_t *luma = (int16_t*)dst; \
    rgb16_32ToY_c_template(luma, src, width, fmt, \
                           shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
{ \
    int16_t *chromaU = (int16_t*)dstU; \
    int16_t *chromaV = (int16_t*)dstV; \
    rgb16_32ToUV_c_template(chromaU, chromaV, src, width, fmt,  \
                            shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
{ \
    int16_t *chromaU = (int16_t*)dstU; \
    int16_t *chromaV = (int16_t*)dstV; \
    rgb16_32ToUV_half_c_template(chromaU, chromaV, src, width, fmt, \
                                 shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
}
01626 
01627 rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
01628 rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
01629 rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
01630 rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
01631 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
01632 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
01633 rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
01634 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
01635 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
01636 rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
01637 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
01638 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
01639 rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
01640 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
01641 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
01642 rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
01643 
01644 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
01645                          const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
01646                          int width, enum PixelFormat origin)
01647 {
01648     int i;
01649     for (i = 0; i < width; i++) {
01650         unsigned int g   = gsrc[2*i] + gsrc[2*i+1];
01651         unsigned int b   = bsrc[2*i] + bsrc[2*i+1];
01652         unsigned int r   = rsrc[2*i] + rsrc[2*i+1];
01653 
01654         dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
01655         dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
01656     }
01657 }
01658 
/**
 * Extract the alpha channel from 4-byte pixels whose alpha is the first
 * byte, scaling each 8-bit value up by 6 bits.
 */
static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    const uint8_t *alpha = src;
    int x;

    for (x = 0; x < width; x++, alpha += 4)
        dst[x] = *alpha << 6;
}
01666 
/**
 * Extract the alpha channel from 4-byte pixels whose alpha is the fourth
 * byte, scaling each 8-bit value up by 6 bits.
 */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        const int alpha = src[4*x + 3];

        dst[x++] = alpha << 6;
    }
}
01674 
/**
 * Look up each palette index in src and output the top byte of the
 * 32-bit palette entry, scaled up by 6 bits.
 */
static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int x;

    for (x = 0; x < width; x++) {
        const uint32_t entry = pal[src[x]];

        dst[x] = (entry >> 24) << 6;
    }
}
01684 
/**
 * Look up each palette index in src and output the low byte of the
 * 32-bit palette entry, scaled up by 6 bits.
 *
 * width is an int, matching the sibling readers and the function pointer
 * type this reader is invoked through in hyscale(). The previous `long`
 * parameter made that indirect call a call through an incompatible
 * function type.
 *
 * @param dst   output line, width samples
 * @param src   palette indices, width bytes
 * @param width number of samples to convert
 * @param pal   palette with one 32-bit entry per possible index
 */
static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int i;

    for (i = 0; i < width; i++) {
        const uint32_t entry = pal[src[i]];

        dst[i] = (entry & 0xFF) << 6;
    }
}
01694 
/**
 * Look up each palette index in src1 and output bits 8..15 of the palette
 * entry as U and bits 16..23 as V, each scaled up by 6 bits.
 *
 * Both outputs are int16_t lines (dstU used to be declared uint16_t, unlike
 * dstV and the other chroma readers), and the palette entry is kept
 * unsigned so that extracting the bytes never shifts a negative value.
 *
 * @param dstU  output U line, width samples
 * @param dstV  output V line, width samples
 * @param src1  palette indices, width bytes
 * @param src2  must be the same pointer as src1
 * @param width number of samples to convert
 * @param pal   palette with one 32-bit entry per possible index
 */
static void palToUV_c(int16_t *dstU, int16_t *dstV,
                           const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                           int width, uint32_t *pal)
{
    int i;

    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        const uint32_t entry = pal[src1[i]];

        dstU[i] = ((entry >>  8) & 0xFF) << 6;
        dstV[i] = ((entry >> 16) & 0xFF) << 6;
    }
}
01708 
/**
 * Expand 1 bit-per-pixel input, most significant bit first, to luma where
 * a 0 bit becomes 16383 and a 1 bit becomes 0 (the input byte is inverted
 * before the bits are tested).
 *
 * Whole bytes are expanded first, then the remaining width&7 bits of the
 * following byte.
 */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
{
    const int full_bytes = width/8;
    const int tail_bits  = width&7;
    int byte, bit;

    for (byte = 0; byte < full_bytes; byte++) {
        const int bits = ~src[byte];

        for (bit = 0; bit < 8; bit++)
            dst[8*byte + bit] = ((bits >> (7 - bit)) & 1) * 16383;
    }
    if (tail_bits) {
        const int bits = ~src[byte];

        for (bit = 0; bit < tail_bits; bit++)
            dst[8*byte + bit] = ((bits >> (7 - bit)) & 1) * 16383;
    }
}
01723 
/**
 * Expand 1 bit-per-pixel input, most significant bit first, to luma where
 * a 1 bit becomes 16383 and a 0 bit becomes 0.
 *
 * Whole bytes are expanded first, then the remaining width&7 bits of the
 * following byte.
 */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
{
    const int full_bytes = width/8;
    const int tail_bits  = width&7;
    int byte, bit;

    for (byte = 0; byte < full_bytes; byte++) {
        const int bits = src[byte];

        for (bit = 0; bit < 8; bit++)
            dst[8*byte + bit] = ((bits >> (7 - bit)) & 1) * 16383;
    }
    if (tail_bits) {
        const int bits = src[byte];

        for (bit = 0; bit < tail_bits; bit++)
            dst[8*byte + bit] = ((bits >> (7 - bit)) & 1) * 16383;
    }
}
01738 
01739 //FIXME yuy2* can read up to 7 samples too much
01740 
/**
 * Copy every even-indexed byte of src to dst (byte 0, 2, 4, ...),
 * producing width output bytes.
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
                      uint32_t *unused)
{
    const uint8_t *luma = src;
    int x;

    for (x = 0; x < width; x++, luma += 2)
        dst[x] = *luma;
}
01748 
/**
 * From each 4-byte group of src1, copy byte 1 to dstU and byte 3 to dstV.
 * src2 is not read; it is asserted to equal src1 after the copy.
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const uint8_t *group = src1 + 4*x;

        dstU[x] = group[1];
        dstV[x] = group[3];
    }
    assert(src1 == src2);
}
01759 
/**
 * Copy width 16-bit samples from _src to _dst with their bytes swapped.
 */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
{
    const uint16_t *in = (const uint16_t *) _src;
    uint16_t *out      = (uint16_t *) _dst;
    int remaining;

    for (remaining = width; remaining > 0; remaining--)
        *out++ = av_bswap16(*in++);
}
01769 
/**
 * Copy width 16-bit samples from each of _src1 and _src2 to _dstU and
 * _dstV respectively, with their bytes swapped.
 */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *inU = (const uint16_t *) _src1;
    const uint16_t *inV = (const uint16_t *) _src2;
    uint16_t *outU      = (uint16_t *) _dstU;
    uint16_t *outV      = (uint16_t *) _dstV;
    int x;

    for (x = 0; x < width; x++) {
        outU[x] = av_bswap16(inU[x]);
        outV[x] = av_bswap16(inV[x]);
    }
}
01782 
/* This is almost identical to yuy2ToY_c and exists only because calling
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses.
 * It copies every odd-indexed byte of src (byte 1, 3, 5, ...) to dst. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
                      uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[2*x + 1];
        x++;
    }
}
01792 
/**
 * From each 4-byte group of src1, copy byte 0 to dstU and byte 2 to dstV.
 * src2 is not read; it is asserted to equal src1 after the copy.
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const uint8_t *group = src1 + 4*x;

        dstU[x] = group[0];
        dstV[x] = group[2];
    }
    assert(src1 == src2);
}
01803 
01804 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
01805                                         const uint8_t *src, int width)
01806 {
01807     int i;
01808     for (i = 0; i < width; i++) {
01809         dst1[i] = src[2*i+0];
01810         dst2[i] = src[2*i+1];
01811     }
01812 }
01813 
/**
 * Split the interleaved chroma line src1 into dstU (first byte of each
 * pair) and dstV (second byte of each pair). src2 is ignored.
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    uint8_t *first  = dstU;
    uint8_t *second = dstV;

    nvXXtoUV_c(first, second, src1, width);
}
01820 
/**
 * Split the interleaved chroma line src1 into dstV (first byte of each
 * pair) and dstU (second byte of each pair). src2 is ignored.
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    uint8_t *first  = dstV;
    uint8_t *second = dstU;

    nvXXtoUV_c(first, second, src1, width);
}
01827 
01828 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01829 
01830 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
01831                        int width, uint32_t *unused)
01832 {
01833     int i;
01834     for (i=0; i<width; i++) {
01835         int b= src[i*3+0];
01836         int g= src[i*3+1];
01837         int r= src[i*3+2];
01838 
01839         dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
01840     }
01841 }
01842 
01843 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
01844                         const uint8_t *src2, int width, uint32_t *unused)
01845 {
01846     int i;
01847     for (i=0; i<width; i++) {
01848         int b= src1[3*i + 0];
01849         int g= src1[3*i + 1];
01850         int r= src1[3*i + 2];
01851 
01852         dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01853         dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01854     }
01855     assert(src1 == src2);
01856 }
01857 
01858 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
01859                              const uint8_t *src2, int width, uint32_t *unused)
01860 {
01861     int i;
01862     for (i=0; i<width; i++) {
01863         int b= src1[6*i + 0] + src1[6*i + 3];
01864         int g= src1[6*i + 1] + src1[6*i + 4];
01865         int r= src1[6*i + 2] + src1[6*i + 5];
01866 
01867         dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01868         dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01869     }
01870     assert(src1 == src2);
01871 }
01872 
01873 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
01874                        uint32_t *unused)
01875 {
01876     int i;
01877     for (i=0; i<width; i++) {
01878         int r= src[i*3+0];
01879         int g= src[i*3+1];
01880         int b= src[i*3+2];
01881 
01882         dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
01883     }
01884 }
01885 
01886 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
01887                         const uint8_t *src2, int width, uint32_t *unused)
01888 {
01889     int i;
01890     assert(src1==src2);
01891     for (i=0; i<width; i++) {
01892         int r= src1[3*i + 0];
01893         int g= src1[3*i + 1];
01894         int b= src1[3*i + 2];
01895 
01896         dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01897         dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01898     }
01899 }
01900 
01901 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
01902                                     const uint8_t *src2, int width, uint32_t *unused)
01903 {
01904     int i;
01905     assert(src1==src2);
01906     for (i=0; i<width; i++) {
01907         int r= src1[6*i + 0] + src1[6*i + 3];
01908         int g= src1[6*i + 1] + src1[6*i + 4];
01909         int b= src1[6*i + 2] + src1[6*i + 5];
01910 
01911         dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01912         dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01913     }
01914 }
01915 
01916 static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
01917 {
01918     int i;
01919     for (i = 0; i < width; i++) {
01920         int g = src[0][i];
01921         int b = src[1][i];
01922         int r = src[2][i];
01923 
01924         dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
01925     }
01926 }
01927 
01928 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
01929 {
01930     int i;
01931     const uint16_t **src = (const uint16_t **) _src;
01932     uint16_t *dst = (uint16_t *) _dst;
01933     for (i = 0; i < width; i++) {
01934         int g = AV_RL16(src[0] + i);
01935         int b = AV_RL16(src[1] + i);
01936         int r = AV_RL16(src[2] + i);
01937 
01938         dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
01939     }
01940 }
01941 
01942 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
01943 {
01944     int i;
01945     const uint16_t **src = (const uint16_t **) _src;
01946     uint16_t *dst = (uint16_t *) _dst;
01947     for (i = 0; i < width; i++) {
01948         int g = AV_RB16(src[0] + i);
01949         int b = AV_RB16(src[1] + i);
01950         int r = AV_RB16(src[2] + i);
01951 
01952         dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
01953     }
01954 }
01955 
01956 static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
01957 {
01958     int i;
01959     for (i = 0; i < width; i++) {
01960         int g = src[0][i];
01961         int b = src[1][i];
01962         int r = src[2][i];
01963 
01964         dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
01965         dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
01966     }
01967 }
01968 
01969 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
01970 {
01971     int i;
01972     const uint16_t **src = (const uint16_t **) _src;
01973     uint16_t *dstU = (uint16_t *) _dstU;
01974     uint16_t *dstV = (uint16_t *) _dstV;
01975     for (i = 0; i < width; i++) {
01976         int g = AV_RL16(src[0] + i);
01977         int b = AV_RL16(src[1] + i);
01978         int r = AV_RL16(src[2] + i);
01979 
01980         dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01981         dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01982     }
01983 }
01984 
01985 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
01986 {
01987     int i;
01988     const uint16_t **src = (const uint16_t **) _src;
01989     uint16_t *dstU = (uint16_t *) _dstU;
01990     uint16_t *dstV = (uint16_t *) _dstV;
01991     for (i = 0; i < width; i++) {
01992         int g = AV_RB16(src[0] + i);
01993         int b = AV_RB16(src[1] + i);
01994         int r = AV_RB16(src[2] + i);
01995 
01996         dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01997         dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01998     }
01999 }
02000 
02001 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
02002                            const int16_t *filter,
02003                            const int32_t *filterPos, int filterSize)
02004 {
02005     int i;
02006     int32_t *dst = (int32_t *) _dst;
02007     const uint16_t *src = (const uint16_t *) _src;
02008     int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
02009     int sh = bits - 4;
02010 
02011     if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
02012         sh= 9;
02013 
02014     for (i = 0; i < dstW; i++) {
02015         int j;
02016         int srcPos = filterPos[i];
02017         int val = 0;
02018 
02019         for (j = 0; j < filterSize; j++) {
02020             val += src[srcPos + j] * filter[filterSize * i + j];
02021         }
02022         // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
02023         dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
02024     }
02025 }
02026 
02027 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
02028                            const int16_t *filter,
02029                            const int32_t *filterPos, int filterSize)
02030 {
02031     int i;
02032     const uint16_t *src = (const uint16_t *) _src;
02033     int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
02034 
02035     if(sh<15)
02036         sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
02037 
02038     for (i = 0; i < dstW; i++) {
02039         int j;
02040         int srcPos = filterPos[i];
02041         int val = 0;
02042 
02043         for (j = 0; j < filterSize; j++) {
02044             val += src[srcPos + j] * filter[filterSize * i + j];
02045         }
02046         // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
02047         dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
02048     }
02049 }
02050 
02051 // bilinear / bicubic scaling
02052 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
02053                           const int16_t *filter, const int32_t *filterPos,
02054                           int filterSize)
02055 {
02056     int i;
02057     for (i=0; i<dstW; i++) {
02058         int j;
02059         int srcPos= filterPos[i];
02060         int val=0;
02061         for (j=0; j<filterSize; j++) {
02062             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
02063         }
02064         //filter += hFilterSize;
02065         dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
02066         //dst[i] = val>>7;
02067     }
02068 }
02069 
02070 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
02071                           const int16_t *filter, const int32_t *filterPos,
02072                           int filterSize)
02073 {
02074     int i;
02075     int32_t *dst = (int32_t *) _dst;
02076     for (i=0; i<dstW; i++) {
02077         int j;
02078         int srcPos= filterPos[i];
02079         int val=0;
02080         for (j=0; j<filterSize; j++) {
02081             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
02082         }
02083         //filter += hFilterSize;
02084         dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
02085         //dst[i] = val>>7;
02086     }
02087 }
02088 
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Rescale both chroma lines in place: each sample is capped at 30775,
 * multiplied by 4663, offset by -9289992 and shifted right by 12.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int u = FFMIN(dstU[x],30775);
        const int v = FFMIN(dstV[x],30775);

        dstU[x] = (u*4663 - 9289992)>>12; //-264
        dstV[x] = (v*4663 - 9289992)>>12; //-264
    }
}
/**
 * Rescale both chroma lines in place: each sample is multiplied by 1799,
 * offset by 4081085 and shifted right by 11.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int u = dstU[x];
        const int v = dstV[x];

        dstU[x] = (u*1799 + 4081085)>>11; //1469
        dstV[x] = (v*1799 + 4081085)>>11; //1469
    }
}
/**
 * Rescale a luma line in place: each sample is capped at 30189,
 * multiplied by 19077, offset by -39057361 and shifted right by 14.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int y = FFMIN(dst[x],30189);

        dst[x] = (y*19077 - 39057361)>>14;
    }
}
/**
 * Rescale a luma line in place: each sample is multiplied by 14071,
 * offset by 33561947 and shifted right by 14.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int y = dst[x];

        dst[x] = (y*14071 + 33561947)>>14;
    }
}
02119 
/**
 * Rescale both chroma lines in place for the high-bit-depth path, where
 * the buffers actually hold int32_t samples.
 *
 * Each sample is capped at 30775<<4, multiplied by 4663, offset by
 * -(9289992<<4) and shifted right by 12.
 *
 * The product is formed in 64 bits: at the cap, (30775<<4)*4663 is
 * 2296061200, which does not fit in a signed 32-bit int, so the previous
 * int arithmetic overflowed (undefined behaviour) even though the final
 * result after the subtraction fits again.
 *
 * @param _dstU U line, width int32_t samples behind an int16_t pointer
 * @param _dstV V line, width int32_t samples behind an int16_t pointer
 * @param width number of samples per line
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    const int32_t max_in = 30775 << 4;
    const int64_t offset = 9289992 << 4;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        const int64_t u = dstU[i] < max_in ? dstU[i] : max_in;
        const int64_t v = dstV[i] < max_in ? dstV[i] : max_in;

        dstU[i] = (int32_t)((u * 4663 - offset) >> 12); //-264
        dstV[i] = (int32_t)((v * 4663 - offset) >> 12); //-264
    }
}
/**
 * Rescale both chroma lines in place for the high-bit-depth path, where
 * the buffers actually hold int32_t samples: each sample is multiplied by
 * 1799, offset by 4081085<<4 and shifted right by 11.
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *lineU = (int32_t *) _dstU;
    int32_t *lineV = (int32_t *) _dstV;
    int x;

    for (x = 0; x < width; x++) {
        const int32_t u = lineU[x];
        const int32_t v = lineV[x];

        lineU[x] = (u*1799 + (4081085<<4))>>11; //1469
        lineV[x] = (v*1799 + (4081085<<4))>>11; //1469
    }
}
/**
 * Rescale a luma line in place for the high-bit-depth path, where the
 * buffer actually holds int32_t samples.
 *
 * Each sample is capped at 30189<<4, multiplied by 4769, offset by
 * -(39057361<<2) and shifted right by 12.
 *
 * The product is formed in 64 bits: at the cap, (30189<<4)*4769 is
 * 2303541456, which does not fit in a signed 32-bit int, so the previous
 * int arithmetic overflowed (undefined behaviour) even though the final
 * result after the subtraction fits again.
 *
 * @param _dst  luma line, width int32_t samples behind an int16_t pointer
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    const int32_t max_in = 30189 << 4;
    const int64_t offset = 39057361 << 2;
    int32_t *dst = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++) {
        const int64_t y = dst[i] < max_in ? dst[i] : max_in;

        dst[i] = (int32_t)((y * 4769 - offset) >> 12);
    }
}
/**
 * Rescale a luma line in place for the high-bit-depth path, where the
 * buffer actually holds int32_t samples: each sample is multiplied by
 * 14071/4, offset by (33561947<<4)/4 and shifted right by 12.
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    const int scale  = 14071/4;
    const int offset = (33561947<<4)/4;
    int32_t *line = (int32_t *) _dst;
    int x;

    for (x = 0; x < width; x++)
        line[x] = (line[x]*scale + offset)>>12;
}
02154 
02155 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
02156                            const uint8_t *src, int srcW, int xInc)
02157 {
02158     int i;
02159     unsigned int xpos=0;
02160     for (i=0;i<dstWidth;i++) {
02161         register unsigned int xx=xpos>>16;
02162         register unsigned int xalpha=(xpos&0xFFFF)>>9;
02163         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
02164         xpos+=xInc;
02165     }
02166     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
02167         dst[i] = src[srcW-1]*128;
02168 }
02169 
02170 // *** horizontal scale Y line to temp buffer
02171 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
02172                                      const uint8_t *src_in[4], int srcW, int xInc,
02173                                      const int16_t *hLumFilter,
02174                                      const int32_t *hLumFilterPos, int hLumFilterSize,
02175                                      uint8_t *formatConvBuffer,
02176                                      uint32_t *pal, int isAlpha)
02177 {
02178     void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
02179     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
02180     const uint8_t *src = src_in[isAlpha ? 3 : 0];
02181 
02182     if (toYV12) {
02183         toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
02184         src= formatConvBuffer;
02185     } else if (c->readLumPlanar && !isAlpha) {
02186         c->readLumPlanar(formatConvBuffer, src_in, srcW);
02187         src = formatConvBuffer;
02188     }
02189 
02190     if (!c->hyscale_fast) {
02191         c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
02192     } else { // fast bilinear upscale / crap downscale
02193         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
02194     }
02195 
02196     if (convertRange)
02197         convertRange(dst, dstWidth);
02198 }
02199 
02200 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
02201                            int dstWidth, const uint8_t *src1,
02202                            const uint8_t *src2, int srcW, int xInc)
02203 {
02204     int i;
02205     unsigned int xpos=0;
02206     for (i=0;i<dstWidth;i++) {
02207         register unsigned int xx=xpos>>16;
02208         register unsigned int xalpha=(xpos&0xFFFF)>>9;
02209         dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
02210         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
02211         xpos+=xInc;
02212     }
02213     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
02214         dst1[i] = src1[srcW-1]*128;
02215         dst2[i] = src2[srcW-1]*128;
02216     }
02217 }
02218 
02219 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
02220                                      const uint8_t *src_in[4],
02221                                      int srcW, int xInc, const int16_t *hChrFilter,
02222                                      const int32_t *hChrFilterPos, int hChrFilterSize,
02223                                      uint8_t *formatConvBuffer, uint32_t *pal)
02224 {
02225     const uint8_t *src1 = src_in[1], *src2 = src_in[2];
02226     if (c->chrToYV12) {
02227         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
02228         c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
02229         src1= formatConvBuffer;
02230         src2= buf2;
02231     } else if (c->readChrPlanar) {
02232         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
02233         c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
02234         src1= formatConvBuffer;
02235         src2= buf2;
02236     }
02237 
02238     if (!c->hcscale_fast) {
02239         c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
02240         c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
02241     } else { // fast bilinear upscale / crap downscale
02242         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
02243     }
02244 
02245     if (c->chrConvertRange)
02246         c->chrConvertRange(dst1, dst2, dstWidth);
02247 }
02248 
02249 static av_always_inline void
02250 find_c_packed_planar_out_funcs(SwsContext *c,
02251                                yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
02252                                yuv2interleavedX_fn *yuv2nv12cX,
02253                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
02254                                yuv2packedX_fn *yuv2packedX)
02255 {
02256     enum PixelFormat dstFormat = c->dstFormat;
02257 
02258     if (is16BPS(dstFormat)) {
02259         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
02260         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
02261     } else if (is9_OR_10BPS(dstFormat)) {
02262         if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
02263             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
02264             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
02265         } else {
02266             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
02267             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
02268         }
02269     } else {
02270         *yuv2plane1 = yuv2plane1_8_c;
02271         *yuv2planeX = yuv2planeX_8_c;
02272         if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
02273             *yuv2nv12cX = yuv2nv12cX_c;
02274     }
02275 
02276     if(c->flags & SWS_FULL_CHR_H_INT) {
02277         switch (dstFormat) {
02278             case PIX_FMT_RGBA:
02279 #if CONFIG_SMALL
02280                 *yuv2packedX = yuv2rgba32_full_X_c;
02281 #else
02282 #if CONFIG_SWSCALE_ALPHA
02283                 if (c->alpPixBuf) {
02284                     *yuv2packedX = yuv2rgba32_full_X_c;
02285                 } else
02286 #endif /* CONFIG_SWSCALE_ALPHA */
02287                 {
02288                     *yuv2packedX = yuv2rgbx32_full_X_c;
02289                 }
02290 #endif /* !CONFIG_SMALL */
02291                 break;
02292             case PIX_FMT_ARGB:
02293 #if CONFIG_SMALL
02294                 *yuv2packedX = yuv2argb32_full_X_c;
02295 #else
02296 #if CONFIG_SWSCALE_ALPHA
02297                 if (c->alpPixBuf) {
02298                     *yuv2packedX = yuv2argb32_full_X_c;
02299                 } else
02300 #endif /* CONFIG_SWSCALE_ALPHA */
02301                 {
02302                     *yuv2packedX = yuv2xrgb32_full_X_c;
02303                 }
02304 #endif /* !CONFIG_SMALL */
02305                 break;
02306             case PIX_FMT_BGRA:
02307 #if CONFIG_SMALL
02308                 *yuv2packedX = yuv2bgra32_full_X_c;
02309 #else
02310 #if CONFIG_SWSCALE_ALPHA
02311                 if (c->alpPixBuf) {
02312                     *yuv2packedX = yuv2bgra32_full_X_c;
02313                 } else
02314 #endif /* CONFIG_SWSCALE_ALPHA */
02315                 {
02316                     *yuv2packedX = yuv2bgrx32_full_X_c;
02317                 }
02318 #endif /* !CONFIG_SMALL */
02319                 break;
02320             case PIX_FMT_ABGR:
02321 #if CONFIG_SMALL
02322                 *yuv2packedX = yuv2abgr32_full_X_c;
02323 #else
02324 #if CONFIG_SWSCALE_ALPHA
02325                 if (c->alpPixBuf) {
02326                     *yuv2packedX = yuv2abgr32_full_X_c;
02327                 } else
02328 #endif /* CONFIG_SWSCALE_ALPHA */
02329                 {
02330                     *yuv2packedX = yuv2xbgr32_full_X_c;
02331                 }
02332 #endif /* !CONFIG_SMALL */
02333                 break;
02334             case PIX_FMT_RGB24:
02335             *yuv2packedX = yuv2rgb24_full_X_c;
02336             break;
02337         case PIX_FMT_BGR24:
02338             *yuv2packedX = yuv2bgr24_full_X_c;
02339             break;
02340         }
02341         if(!*yuv2packedX)
02342             goto YUV_PACKED;
02343     } else {
02344         YUV_PACKED:
02345         switch (dstFormat) {
02346         case PIX_FMT_RGB48LE:
02347             *yuv2packed1 = yuv2rgb48le_1_c;
02348             *yuv2packed2 = yuv2rgb48le_2_c;
02349             *yuv2packedX = yuv2rgb48le_X_c;
02350             break;
02351         case PIX_FMT_RGB48BE:
02352             *yuv2packed1 = yuv2rgb48be_1_c;
02353             *yuv2packed2 = yuv2rgb48be_2_c;
02354             *yuv2packedX = yuv2rgb48be_X_c;
02355             break;
02356         case PIX_FMT_BGR48LE:
02357             *yuv2packed1 = yuv2bgr48le_1_c;
02358             *yuv2packed2 = yuv2bgr48le_2_c;
02359             *yuv2packedX = yuv2bgr48le_X_c;
02360             break;
02361         case PIX_FMT_BGR48BE:
02362             *yuv2packed1 = yuv2bgr48be_1_c;
02363             *yuv2packed2 = yuv2bgr48be_2_c;
02364             *yuv2packedX = yuv2bgr48be_X_c;
02365             break;
02366         case PIX_FMT_RGB32:
02367         case PIX_FMT_BGR32:
02368 #if CONFIG_SMALL
02369             *yuv2packed1 = yuv2rgb32_1_c;
02370             *yuv2packed2 = yuv2rgb32_2_c;
02371             *yuv2packedX = yuv2rgb32_X_c;
02372 #else
02373 #if CONFIG_SWSCALE_ALPHA
02374                 if (c->alpPixBuf) {
02375                     *yuv2packed1 = yuv2rgba32_1_c;
02376                     *yuv2packed2 = yuv2rgba32_2_c;
02377                     *yuv2packedX = yuv2rgba32_X_c;
02378                 } else
02379 #endif /* CONFIG_SWSCALE_ALPHA */
02380                 {
02381                     *yuv2packed1 = yuv2rgbx32_1_c;
02382                     *yuv2packed2 = yuv2rgbx32_2_c;
02383                     *yuv2packedX = yuv2rgbx32_X_c;
02384                 }
02385 #endif /* !CONFIG_SMALL */
02386             break;
02387         case PIX_FMT_RGB32_1:
02388         case PIX_FMT_BGR32_1:
02389 #if CONFIG_SMALL
02390                 *yuv2packed1 = yuv2rgb32_1_1_c;
02391                 *yuv2packed2 = yuv2rgb32_1_2_c;
02392                 *yuv2packedX = yuv2rgb32_1_X_c;
02393 #else
02394 #if CONFIG_SWSCALE_ALPHA
02395                 if (c->alpPixBuf) {
02396                     *yuv2packed1 = yuv2rgba32_1_1_c;
02397                     *yuv2packed2 = yuv2rgba32_1_2_c;
02398                     *yuv2packedX = yuv2rgba32_1_X_c;
02399                 } else
02400 #endif /* CONFIG_SWSCALE_ALPHA */
02401                 {
02402                     *yuv2packed1 = yuv2rgbx32_1_1_c;
02403                     *yuv2packed2 = yuv2rgbx32_1_2_c;
02404                     *yuv2packedX = yuv2rgbx32_1_X_c;
02405                 }
02406 #endif /* !CONFIG_SMALL */
02407                 break;
02408         case PIX_FMT_RGB24:
02409             *yuv2packed1 = yuv2rgb24_1_c;
02410             *yuv2packed2 = yuv2rgb24_2_c;
02411             *yuv2packedX = yuv2rgb24_X_c;
02412             break;
02413         case PIX_FMT_BGR24:
02414             *yuv2packed1 = yuv2bgr24_1_c;
02415             *yuv2packed2 = yuv2bgr24_2_c;
02416             *yuv2packedX = yuv2bgr24_X_c;
02417             break;
02418         case PIX_FMT_RGB565LE:
02419         case PIX_FMT_RGB565BE:
02420         case PIX_FMT_BGR565LE:
02421         case PIX_FMT_BGR565BE:
02422             *yuv2packed1 = yuv2rgb16_1_c;
02423             *yuv2packed2 = yuv2rgb16_2_c;
02424             *yuv2packedX = yuv2rgb16_X_c;
02425             break;
02426         case PIX_FMT_RGB555LE:
02427         case PIX_FMT_RGB555BE:
02428         case PIX_FMT_BGR555LE:
02429         case PIX_FMT_BGR555BE:
02430             *yuv2packed1 = yuv2rgb15_1_c;
02431             *yuv2packed2 = yuv2rgb15_2_c;
02432             *yuv2packedX = yuv2rgb15_X_c;
02433             break;
02434         case PIX_FMT_RGB444LE:
02435         case PIX_FMT_RGB444BE:
02436         case PIX_FMT_BGR444LE:
02437         case PIX_FMT_BGR444BE:
02438             *yuv2packed1 = yuv2rgb12_1_c;
02439             *yuv2packed2 = yuv2rgb12_2_c;
02440             *yuv2packedX = yuv2rgb12_X_c;
02441             break;
02442         case PIX_FMT_RGB8:
02443         case PIX_FMT_BGR8:
02444             *yuv2packed1 = yuv2rgb8_1_c;
02445             *yuv2packed2 = yuv2rgb8_2_c;
02446             *yuv2packedX = yuv2rgb8_X_c;
02447             break;
02448         case PIX_FMT_RGB4:
02449         case PIX_FMT_BGR4:
02450             *yuv2packed1 = yuv2rgb4_1_c;
02451             *yuv2packed2 = yuv2rgb4_2_c;
02452             *yuv2packedX = yuv2rgb4_X_c;
02453             break;
02454         case PIX_FMT_RGB4_BYTE:
02455         case PIX_FMT_BGR4_BYTE:
02456             *yuv2packed1 = yuv2rgb4b_1_c;
02457             *yuv2packed2 = yuv2rgb4b_2_c;
02458             *yuv2packedX = yuv2rgb4b_X_c;
02459             break;
02460         }
02461     }
02462     switch (dstFormat) {
02463     case PIX_FMT_GRAY16BE:
02464         *yuv2packed1 = yuv2gray16BE_1_c;
02465         *yuv2packed2 = yuv2gray16BE_2_c;
02466         *yuv2packedX = yuv2gray16BE_X_c;
02467         break;
02468     case PIX_FMT_GRAY16LE:
02469         *yuv2packed1 = yuv2gray16LE_1_c;
02470         *yuv2packed2 = yuv2gray16LE_2_c;
02471         *yuv2packedX = yuv2gray16LE_X_c;
02472         break;
02473     case PIX_FMT_MONOWHITE:
02474         *yuv2packed1 = yuv2monowhite_1_c;
02475         *yuv2packed2 = yuv2monowhite_2_c;
02476         *yuv2packedX = yuv2monowhite_X_c;
02477         break;
02478     case PIX_FMT_MONOBLACK:
02479         *yuv2packed1 = yuv2monoblack_1_c;
02480         *yuv2packed2 = yuv2monoblack_2_c;
02481         *yuv2packedX = yuv2monoblack_X_c;
02482         break;
02483     case PIX_FMT_YUYV422:
02484         *yuv2packed1 = yuv2yuyv422_1_c;
02485         *yuv2packed2 = yuv2yuyv422_2_c;
02486         *yuv2packedX = yuv2yuyv422_X_c;
02487         break;
02488     case PIX_FMT_UYVY422:
02489         *yuv2packed1 = yuv2uyvy422_1_c;
02490         *yuv2packed2 = yuv2uyvy422_2_c;
02491         *yuv2packedX = yuv2uyvy422_X_c;
02492         break;
02493     }
02494 }
02495 
/* Set to 1 to enable the DEBUG_BUFFERS() trace output. */
#define DEBUG_SWSCALE_BUFFERS 0
/* Logs at AV_LOG_DEBUG level when DEBUG_SWSCALE_BUFFERS is non-zero.
 * Expects a variable named `c` in scope at the call site; it is passed to
 * av_log() as the logging context.
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and cannot capture an `else` that follows the call site. */
#define DEBUG_BUFFERS(...) \
    do { \
        if (DEBUG_SWSCALE_BUFFERS) \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
    } while (0)
02498 
02499 static int swScale(SwsContext *c, const uint8_t* src[],
02500                    int srcStride[], int srcSliceY,
02501                    int srcSliceH, uint8_t* dst[], int dstStride[])
02502 {
02503     /* load a few things into local vars to make the code more readable? and faster */
02504     const int srcW= c->srcW;
02505     const int dstW= c->dstW;
02506     const int dstH= c->dstH;
02507     const int chrDstW= c->chrDstW;
02508     const int chrSrcW= c->chrSrcW;
02509     const int lumXInc= c->lumXInc;
02510     const int chrXInc= c->chrXInc;
02511     const enum PixelFormat dstFormat= c->dstFormat;
02512     const int flags= c->flags;
02513     int32_t *vLumFilterPos= c->vLumFilterPos;
02514     int32_t *vChrFilterPos= c->vChrFilterPos;
02515     int32_t *hLumFilterPos= c->hLumFilterPos;
02516     int32_t *hChrFilterPos= c->hChrFilterPos;
02517     int16_t *hLumFilter= c->hLumFilter;
02518     int16_t *hChrFilter= c->hChrFilter;
02519     int32_t *lumMmxFilter= c->lumMmxFilter;
02520     int32_t *chrMmxFilter= c->chrMmxFilter;
02521     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
02522     const int vLumFilterSize= c->vLumFilterSize;
02523     const int vChrFilterSize= c->vChrFilterSize;
02524     const int hLumFilterSize= c->hLumFilterSize;
02525     const int hChrFilterSize= c->hChrFilterSize;
02526     int16_t **lumPixBuf= c->lumPixBuf;
02527     int16_t **chrUPixBuf= c->chrUPixBuf;
02528     int16_t **chrVPixBuf= c->chrVPixBuf;
02529     int16_t **alpPixBuf= c->alpPixBuf;
02530     const int vLumBufSize= c->vLumBufSize;
02531     const int vChrBufSize= c->vChrBufSize;
02532     uint8_t *formatConvBuffer= c->formatConvBuffer;
02533     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
02534     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
02535     int lastDstY;
02536     uint32_t *pal=c->pal_yuv;
02537     int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
02538 
02539     yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
02540     yuv2planarX_fn yuv2planeX = c->yuv2planeX;
02541     yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
02542     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
02543     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
02544     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
02545 
02546     /* vars which will change and which we need to store back in the context */
02547     int dstY= c->dstY;
02548     int lumBufIndex= c->lumBufIndex;
02549     int chrBufIndex= c->chrBufIndex;
02550     int lastInLumBuf= c->lastInLumBuf;
02551     int lastInChrBuf= c->lastInChrBuf;
02552 
02553     if (isPacked(c->srcFormat)) {
02554         src[0]=
02555         src[1]=
02556         src[2]=
02557         src[3]= src[0];
02558         srcStride[0]=
02559         srcStride[1]=
02560         srcStride[2]=
02561         srcStride[3]= srcStride[0];
02562     }
02563     srcStride[1]<<= c->vChrDrop;
02564     srcStride[2]<<= c->vChrDrop;
02565 
02566     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
02567                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
02568                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
02569     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
02570                    srcSliceY,    srcSliceH,    dstY,    dstH);
02571     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
02572                    vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
02573 
02574     if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
02575         static int warnedAlready=0; //FIXME move this into the context perhaps
02576         if (flags & SWS_PRINT_INFO && !warnedAlready) {
02577             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
02578                    "         ->cannot do aligned memory accesses anymore\n");
02579             warnedAlready=1;
02580         }
02581     }
02582 
02583     if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
02584         || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
02585         || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
02586     ) {
02587         static int warnedAlready=0;
02588         int cpu_flags = av_get_cpu_flags();
02589         if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
02590             av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
02591             warnedAlready=1;
02592         }
02593     }
02594 
02595     /* Note the user might start scaling the picture in the middle so this
02596        will not get executed. This is not really intended but works
02597        currently, so people might do it. */
02598     if (srcSliceY ==0) {
02599         lumBufIndex=-1;
02600         chrBufIndex=-1;
02601         dstY=0;
02602         lastInLumBuf= -1;
02603         lastInChrBuf= -1;
02604     }
02605 
02606     if (!should_dither) {
02607         c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
02608     }
02609     lastDstY= dstY;
02610 
02611     for (;dstY < dstH; dstY++) {
02612         const int chrDstY= dstY>>c->chrDstVSubSample;
02613         uint8_t *dest[4] = {
02614             dst[0] + dstStride[0] * dstY,
02615             dst[1] + dstStride[1] * chrDstY,
02616             dst[2] + dstStride[2] * chrDstY,
02617             (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
02618         };
02619         int use_mmx_vfilter= c->use_mmx_vfilter;
02620 
02621         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
02622         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
02623         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
02624         int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
02625         int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
02626         int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
02627         int enough_lines;
02628 
02629         //handle holes (FAST_BILINEAR & weird filters)
02630         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
02631         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
02632         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
02633         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
02634 
02635         DEBUG_BUFFERS("dstY: %d\n", dstY);
02636         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
02637                          firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
02638         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
02639                          firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
02640 
02641         // Do we have enough lines in this slice to output the dstY line
02642         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
02643 
02644         if (!enough_lines) {
02645             lastLumSrcY = srcSliceY + srcSliceH - 1;
02646             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
02647             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
02648                                             lastLumSrcY, lastChrSrcY);
02649         }
02650 
02651         //Do horizontal scaling
02652         while(lastInLumBuf < lastLumSrcY) {
02653             const uint8_t *src1[4] = {
02654                 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
02655                 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
02656                 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
02657                 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
02658             };
02659             lumBufIndex++;
02660             assert(lumBufIndex < 2*vLumBufSize);
02661             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
02662             assert(lastInLumBuf + 1 - srcSliceY >= 0);
02663             hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
02664                     hLumFilter, hLumFilterPos, hLumFilterSize,
02665                     formatConvBuffer,
02666                     pal, 0);
02667             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
02668                 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
02669                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
02670                         formatConvBuffer,
02671                         pal, 1);
02672             lastInLumBuf++;
02673             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
02674                                lumBufIndex,    lastInLumBuf);
02675         }
02676         while(lastInChrBuf < lastChrSrcY) {
02677             const uint8_t *src1[4] = {
02678                 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
02679                 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
02680                 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
02681                 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
02682             };
02683             chrBufIndex++;
02684             assert(chrBufIndex < 2*vChrBufSize);
02685             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
02686             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
02687             //FIXME replace parameters through context struct (some at least)
02688 
02689             if (c->needs_hcscale)
02690                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
02691                           chrDstW, src1, chrSrcW, chrXInc,
02692                           hChrFilter, hChrFilterPos, hChrFilterSize,
02693                           formatConvBuffer, pal);
02694             lastInChrBuf++;
02695             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
02696                                chrBufIndex,    lastInChrBuf);
02697         }
02698         //wrap buf index around to stay inside the ring buffer
02699         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
02700         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
02701         if (!enough_lines)
02702             break; //we can't output a dstY line so let's try with the next slice
02703 
02704 #if HAVE_MMX
02705         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
02706 #endif
02707         if (should_dither) {
02708             c->chrDither8 = dither_8x8_128[chrDstY & 7];
02709             c->lumDither8 = dither_8x8_128[dstY & 7];
02710         }
02711         if (dstY >= dstH-2) {
02712             // hmm looks like we can't use MMX here without overwriting this array's tail
02713             find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX,  &yuv2nv12cX,
02714                                            &yuv2packed1, &yuv2packed2, &yuv2packedX);
02715             use_mmx_vfilter= 0;
02716         }
02717 
02718         {
02719             const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02720             const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02721             const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02722             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
02723             int16_t *vLumFilter= c->vLumFilter;
02724             int16_t *vChrFilter= c->vChrFilter;
02725 
02726             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
02727                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02728 
02729                 vLumFilter +=    dstY * vLumFilterSize;
02730                 vChrFilter += chrDstY * vChrFilterSize;
02731 
02732                 av_assert0(use_mmx_vfilter != (
02733                                yuv2planeX == yuv2planeX_10BE_c
02734                             || yuv2planeX == yuv2planeX_10LE_c
02735                             || yuv2planeX == yuv2planeX_9BE_c
02736                             || yuv2planeX == yuv2planeX_9LE_c
02737                             || yuv2planeX == yuv2planeX_16BE_c
02738                             || yuv2planeX == yuv2planeX_16LE_c
02739                             || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
02740 
02741                 if(use_mmx_vfilter){
02742                     vLumFilter= c->lumMmxFilter;
02743                     vChrFilter= c->chrMmxFilter;
02744                 }
02745 
02746                 if (vLumFilterSize == 1) {
02747                     yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
02748                 } else {
02749                     yuv2planeX(vLumFilter, vLumFilterSize,
02750                                lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
02751                 }
02752 
02753                 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
02754                     if (yuv2nv12cX) {
02755                         yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
02756                     } else if (vChrFilterSize == 1) {
02757                         yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
02758                         yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
02759                     } else {
02760                         yuv2planeX(vChrFilter, vChrFilterSize,
02761                                    chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
02762                         yuv2planeX(vChrFilter, vChrFilterSize,
02763                                    chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
02764                     }
02765                 }
02766 
02767                 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
02768                     if(use_mmx_vfilter){
02769                         vLumFilter= c->alpMmxFilter;
02770                     }
02771                     if (vLumFilterSize == 1) {
02772                         yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
02773                     } else {
02774                         yuv2planeX(vLumFilter, vLumFilterSize,
02775                                    alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
02776                     }
02777                 }
02778             } else {
02779                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
02780                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
02781                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
02782                     int chrAlpha = vChrFilter[2 * dstY + 1];
02783                     yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
02784                                 alpPixBuf ? *alpSrcPtr : NULL,
02785                                 dest[0], dstW, chrAlpha, dstY);
02786                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
02787                     int lumAlpha = vLumFilter[2 * dstY + 1];
02788                     int chrAlpha = vChrFilter[2 * dstY + 1];
02789                     lumMmxFilter[2] =
02790                     lumMmxFilter[3] = vLumFilter[2 * dstY   ] * 0x10001;
02791                     chrMmxFilter[2] =
02792                     chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
02793                     yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
02794                                 alpPixBuf ? alpSrcPtr : NULL,
02795                                 dest[0], dstW, lumAlpha, chrAlpha, dstY);
02796                 } else { //general RGB
02797                     yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
02798                                 lumSrcPtr, vLumFilterSize,
02799                                 vChrFilter + dstY * vChrFilterSize,
02800                                 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
02801                                 alpSrcPtr, dest[0], dstW, dstY);
02802                 }
02803             }
02804         }
02805     }
02806 
02807     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
02808         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
02809 
02810 #if HAVE_MMX2
02811     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
02812         __asm__ volatile("sfence":::"memory");
02813 #endif
02814     emms_c();
02815 
02816     /* store changed local vars back in the context */
02817     c->dstY= dstY;
02818     c->lumBufIndex= lumBufIndex;
02819     c->chrBufIndex= chrBufIndex;
02820     c->lastInLumBuf= lastInLumBuf;
02821     c->lastInChrBuf= lastInChrBuf;
02822 
02823     return dstY - lastDstY;
02824 }
02825 
02826 static av_cold void sws_init_swScale_c(SwsContext *c)
02827 {
02828     enum PixelFormat srcFormat = c->srcFormat;
02829 
02830     find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
02831                                    &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
02832                                    &c->yuv2packedX);
02833 
02834     c->chrToYV12 = NULL;
02835     switch(srcFormat) {
02836         case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
02837         case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
02838         case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
02839         case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
02840         case PIX_FMT_RGB8     :
02841         case PIX_FMT_BGR8     :
02842         case PIX_FMT_PAL8     :
02843         case PIX_FMT_BGR4_BYTE:
02844         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
02845         case PIX_FMT_GBRP9LE:
02846         case PIX_FMT_GBRP10LE:
02847         case PIX_FMT_GBRP16LE:  c->readChrPlanar = planar_rgb16le_to_uv; break;
02848         case PIX_FMT_GBRP9BE:
02849         case PIX_FMT_GBRP10BE:
02850         case PIX_FMT_GBRP16BE:  c->readChrPlanar = planar_rgb16be_to_uv; break;
02851         case PIX_FMT_GBRP:      c->readChrPlanar = planar_rgb_to_uv; break;
02852 #if HAVE_BIGENDIAN
02853         case PIX_FMT_YUV444P9LE:
02854         case PIX_FMT_YUV422P9LE:
02855         case PIX_FMT_YUV420P9LE:
02856         case PIX_FMT_YUV422P10LE:
02857         case PIX_FMT_YUV420P10LE:
02858         case PIX_FMT_YUV444P10LE:
02859         case PIX_FMT_YUV420P16LE:
02860         case PIX_FMT_YUV422P16LE:
02861         case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
02862 #else
02863         case PIX_FMT_YUV444P9BE:
02864         case PIX_FMT_YUV422P9BE:
02865         case PIX_FMT_YUV420P9BE:
02866         case PIX_FMT_YUV444P10BE:
02867         case PIX_FMT_YUV422P10BE:
02868         case PIX_FMT_YUV420P10BE:
02869         case PIX_FMT_YUV420P16BE:
02870         case PIX_FMT_YUV422P16BE:
02871         case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
02872 #endif
02873     }
02874     if (c->chrSrcHSubSample) {
02875         switch(srcFormat) {
02876         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
02877         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
02878         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
02879         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
02880         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
02881         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
02882         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
02883         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
02884         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
02885         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
02886         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
02887         case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
02888         case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
02889         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
02890         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
02891         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
02892         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
02893         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
02894         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
02895         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
02896         case PIX_FMT_GBR24P  : c->chrToYV12 = gbr24pToUV_half_c;  break;
02897         case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
02898         case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
02899         }
02900     } else {
02901         switch(srcFormat) {
02902         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
02903         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
02904         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
02905         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
02906         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
02907         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
02908         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
02909         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
02910         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
02911         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
02912         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
02913         case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
02914         case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
02915         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
02916         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
02917         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
02918         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
02919         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
02920         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
02921         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
02922         case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
02923         case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
02924         }
02925     }
02926 
02927     c->lumToYV12 = NULL;
02928     c->alpToYV12 = NULL;
02929     switch (srcFormat) {
02930     case PIX_FMT_GBRP9LE:
02931     case PIX_FMT_GBRP10LE:
02932     case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
02933     case PIX_FMT_GBRP9BE:
02934     case PIX_FMT_GBRP10BE:
02935     case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
02936     case PIX_FMT_GBRP:     c->readLumPlanar = planar_rgb_to_y; break;
02937 #if HAVE_BIGENDIAN
02938     case PIX_FMT_YUV444P9LE:
02939     case PIX_FMT_YUV422P9LE:
02940     case PIX_FMT_YUV420P9LE:
02941     case PIX_FMT_YUV422P10LE:
02942     case PIX_FMT_YUV420P10LE:
02943     case PIX_FMT_YUV444P10LE:
02944     case PIX_FMT_YUV420P16LE:
02945     case PIX_FMT_YUV422P16LE:
02946     case PIX_FMT_YUV444P16LE:
02947     case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
02948 #else
02949     case PIX_FMT_YUV444P9BE:
02950     case PIX_FMT_YUV422P9BE:
02951     case PIX_FMT_YUV420P9BE:
02952     case PIX_FMT_YUV444P10BE:
02953     case PIX_FMT_YUV422P10BE:
02954     case PIX_FMT_YUV420P10BE:
02955     case PIX_FMT_YUV420P16BE:
02956     case PIX_FMT_YUV422P16BE:
02957     case PIX_FMT_YUV444P16BE:
02958     case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
02959 #endif
02960     case PIX_FMT_YUYV422  :
02961     case PIX_FMT_Y400A    : c->lumToYV12 = yuy2ToY_c; break;
02962     case PIX_FMT_UYVY422  : c->lumToYV12 = uyvyToY_c;    break;
02963     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
02964     case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
02965     case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
02966     case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
02967     case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
02968     case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
02969     case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
02970     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
02971     case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
02972     case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
02973     case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
02974     case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
02975     case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
02976     case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
02977     case PIX_FMT_RGB8     :
02978     case PIX_FMT_BGR8     :
02979     case PIX_FMT_PAL8     :
02980     case PIX_FMT_BGR4_BYTE:
02981     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
02982     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
02983     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
02984     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
02985     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
02986     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
02987     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
02988     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
02989     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
02990     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
02991     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
02992     }
02993     if (c->alpPixBuf) {
02994         switch (srcFormat) {
02995         case PIX_FMT_BGRA:
02996         case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
02997         case PIX_FMT_ABGR:
02998         case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
02999         case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
03000         case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
03001         }
03002     }
03003 
03004 
03005     if (c->srcBpc == 8) {
03006         if (c->dstBpc <= 10) {
03007             c->hyScale = c->hcScale = hScale8To15_c;
03008             if (c->flags & SWS_FAST_BILINEAR) {
03009                 c->hyscale_fast = hyscale_fast_c;
03010                 c->hcscale_fast = hcscale_fast_c;
03011             }
03012         } else {
03013             c->hyScale = c->hcScale = hScale8To19_c;
03014         }
03015     } else {
03016         c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
03017     }
03018 
03019     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
03020         if (c->dstBpc <= 10) {
03021             if (c->srcRange) {
03022                 c->lumConvertRange = lumRangeFromJpeg_c;
03023                 c->chrConvertRange = chrRangeFromJpeg_c;
03024             } else {
03025                 c->lumConvertRange = lumRangeToJpeg_c;
03026                 c->chrConvertRange = chrRangeToJpeg_c;
03027             }
03028         } else {
03029             if (c->srcRange) {
03030                 c->lumConvertRange = lumRangeFromJpeg16_c;
03031                 c->chrConvertRange = chrRangeFromJpeg16_c;
03032             } else {
03033                 c->lumConvertRange = lumRangeToJpeg16_c;
03034                 c->chrConvertRange = chrRangeToJpeg16_c;
03035             }
03036         }
03037     }
03038 
03039     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
03040           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
03041         c->needs_hcscale = 1;
03042 }
03043 
03044 SwsFunc ff_getSwsFunc(SwsContext *c)
03045 {
03046     sws_init_swScale_c(c);
03047 
03048     if (HAVE_MMX)
03049         ff_sws_init_swScale_mmx(c);
03050     if (HAVE_ALTIVEC)
03051         ff_sws_init_swScale_altivec(c);
03052 
03053     return swScale;
03054 }