swscale.c 76.7 KB
Newer Older
M
Michael Niedermayer 已提交
1
/*
2
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
M
Michael Niedermayer 已提交
3 4 5 6 7

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
8

M
Michael Niedermayer 已提交
9 10 11 12
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
13

M
Michael Niedermayer 已提交
14 15
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
M
Michael Niedermayer 已提交
17
*/
18

19
/*
M
Michael Niedermayer 已提交
20
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
A
Alex Beregszaszi 已提交
21
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
22
  {BGR,RGB}{1,4,8,15,16} support dithering
23
  
24 25 26 27 28 29
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
  x -> x
  YUV9 -> YV12
  YUV9/YV12 -> Y800
  Y800 -> YUV9/YV12
M
Michael Niedermayer 已提交
30 31
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
M
Michael Niedermayer 已提交
32
  BGR15 -> BGR16
M
Michael Niedermayer 已提交
33 34 35
*/

/* 
36 37
tested special converters (most are actually tested, but I didn't write it down ...)
 YV12 -> BGR16
M
Michael Niedermayer 已提交
38
 YV12 -> YV12
M
Michael Niedermayer 已提交
39
 BGR15 -> BGR16
40
 BGR16 -> BGR16
41
 YVU9 -> YV12
M
Michael Niedermayer 已提交
42 43

untested special converters
44 45 46
  YV12/I420 -> BGR15/BGR24/BGR32 (its the yuv2rgb stuff, so it should be ok)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
M
Michael Niedermayer 已提交
47 48
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
M
Michael Niedermayer 已提交
49
  BGR24 -> YV12
50 51
*/

52
#include <inttypes.h>
53
#include <string.h>
54
#include <math.h>
55
#include <stdio.h>
B
Bohdan Horst 已提交
56
#include <unistd.h>
57
#include "config.h"
M
Michael Niedermayer 已提交
58
#include <assert.h>
59 60
#ifdef HAVE_MALLOC_H
#include <malloc.h>
61 62
#else
#include <stdlib.h>
63
#endif
64 65
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
66 67 68
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
69
#endif
70
#include "swscale.h"
71
#include "swscale_internal.h"
72
#include "x86_cpu.h"
73
#include "bswap.h"
74
#include "img_format.h"
75
#include "rgb2rgb.h"
76
#ifdef USE_FASTMEMCPY
77
#include "libvo/fastmemcpy.h"
78
#endif
A
Arpi 已提交
79

M
Michael Niedermayer 已提交
80
#undef MOVNTQ
M
Michael Niedermayer 已提交
81
#undef PAVGB
82

83
//#undef HAVE_MMX2
M
101  
Michael Niedermayer 已提交
84
//#define HAVE_3DNOW
85
//#undef HAVE_MMX
86
//#undef ARCH_X86
M
101++  
Michael Niedermayer 已提交
87
//#define WORDS_BIGENDIAN
88
#define DITHER1XBPP
89

M
Michael Niedermayer 已提交
90 91
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit

92
#define RET 0xC3 //near return opcode for X86
93

94
#ifdef MP_DEBUG
M
Michael Niedermayer 已提交
95
#define ASSERT(x) assert(x);
96
#else
97
#define ASSERT(x) ;
98 99 100 101 102 103 104
#endif

#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
105

106
//FIXME replace this with something faster
107
/*
 * Image-format predicates. Each one expands to a boolean expression over the
 * IMGFMT_* codes (declared outside this file); the argument is evaluated
 * several times, so pass side-effect-free expressions only.
 */
/* planar / semi-planar YUV layouts */
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
			|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21 \
			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
/* any YUV layout: the two packed 4:2:2 codes plus everything planar */
#define isYUV(x)       ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x))
#define isGray(x)      ((x)==IMGFMT_Y800)
/* RGB/BGR families are recognised by masking the format code */
#define isRGB(x)       (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define isBGR(x)       (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
/* formats accepted as scaler input */
#define isSupportedIn(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
			|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
			|| (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
			|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
/* formats accepted as scaler output (every RGB/BGR depth, plus NV12/NV21) */
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
			|| isRGB(x) || isBGR(x)\
			|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21\
			|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
/* single-plane (interleaved) layouts */
#define isPacked(x)    ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||isRGB(x) || isBGR(x))
125 126

#define RGB2YUV_SHIFT 16
127 128 129 130 131 132 133 134 135
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
136

137 138
extern const int32_t Inverse_Table_6_9[8][4];

139 140
/*
NOTES
141
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
142

143
TODO
144
more intelligent misalignment avoidance for the horizontal scaler
145 146
write special vertical cubic upscale version
Optimize C code (yv12 / minmax)
147
add support for packed pixel yuv input & output
148 149
add support for Y8 output
optimize bgr24 & bgr32
150
add BGR4 output support
151
write special BGR->BGR scaler
152
*/
153

M
Michael Niedermayer 已提交
154 155
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
156

157
#if defined(ARCH_X86) || defined(ARCH_X86_64)
158 159
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
160
static uint64_t __attribute__((aligned(8))) w10=       0x0010001000100010LL;
161 162 163 164 165
static uint64_t attribute_used __attribute__((aligned(8))) w02=       0x0002000200020002LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
166

167 168 169 170
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
M
Michael Niedermayer 已提交
171 172 173 174 175 176 177 178

static uint64_t __attribute__((aligned(8))) dither4[2]={
	0x0103010301030103LL,
	0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
	0x0602060206020602LL,
	0x0004000400040004LL,};
179 180

static uint64_t __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
181 182
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
183
static uint64_t __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
184 185
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;
186

187 188 189
static uint64_t attribute_used __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;
M
Michael Niedermayer 已提交
190

M
Michael Niedermayer 已提交
191
#ifdef FAST_BGR2YV12
192 193 194
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
M
Michael Niedermayer 已提交
195
#else
196 197 198
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
M
Michael Niedermayer 已提交
199
#endif
200 201 202
static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111       attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
203
#endif
204 205 206 207

// clipping helper table for C implementations:
static unsigned char clip_table[768];

208 209
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
		  
M
Michael Niedermayer 已提交
210 211 212 213 214
extern const uint8_t dither_2x2_4[2][8];
extern const uint8_t dither_2x2_8[2][8];
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];
215

216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
/**
 * Format an image-format code as "0x<hex> (<4 chars>)", the four characters
 * being the bytes of the code from most to least significant.
 *
 * The text is written into static storage made of two 32-byte slots that are
 * used alternately, so a returned string survives exactly one further call
 * (enough to print two formats in a single statement). Not reentrant.
 *
 * @param format format code to describe
 * @return pointer to the NUL-terminated description inside static storage
 */
char *sws_format_name(int format)
{
    static char slots[2 * 32];
    static int slot;
    char *const out = &slots[slot * 32];

    /* flip between slot 0 and slot 1 for the next call */
    slot ^= 1;

    snprintf(out, 32, "0x%x (%c%c%c%c)", format,
             format >> 24,
             (format >> 16) & 0xFF,
             (format >> 8) & 0xFF,
             format & 0xFF);
    return out;
}

232
#if defined(ARCH_X86) || defined(ARCH_X86_64)
M
Michael Niedermayer 已提交
233 234
void in_asm_used_var_warning_killer()
{
M
cleanup  
Michael Niedermayer 已提交
235
 volatile int i= bF8+bFC+w10+
M
Michael Niedermayer 已提交
236
 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
237
 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
M
Michael Niedermayer 已提交
238 239 240
 if(i) i=0;
}
#endif
241

242
/**
 * Vertical filter producing planar 8-bit output, plain C.
 *
 * Every output sample is sum_j(src[j][i] * filter[j]) + (1<<18), shifted
 * right by 19 and clamped to 0..255.
 *
 * @param lumFilter     lumFilterSize coefficients for the luma plane
 * @param lumSrc        lumFilterSize source rows, each read at [0, dstW)
 * @param chrFilter     chrFilterSize coefficients for the chroma planes
 * @param chrSrc        chrFilterSize source rows; U is read at [i] and V at
 *                      [i + 2048] of the same row
 * @param dest          luma output, dstW bytes
 * @param uDest         U output, chrDstW bytes; NULL skips chroma entirely
 * @param vDest         V output, chrDstW bytes; only written (and never
 *                      checked) when uDest is non-NULL
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
	//FIXME Optimize (just quickly written, not optimized)
	int i;

	for(i=0; i<dstW; i++)
	{
		int val=1<<18;	// rounding term: half of the 1<<19 divisor
		int j;
		for(j=0; j<lumFilterSize; j++)
			val += lumSrc[j][i] * lumFilter[j];

		val>>=19;
		if(val<0)        val=0;
		else if(val>255) val=255;
		dest[i]= val;
	}

	if(uDest == NULL)
		return;

	for(i=0; i<chrDstW; i++)
	{
		int u=1<<18;
		int v=1<<18;
		int j;
		for(j=0; j<chrFilterSize; j++)
		{
			u += chrSrc[j][i] * chrFilter[j];
			v += chrSrc[j][i + 2048] * chrFilter[j];
		}

		u>>=19;
		if(u<0)        u=0;
		else if(u>255) u=255;
		v>>=19;
		if(v<0)        v=0;
		else if(v>255) v=255;

		uDest[i]= u;
		vDest[i]= v;
	}
}

V
Ville Syrjälä 已提交
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
/**
 * Vertical filter producing a luma plane plus one interleaved chroma plane,
 * plain C.
 *
 * Samples are computed as sum_j(src[j][i] * filter[j]) + (1<<18), shifted
 * right by 19 and clamped to 0..255. U comes from chrSrc[j][i], V from
 * chrSrc[j][i + 2048].
 *
 * @param dest       luma output, dstW bytes
 * @param uDest      interleaved chroma output, 2*chrDstW bytes; NULL skips
 *                   chroma
 * @param dstFormat  IMGFMT_NV12 stores each pair as U,V; any other value
 *                   stores it as V,U
 */
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
	int x, tap;
	int uSlot;

	for(x=0; x<dstW; x++)
	{
		int sum= 1<<18;
		for(tap=0; tap<lumFilterSize; tap++)
			sum += lumSrc[tap][x] * lumFilter[tap];

		sum>>=19;
		if(sum<0)        sum=0;
		else if(sum>255) sum=255;
		dest[x]= sum;
	}

	if(!uDest)
		return;

	/* byte position of U inside each interleaved pair; V takes the other */
	uSlot= (dstFormat == IMGFMT_NV12) ? 0 : 1;

	for(x=0; x<chrDstW; x++)
	{
		int uSum= 1<<18;
		int vSum= 1<<18;
		for(tap=0; tap<chrFilterSize; tap++)
		{
			uSum += chrSrc[tap][x] * chrFilter[tap];
			vSum += chrSrc[tap][x + 2048] * chrFilter[tap];
		}

		uSum>>=19;
		if(uSum<0)        uSum=0;
		else if(uSum>255) uSum=255;
		vSum>>=19;
		if(vSum<0)        vSum=0;
		else if(vSum>255) vSum=255;

		uDest[2*x + uSlot]=     uSum;
		uDest[2*x + 1 - uSlot]= vSum;
	}
}
M
Michael Niedermayer 已提交
325

M
Michael Niedermayer 已提交
326
/*
 * Opens a loop over output pixel pairs and runs the N-tap vertical filter for
 * two luma samples (Y1, Y2) and one chroma pair (U, V). The loop's closing
 * brace must be supplied by the code that follows the macro.
 *
 * Expects in scope: i, dstW, lumSrc, lumFilter, lumFilterSize, chrSrc,
 * chrFilter, chrFilterSize.
 * Declares: j, Y1, Y2, U, V, i2 (= 2*i) and uninitialised "type *r, *b, *g".
 *
 * Results are (sum + (1<<18)) >> 19, then clamped to 0..255.
 * NOTE(review): the clamp is only entered when bit 8 of Y1|Y2|U|V is set, so
 * it relies on unclamped results staying inside [-256, 511] -- confirm.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
		for(i=0; i<(dstW>>1); i++){\
			int j;\
			int Y1=1<<18;\
			int Y2=1<<18;\
			int U=1<<18;\
			int V=1<<18;\
			type *r, *b, *g;\
			const int i2= 2*i;\
			\
			for(j=0; j<lumFilterSize; j++)\
			{\
				Y1 += lumSrc[j][i2] * lumFilter[j];\
				Y2 += lumSrc[j][i2+1] * lumFilter[j];\
			}\
			for(j=0; j<chrFilterSize; j++)\
			{\
				U += chrSrc[j][i] * chrFilter[j];\
				V += chrSrc[j][i+2048] * chrFilter[j];\
			}\
			Y1>>=19;\
			Y2>>=19;\
			U >>=19;\
			V >>=19;\
			if((Y1|Y2|U|V)&256)\
			{\
				if(Y1>255)   Y1=255;\
				else if(Y1<0)Y1=0;\
				if(Y2>255)   Y2=255;\
				else if(Y2<0)Y2=0;\
				if(U>255)    U=255;\
				else if(U<0) U=0;\
				if(V>255)    V=255;\
				else if(V<0) V=0;\
			}
                        
/*
 * YSCALE_YUV_2_PACKEDX_C followed by the per-channel table lookups: r is
 * selected by V, b by U, and g by both U and V, all taken from the context c.
 * Like the macro it wraps, it leaves the pixel-pair loop open.
 */
#define YSCALE_YUV_2_RGBX_C(type) \
			YSCALE_YUV_2_PACKEDX_C(type)\
			r = c->table_rV[V];\
			g = c->table_gU[U] + c->table_gV[V];\
			b = c->table_bU[U];

M
Michael Niedermayer 已提交
368
/*
 * Two-row variant: opens the pixel-pair loop and blends two source rows with
 * the weights yalpha1/yalpha (luma) and uvalpha1/uvalpha (chroma), shifting
 * the weighted sum right by 19. No rounding term and no clamping.
 *
 * Expects in scope: i, dstW, buf0, buf1, uvbuf0, uvbuf1, yalpha, yalpha1,
 * uvalpha, uvalpha1. Declares: i2, Y1, Y2, U, V. V is read 2048 entries after
 * U in the same chroma rows. The caller closes the loop.
 */
#define YSCALE_YUV_2_PACKED2_C \
		for(i=0; i<(dstW>>1); i++){\
			const int i2= 2*i;\
			int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;\
			int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
			int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;\
			int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;
M
Michael Niedermayer 已提交
375 376

/*
 * YSCALE_YUV_2_PACKED2_C plus the per-channel table lookups from context c
 * (r by V, b by U, g by U and V). Declares "type *r, *b, *g" and leaves the
 * pixel-pair loop open for the caller to close.
 */
#define YSCALE_YUV_2_RGB2_C(type) \
			YSCALE_YUV_2_PACKED2_C\
			type *r, *b, *g;\
			r = c->table_rV[V];\
			g = c->table_gU[U] + c->table_gV[V];\
			b = c->table_bU[U];

M
Michael Niedermayer 已提交
383
/*
 * Single-row variant: opens the pixel-pair loop and takes luma from buf0 and
 * chroma from uvbuf1 only, each shifted right by 7. No clamping.
 *
 * Expects in scope: i, dstW, buf0, uvbuf1. Declares: i2, Y1, Y2, U, V.
 * The caller closes the loop.
 */
#define YSCALE_YUV_2_PACKED1_C \
		for(i=0; i<(dstW>>1); i++){\
			const int i2= 2*i;\
			int Y1= buf0[i2  ]>>7;\
			int Y2= buf0[i2+1]>>7;\
			int U= (uvbuf1[i     ])>>7;\
			int V= (uvbuf1[i+2048])>>7;
M
Michael Niedermayer 已提交
390 391

/*
 * YSCALE_YUV_2_PACKED1_C plus the per-channel table lookups from context c
 * (r by V, b by U, g by U and V). Declares "type *r, *b, *g" and leaves the
 * pixel-pair loop open for the caller to close.
 */
#define YSCALE_YUV_2_RGB1_C(type) \
			YSCALE_YUV_2_PACKED1_C\
			type *r, *b, *g;\
			r = c->table_rV[V];\
			g = c->table_gU[U] + c->table_gV[V];\
			b = c->table_bU[U];

M
Michael Niedermayer 已提交
398
/*
 * Single-luma-row variant with averaged chroma: luma is buf0 >> 7, chroma is
 * (uvbuf0 + uvbuf1) >> 8, i.e. the mean of the two chroma rows at the same
 * scale. No clamping.
 *
 * Expects in scope: i, dstW, buf0, uvbuf0, uvbuf1. Declares: i2, Y1, Y2, U, V.
 * Opens the pixel-pair loop; the caller closes it.
 */
#define YSCALE_YUV_2_PACKED1B_C \
		for(i=0; i<(dstW>>1); i++){\
			const int i2= 2*i;\
			int Y1= buf0[i2  ]>>7;\
			int Y2= buf0[i2+1]>>7;\
			int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
			int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;
M
Michael Niedermayer 已提交
405 406

/*
 * YSCALE_YUV_2_PACKED1B_C plus the per-channel table lookups from context c
 * (r by V, b by U, g by U and V). Declares "type *r, *b, *g" and leaves the
 * pixel-pair loop open for the caller to close.
 */
#define YSCALE_YUV_2_RGB1B_C(type) \
			YSCALE_YUV_2_PACKED1B_C\
			type *r, *b, *g;\
			r = c->table_rV[V];\
			g = c->table_gU[U] + c->table_gV[V];\
			b = c->table_bU[U];

M
Michael Niedermayer 已提交
413
/*
 * Output-format dispatcher shared by the packed-output writers.
 *
 *   func  - one of the YSCALE_YUV_2_RGB*_C(type) macros; opens the pixel-pair
 *           loop and provides Y1, Y2, i2 and the r/g/b lookup pointers
 *   func2 - the matching YSCALE_YUV_2_PACKED*_C expansion; opens the loop and
 *           provides Y1, Y2, U, V for the packed YUV outputs
 *
 * Expects in scope: c, dest, dstW, y, i and whatever func/func2 need.
 * The 24-bit cases advance dest by 6 bytes per pixel pair; the 1bpp case
 * advances it by one byte per 8 pixels; all other cases index from dest.
 * Formats not listed are silently ignored (there is no default case).
 *
 * NOTE(review): the RGB1/BGR1 case does not use func at all -- it reads
 * buf0/buf1/yalpha/yalpha1 directly, so it only compiles where those
 * two-row blend variables exist.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
	switch(c->dstFormat)\
	{\
	case IMGFMT_BGR32:\
	case IMGFMT_RGB32:\
		func(uint32_t)\
			((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
			((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
		}		\
		break;\
	case IMGFMT_RGB24:\
		func(uint8_t)\
			((uint8_t*)dest)[0]= r[Y1];\
			((uint8_t*)dest)[1]= g[Y1];\
			((uint8_t*)dest)[2]= b[Y1];\
			((uint8_t*)dest)[3]= r[Y2];\
			((uint8_t*)dest)[4]= g[Y2];\
			((uint8_t*)dest)[5]= b[Y2];\
			dest+=6;\
		}\
		break;\
	case IMGFMT_BGR24:\
		func(uint8_t)\
			((uint8_t*)dest)[0]= b[Y1];\
			((uint8_t*)dest)[1]= g[Y1];\
			((uint8_t*)dest)[2]= r[Y1];\
			((uint8_t*)dest)[3]= b[Y2];\
			((uint8_t*)dest)[4]= g[Y2];\
			((uint8_t*)dest)[5]= r[Y2];\
			dest+=6;\
		}\
		break;\
	case IMGFMT_RGB16:\
	case IMGFMT_BGR16:\
		{\
			const int dr1= dither_2x2_8[y&1    ][0];\
			const int dg1= dither_2x2_4[y&1    ][0];\
			const int db1= dither_2x2_8[(y&1)^1][0];\
			const int dr2= dither_2x2_8[y&1    ][1];\
			const int dg2= dither_2x2_4[y&1    ][1];\
			const int db2= dither_2x2_8[(y&1)^1][1];\
			func(uint16_t)\
				((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
				((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
			}\
		}\
		break;\
	case IMGFMT_RGB15:\
	case IMGFMT_BGR15:\
		{\
			const int dr1= dither_2x2_8[y&1    ][0];\
			const int dg1= dither_2x2_8[y&1    ][1];\
			const int db1= dither_2x2_8[(y&1)^1][0];\
			const int dr2= dither_2x2_8[y&1    ][1];\
			const int dg2= dither_2x2_8[y&1    ][0];\
			const int db2= dither_2x2_8[(y&1)^1][1];\
			func(uint16_t)\
				((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
				((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
			}\
		}\
		break;\
	case IMGFMT_RGB8:\
	case IMGFMT_BGR8:\
		{\
			const uint8_t * const d64= dither_8x8_73[y&7];\
			const uint8_t * const d32= dither_8x8_32[y&7];\
			func(uint8_t)\
				((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
				((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
			}\
		}\
		break;\
	case IMGFMT_RGB4:\
	case IMGFMT_BGR4:\
		{\
			const uint8_t * const d64= dither_8x8_73 [y&7];\
			const uint8_t * const d128=dither_8x8_220[y&7];\
			func(uint8_t)\
				((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
				                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
			}\
		}\
		break;\
	case IMGFMT_RG4B:\
	case IMGFMT_BG4B:\
		{\
			const uint8_t * const d64= dither_8x8_73 [y&7];\
			const uint8_t * const d128=dither_8x8_220[y&7];\
			func(uint8_t)\
				((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
				((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
			}\
		}\
		break;\
	case IMGFMT_RGB1:\
	case IMGFMT_BGR1:\
		{\
			const uint8_t * const d128=dither_8x8_220[y&7];\
			uint8_t *g= c->table_gU[128] + c->table_gV[128];\
			for(i=0; i<dstW-7; i+=8){\
				int acc;\
				acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
				acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
				acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
				acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
				acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
				acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
				acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
				acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
				((uint8_t*)dest)[0]= acc;\
				dest++;\
			}\
		}\
		break;\
	case IMGFMT_YUY2:\
		func2\
			((uint8_t*)dest)[2*i2+0]= Y1;\
			((uint8_t*)dest)[2*i2+1]= U;\
			((uint8_t*)dest)[2*i2+2]= Y2;\
			((uint8_t*)dest)[2*i2+3]= V;\
		}		\
		break;\
	case IMGFMT_UYVY:\
		func2\
			((uint8_t*)dest)[2*i2+0]= U;\
			((uint8_t*)dest)[2*i2+1]= Y1;\
			((uint8_t*)dest)[2*i2+2]= V;\
			((uint8_t*)dest)[2*i2+3]= Y2;\
		}		\
		break;\
	}


M
Michael Niedermayer 已提交
592
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
M
Michael Niedermayer 已提交
593
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
M
Michael Niedermayer 已提交
594
				    uint8_t *dest, int dstW, int y)
M
Michael Niedermayer 已提交
595
{
M
Michael Niedermayer 已提交
596 597
	int i;
	switch(c->dstFormat)
M
Michael Niedermayer 已提交
598
	{
M
Michael Niedermayer 已提交
599 600 601 602 603
	case IMGFMT_RGB32:
	case IMGFMT_BGR32:
		YSCALE_YUV_2_RGBX_C(uint32_t)
			((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
			((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
M
Michael Niedermayer 已提交
604
		}
M
Michael Niedermayer 已提交
605 606 607 608 609 610 611 612 613
		break;
	case IMGFMT_RGB24:
		YSCALE_YUV_2_RGBX_C(uint8_t)
			((uint8_t*)dest)[0]= r[Y1];
			((uint8_t*)dest)[1]= g[Y1];
			((uint8_t*)dest)[2]= b[Y1];
			((uint8_t*)dest)[3]= r[Y2];
			((uint8_t*)dest)[4]= g[Y2];
			((uint8_t*)dest)[5]= b[Y2];
614
			dest+=6;
M
Michael Niedermayer 已提交
615 616 617 618 619 620 621 622 623 624
		}
		break;
	case IMGFMT_BGR24:
		YSCALE_YUV_2_RGBX_C(uint8_t)
			((uint8_t*)dest)[0]= b[Y1];
			((uint8_t*)dest)[1]= g[Y1];
			((uint8_t*)dest)[2]= r[Y1];
			((uint8_t*)dest)[3]= b[Y2];
			((uint8_t*)dest)[4]= g[Y2];
			((uint8_t*)dest)[5]= r[Y2];
625
			dest+=6;
M
Michael Niedermayer 已提交
626 627 628 629 630 631 632 633 634 635 636 637 638 639
		}
		break;
	case IMGFMT_RGB16:
	case IMGFMT_BGR16:
		{
			const int dr1= dither_2x2_8[y&1    ][0];
			const int dg1= dither_2x2_4[y&1    ][0];
			const int db1= dither_2x2_8[(y&1)^1][0];
			const int dr2= dither_2x2_8[y&1    ][1];
			const int dg2= dither_2x2_4[y&1    ][1];
			const int db2= dither_2x2_8[(y&1)^1][1];
			YSCALE_YUV_2_RGBX_C(uint16_t)
				((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
				((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
M
Michael Niedermayer 已提交
640 641
			}
		}
M
Michael Niedermayer 已提交
642 643 644 645 646 647 648 649 650 651 652 653 654
		break;
	case IMGFMT_RGB15:
	case IMGFMT_BGR15:
		{
			const int dr1= dither_2x2_8[y&1    ][0];
			const int dg1= dither_2x2_8[y&1    ][1];
			const int db1= dither_2x2_8[(y&1)^1][0];
			const int dr2= dither_2x2_8[y&1    ][1];
			const int dg2= dither_2x2_8[y&1    ][0];
			const int db2= dither_2x2_8[(y&1)^1][1];
			YSCALE_YUV_2_RGBX_C(uint16_t)
				((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
				((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
M
Michael Niedermayer 已提交
655
			}
M
Michael Niedermayer 已提交
656 657 658 659 660 661 662 663 664 665
		}
		break;
	case IMGFMT_RGB8:
	case IMGFMT_BGR8:
		{
			const uint8_t * const d64= dither_8x8_73[y&7];
			const uint8_t * const d32= dither_8x8_32[y&7];
			YSCALE_YUV_2_RGBX_C(uint8_t)
				((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
				((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
M
Michael Niedermayer 已提交
666 667
			}
		}
M
Michael Niedermayer 已提交
668 669 670
		break;
	case IMGFMT_RGB4:
	case IMGFMT_BGR4:
671 672 673 674
		{
			const uint8_t * const d64= dither_8x8_73 [y&7];
			const uint8_t * const d128=dither_8x8_220[y&7];
			YSCALE_YUV_2_RGBX_C(uint8_t)
675
				((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
676 677 678 679 680 681
				                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
			}
		}
		break;
	case IMGFMT_RG4B:
	case IMGFMT_BG4B:
M
Michael Niedermayer 已提交
682 683 684 685 686 687
		{
			const uint8_t * const d64= dither_8x8_73 [y&7];
			const uint8_t * const d128=dither_8x8_220[y&7];
			YSCALE_YUV_2_RGBX_C(uint8_t)
				((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
				((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
M
Michael Niedermayer 已提交
688
			}
M
Michael Niedermayer 已提交
689 690 691 692 693 694 695 696 697 698
		}
		break;
	case IMGFMT_RGB1:
	case IMGFMT_BGR1:
		{
			const uint8_t * const d128=dither_8x8_220[y&7];
			uint8_t *g= c->table_gU[128] + c->table_gV[128];
			int acc=0;
			for(i=0; i<dstW-1; i+=2){
				int j;
M
Michael Niedermayer 已提交
699 700
				int Y1=1<<18;
				int Y2=1<<18;
M
Michael Niedermayer 已提交
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719

				for(j=0; j<lumFilterSize; j++)
				{
					Y1 += lumSrc[j][i] * lumFilter[j];
					Y2 += lumSrc[j][i+1] * lumFilter[j];
				}
				Y1>>=19;
				Y2>>=19;
				if((Y1|Y2)&256)
				{
					if(Y1>255)   Y1=255;
					else if(Y1<0)Y1=0;
					if(Y2>255)   Y2=255;
					else if(Y2<0)Y2=0;
				}
				acc+= acc + g[Y1+d128[(i+0)&7]];
				acc+= acc + g[Y2+d128[(i+1)&7]];
				if((i&7)==6){
					((uint8_t*)dest)[0]= acc;
720
					dest++;
M
Michael Niedermayer 已提交
721
				}
M
Michael Niedermayer 已提交
722 723
			}
		}
M
Michael Niedermayer 已提交
724
		break;
M
Michael Niedermayer 已提交
725
	case IMGFMT_YUY2:
M
Michael Niedermayer 已提交
726
		YSCALE_YUV_2_PACKEDX_C(void)
M
Michael Niedermayer 已提交
727 728 729 730 731 732
			((uint8_t*)dest)[2*i2+0]= Y1;
			((uint8_t*)dest)[2*i2+1]= U;
			((uint8_t*)dest)[2*i2+2]= Y2;
			((uint8_t*)dest)[2*i2+3]= V;
		}
                break;
A
Alex Beregszaszi 已提交
733 734 735 736 737 738 739 740
	case IMGFMT_UYVY:
		YSCALE_YUV_2_PACKEDX_C(void)
			((uint8_t*)dest)[2*i2+0]= U;
			((uint8_t*)dest)[2*i2+1]= Y1;
			((uint8_t*)dest)[2*i2+2]= V;
			((uint8_t*)dest)[2*i2+3]= Y2;
		}
                break;
M
Michael Niedermayer 已提交
741 742 743 744
	}
}


M
Michael Niedermayer 已提交
745 746
//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
//Plain C versions
747 748 749 750
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

751
#ifdef ARCH_POWERPC
752
#if defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)
753 754 755 756
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

757
#if defined(ARCH_X86) || defined(ARCH_X86_64)
758 759 760 761 762 763 764 765 766 767 768 769

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
770
#endif //ARCH_X86 || ARCH_X86_64
771 772 773 774 775 776

#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW

#ifdef COMPILE_C
M
Michael Niedermayer 已提交
777 778 779
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
780
#undef HAVE_ALTIVEC
M
Michael Niedermayer 已提交
781 782
#define RENAME(a) a ## _C
#include "swscale_template.c"
783
#endif
M
Michael Niedermayer 已提交
784

785 786 787 788 789 790 791 792 793
#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif
#endif //ARCH_POWERPC

794
#if defined(ARCH_X86) || defined(ARCH_X86_64)
M
Michael Niedermayer 已提交
795

M
Michael Niedermayer 已提交
796 797 798 799 800 801 802 803 804
//X86 versions
/*
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "swscale_template.c"
M
Michael Niedermayer 已提交
805
*/
M
Michael Niedermayer 已提交
806
//MMX versions
807
#ifdef COMPILE_MMX
M
Michael Niedermayer 已提交
808 809 810 811 812 813
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
814
#endif
M
Michael Niedermayer 已提交
815 816

//MMX2 versions
817
#ifdef COMPILE_MMX2
M
Michael Niedermayer 已提交
818 819 820 821 822 823
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
824
#endif
M
Michael Niedermayer 已提交
825 826

//3DNOW versions
827
#ifdef COMPILE_3DNOW
M
Michael Niedermayer 已提交
828 829 830 831 832 833
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
834
#endif
M
Michael Niedermayer 已提交
835

836
#endif //ARCH_X86 || ARCH_X86_64
M
Michael Niedermayer 已提交
837

G
Gabucino 已提交
838
// minor note: the HAVE_xyz is messed up after that line so don't use it
839

M
Michael Niedermayer 已提交
840 841 842 843 844 845 846 847 848 849
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1 the result is a + b*dist + c*dist^2 + d*dist^3 (Horner form).
 * For larger dist the coefficients are replaced by those of the following
 * unit-length piece and dist is reduced by 1, repeated until dist <= 1. Each
 * following piece has constant term 0, takes its linear and quadratic terms
 * from the previous piece's first derivative and half its second derivative
 * at 1, and has its cubic term chosen so the piece evaluates to 0 at 1.
 *
 * @param a,b,c,d  cubic coefficients of the first piece (constant .. cubic)
 * @param dist     distance at which to evaluate
 * @return the value of the piece containing dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
	/* written as !(dist<=1.0) so a NaN distance behaves as in the recursive form */
	while(!(dist<=1.0))
	{
		const double nextB=  b+ 2.0*c + 3.0*d;
		const double nextC=         c + 3.0*d;
		const double nextD= -b- 3.0*c - 6.0*d;

		a= 0.0;
		b= nextB;
		c= nextC;
		d= nextD;
		dist= dist-1.0;
	}

	return ((d*dist + c)*dist + b)*dist +a;
}
850

851
/**
 * Builds the integer filter coefficients and source positions for scaling
 * one dimension from srcW to dstW samples.
 *
 * @param outFilter     receives an av_malloc()ed table of
 *                      (*outFilterSize)*(dstW+1) int16_t coefficients
 * @param filterPos     receives an av_malloc()ed table of dstW+1 source
 *                      positions (one per output sample plus one padding entry)
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source step per output sample in 16.16 fixed point
 * @param filterAlign   the tap count is rounded up to a multiple of this
 * @param one           value the coefficients of each output sample sum to
 * @param flags         SWS_* scaler selection and SWS_CPU_CAPS_* flags
 * @param srcFilter     optional vector convolved with the scaling filter
 * @param dstFilter     optional; only its length is used here (see FIXME below)
 * @param param         scaler tuning values, SWS_PARAM_DEFAULT selects defaults
 * @return 0 on success, -1 if the resulting filter size reaches MAX_FILTER_SIZE.
 *         On failure the temporary buffers are released; *filterPos stays
 *         allocated and is left to the caller.
 */
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
			      int srcW, int dstW, int filterAlign, int one, int flags,
			      SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
	int i;
	int filterSize;
	int filter2Size;
	int minFilterSize;
	double *filter=NULL;
	double *filter2=NULL;
#if defined(ARCH_X86) || defined(ARCH_X86_64)
	if(flags & SWS_CPU_CAPS_MMX)
		asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
#endif

	// Note the +1 is for the MMXscaler which reads over the end
	*filterPos = av_malloc((dstW+1)*sizeof(int16_t));

	if(ABS(xInc - 0x10000) <10) // unscaled
	{
		filterSize= 1;
		filter= av_malloc(dstW*sizeof(double)*filterSize);
		for(i=0; i<dstW*filterSize; i++) filter[i]=0;

		for(i=0; i<dstW; i++)
		{
			filter[i*filterSize]=1;
			(*filterPos)[i]=i;
		}

	}
	else if(flags&SWS_POINT) // lame looking point sampling mode
	{
		int xDstInSrc;
		filterSize= 1;
		filter= av_malloc(dstW*sizeof(double)*filterSize);

		xDstInSrc= xInc/2 - 0x8000;
		for(i=0; i<dstW; i++)
		{
			int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

			(*filterPos)[i]= xx;
			filter[i]= 1.0;
			xDstInSrc+= xInc;
		}
	}
	else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
	{
		int xDstInSrc;
		if     (flags&SWS_BICUBIC) filterSize= 4;
		else if(flags&SWS_X      ) filterSize= 4;
		else			   filterSize= 2; // SWS_BILINEAR / SWS_AREA
		filter= av_malloc(dstW*sizeof(double)*filterSize);

		xDstInSrc= xInc/2 - 0x8000;
		for(i=0; i<dstW; i++)
		{
			int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
			int j;

			(*filterPos)[i]= xx;
			//Bilinear upscale / linear interpolate / Area averaging
			for(j=0; j<filterSize; j++)
			{
				double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
				double coeff= 1.0 - d;
				if(coeff<0) coeff=0;
				filter[i*filterSize + j]= coeff;
				xx++;
			}
			xDstInSrc+= xInc;
		}
	}
	else
	{
		double xDstInSrc;
		double sizeFactor, filterSizeInSrc;
		const double xInc1= (double)xInc / (double)(1<<16);

		if     (flags&SWS_BICUBIC)	sizeFactor= 4.0;
		else if(flags&SWS_X)		sizeFactor= 8.0;
		else if(flags&SWS_AREA)		sizeFactor= 1.0; //downscale only, for upscale it is bilinear
		else if(flags&SWS_GAUSS)	sizeFactor= 8.0;   // infinite ;)
		else if(flags&SWS_LANCZOS)	sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
		else if(flags&SWS_SINC)		sizeFactor= 20.0; // infinite ;)
		else if(flags&SWS_SPLINE)	sizeFactor= 20.0;  // infinite ;)
		else if(flags&SWS_BILINEAR)	sizeFactor= 2.0;
		else {
			sizeFactor= 0.0; //GCC warning killer
			ASSERT(0)
		}

		if(xInc1 <= 1.0)	filterSizeInSrc= sizeFactor; // upscale
		else			filterSizeInSrc= sizeFactor*srcW / (double)dstW;

		filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
		if(filterSize > srcW-2) filterSize=srcW-2;

		filter= av_malloc(dstW*sizeof(double)*filterSize);

		xDstInSrc= xInc1 / 2.0 - 0.5;
		for(i=0; i<dstW; i++)
		{
			int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
			int j;
			(*filterPos)[i]= xx;
			for(j=0; j<filterSize; j++)
			{
				// distance of this tap from the output sample centre, in units of the kernel
				double d= ABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
				double coeff;
				if(flags & SWS_BICUBIC)
				{
					double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
					double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;

					if(d<1.0)
						coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
					else if(d<2.0)
						coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
					else
						coeff=0.0;
				}
				else if(flags & SWS_X)
				{
					double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;

					if(d<1.0)
						coeff = cos(d*PI);
					else
						coeff=-1.0;
					if(coeff<0.0) 	coeff= -pow(-coeff, A);
					else		coeff=  pow( coeff, A);
					coeff= coeff*0.5 + 0.5;
				}
				else if(flags & SWS_AREA)
				{
					double srcPixelSize= 1.0/xInc1;
					if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
					else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
					else coeff=0.0;
				}
				else if(flags & SWS_GAUSS)
				{
					double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
					coeff = pow(2.0, - p*d*d);
				}
				else if(flags & SWS_SINC)
				{
					coeff = d ? sin(d*PI)/(d*PI) : 1.0;
				}
				else if(flags & SWS_LANCZOS)
				{
					double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
					coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
					if(d>p) coeff=0;
				}
				else if(flags & SWS_BILINEAR)
				{
					coeff= 1.0 - d;
					if(coeff<0) coeff=0;
				}
				else if(flags & SWS_SPLINE)
				{
					double p=-2.196152422706632;
					coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
				}
				else {
					coeff= 0.0; //GCC warning killer
					ASSERT(0)
				}

				filter[i*filterSize + j]= coeff;
				xx++;
			}
			xDstInSrc+= xInc1;
		}
	}

	/* apply src & dst Filter to filter -> filter2
	   av_free(filter);
	*/
	ASSERT(filterSize>0)
	filter2Size= filterSize;
	if(srcFilter) filter2Size+= srcFilter->length - 1;
	if(dstFilter) filter2Size+= dstFilter->length - 1;
	ASSERT(filter2Size>0)
	filter2= av_malloc(filter2Size*dstW*sizeof(double));

	for(i=0; i<dstW; i++)
	{
		int j;
		SwsVector scaleFilter;
		SwsVector *outVec;

		scaleFilter.coeff= filter + i*filterSize;
		scaleFilter.length= filterSize;

		if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
		else	      outVec= &scaleFilter;

		ASSERT(outVec->length == filter2Size)
		//FIXME dstFilter

		for(j=0; j<outVec->length; j++)
		{
			filter2[i*filter2Size + j]= outVec->coeff[j];
		}

		// keep the filter centred after it grew from filterSize to filter2Size taps
		(*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

		if(outVec != &scaleFilter) sws_freeVec(outVec);
	}
	av_free(filter); filter=NULL;

	/* try to reduce the filter-size (step1 find size and shift left) */
	// Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
	minFilterSize= 0;
	for(i=dstW-1; i>=0; i--)
	{
		int min= filter2Size;
		int j;
		double cutOff=0.0;

		/* get rid off near zero elements on the left by shifting left */
		for(j=0; j<filter2Size; j++)
		{
			int k;
			cutOff += ABS(filter2[i*filter2Size]);

			if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;

			/* preserve Monotonicity because the core can't handle the filter otherwise */
			if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

			// Move filter coeffs left
			for(k=1; k<filter2Size; k++)
				filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
			filter2[i*filter2Size + k - 1]= 0.0;
			(*filterPos)[i]++;
		}

		cutOff=0.0;
		/* count near zeros on the right */
		for(j=filter2Size-1; j>0; j--)
		{
			cutOff += ABS(filter2[i*filter2Size + j]);

			if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
			min--;
		}

		if(min>minFilterSize) minFilterSize= min;
	}

        if (flags & SWS_CPU_CAPS_ALTIVEC) {
          // we can handle the special case 4,
          // so we don't want to go to the full 8
          if (minFilterSize < 5)
            filterAlign = 4;

          // we really don't want to waste our time
          // doing useless computation, so fall-back on
          // the scalar C code for very small filter.
          // vectorizing is worth it only if you have
          // decent-sized vector.
          if (minFilterSize < 3)
            filterAlign = 1;
        }

        if (flags & SWS_CPU_CAPS_MMX) {
                // special case for unscaled vertical filtering
                if(minFilterSize == 1 && filterAlign == 2)
                        filterAlign= 1;
        }

	ASSERT(minFilterSize > 0)
	filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
	ASSERT(filterSize > 0)
	// reject oversized filters before allocating, and release the
	// intermediate buffer so the error path does not leak
	if(filterSize >= MAX_FILTER_SIZE)
	{
		av_free(filter2);
		return -1;
	}
	filter= av_malloc(filterSize*dstW*sizeof(double));
	*outFilterSize= filterSize;

	if(flags&SWS_PRINT_INFO)
		MSG_V("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
	/* try to reduce the filter-size (step2 reduce it) */
	for(i=0; i<dstW; i++)
	{
		int j;

		for(j=0; j<filterSize; j++)
		{
			if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
			else		   filter[i*filterSize + j]= filter2[i*filter2Size + j];
		}
	}
	av_free(filter2); filter2=NULL;

	//FIXME try to align filterpos if possible

	//fix borders
	for(i=0; i<dstW; i++)
	{
		int j;
		if((*filterPos)[i] < 0)
		{
			// Move filter coeffs left to compensate for filterPos
			for(j=1; j<filterSize; j++)
			{
				int left= MAX(j + (*filterPos)[i], 0);
				filter[i*filterSize + left] += filter[i*filterSize + j];
				filter[i*filterSize + j]=0;
			}
			(*filterPos)[i]= 0;
		}

		if((*filterPos)[i] + filterSize > srcW)
		{
			int shift= (*filterPos)[i] + filterSize - srcW;
			// Move filter coeffs right to compensate for filterPos
			for(j=filterSize-2; j>=0; j--)
			{
				int right= MIN(j + shift, filterSize-1);
				filter[i*filterSize +right] += filter[i*filterSize +j];
				filter[i*filterSize +j]=0;
			}
			(*filterPos)[i]= srcW - filterSize;
		}
	}

	// Note the +1 is for the MMXscaler which reads over the end
	/* align at 16 for AltiVec (needed by hScale_altivec_real) */
	*outFilter= av_malloc(*outFilterSize*(dstW+1)*sizeof(int16_t));
	memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));

	/* Normalize & Store in outFilter */
	for(i=0; i<dstW; i++)
	{
		int j;
		double error=0;
		double sum=0;
		double scale= one;

		for(j=0; j<filterSize; j++)
		{
			sum+= filter[i*filterSize + j];
		}
		scale/= sum;
		for(j=0; j<*outFilterSize; j++)
		{
			// carry the rounding error into the next tap so the integer taps still sum to ~one
			double v= filter[i*filterSize + j]*scale + error;
			int intV= floor(v + 0.5);
			(*outFilter)[i*(*outFilterSize) + j]= intV;
			error = v - intV;
		}
	}

	(*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
	for(i=0; i<*outFilterSize; i++)
	{
		int j= dstW*(*outFilterSize);
		(*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
	}

	av_free(filter);
        return 0;
}
1229

1230
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1231
/*
 * Generates a horizontal scaling routine into funnyCode by copying and
 * patching machine-code fragments, and fills filter / filterPos for it.
 *
 * Each of the two asm statements below contains a code fragment between the
 * labels 0 and 9 which is jumped over ("jmp 9f") and never executed in place.
 * The instructions after label 9 only compute, for that fragment:
 *   %0 = address of label 0 (start of the fragment)
 *   %1 = offset of the byte preceding label 1, relative to label 0
 *   %2 = offset of the byte preceding label 2, relative to label 0
 *   %3 = length of the fragment (label 9 - label 0)
 * The bytes at offsets %1 and %2 are overwritten per copy further down;
 * judging by the variable names they are the immediate operands of the two
 * pshufw instructions -- NOTE(review): confirm against the instruction encoding.
 * Fragment A loads two source dwords (at offset 0 and 1), fragment B only one.
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1232
{
1233
	uint8_t *fragmentA;
1234 1235 1236
	long imm8OfPShufW1A;
	long imm8OfPShufW2A;
	long fragmentLengthA;
1237
	uint8_t *fragmentB;
1238 1239 1240
	long imm8OfPShufW1B;
	long imm8OfPShufW2B;
	long fragmentLengthB;
1241
	int fragmentPos;
1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252

	int xpos, i;

	// create an optimized horizontal scaling routine

	//code fragment

	asm volatile(
		"jmp 9f				\n\t"
	// Begin
		"0:				\n\t"
1253 1254 1255
		"movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
		"movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
		"movd 1(%%"REG_c", %%"REG_S"), %%mm1\n\t"
1256 1257
		"punpcklbw %%mm7, %%mm1		\n\t"
		"punpcklbw %%mm7, %%mm0		\n\t"
1258 1259 1260 1261 1262
		"pshufw $0xFF, %%mm1, %%mm1	\n\t"
		"1:				\n\t"
		"pshufw $0xFF, %%mm0, %%mm0	\n\t"
		"2:				\n\t"
		"psubw %%mm1, %%mm0		\n\t"
1263
		"movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
1264 1265 1266 1267
		"pmullw %%mm3, %%mm0		\n\t"
		"psllw $7, %%mm1		\n\t"
		"paddw %%mm1, %%mm0		\n\t"

1268
		"movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1269

1270
		"add $8, %%"REG_a"		\n\t"
1271 1272 1273
	// End
		"9:				\n\t"
//		"int $3\n\t"
1274 1275 1276 1277 1278 1279 1280 1281 1282
		"lea 0b, %0			\n\t"
		"lea 1b, %1			\n\t"
		"lea 2b, %2			\n\t"
		"dec %1				\n\t"
		"dec %2				\n\t"
		"sub %0, %1			\n\t"
		"sub %0, %2			\n\t"
		"lea 9b, %3			\n\t"
		"sub %0, %3			\n\t"
1283 1284 1285 1286


		:"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
		"=r" (fragmentLengthA)
1287 1288
	);

1289 1290 1291 1292
	asm volatile(
		"jmp 9f				\n\t"
	// Begin
		"0:				\n\t"
1293 1294
		"movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
		"movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1295 1296 1297 1298 1299 1300
		"punpcklbw %%mm7, %%mm0		\n\t"
		"pshufw $0xFF, %%mm0, %%mm1	\n\t"
		"1:				\n\t"
		"pshufw $0xFF, %%mm0, %%mm0	\n\t"
		"2:				\n\t"
		"psubw %%mm1, %%mm0		\n\t"
1301
		"movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
1302 1303 1304 1305
		"pmullw %%mm3, %%mm0		\n\t"
		"psllw $7, %%mm1		\n\t"
		"paddw %%mm1, %%mm0		\n\t"

1306
		"movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1307

1308
		"add $8, %%"REG_a"		\n\t"
1309 1310 1311
	// End
		"9:				\n\t"
//		"int $3\n\t"
1312 1313 1314 1315 1316 1317 1318 1319 1320
		"lea 0b, %0			\n\t"
		"lea 1b, %1			\n\t"
		"lea 2b, %2			\n\t"
		"dec %1				\n\t"
		"dec %2				\n\t"
		"sub %0, %1			\n\t"
		"sub %0, %2			\n\t"
		"lea 9b, %3			\n\t"
		"sub %0, %3			\n\t"
1321 1322 1323 1324 1325 1326 1327 1328 1329 1330


		:"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
		"=r" (fragmentLengthB)
	);

	xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
	fragmentPos=0;
	
	for(i=0; i<dstW/numSplits; i++)
1331 1332 1333 1334 1335 1336 1337 1338 1339 1340
	{
		int xx=xpos>>16;

		/* one fragment is emitted for every group of 4 output samples */
		if((i&3) == 0)
		{
			/* a..d: integer source offsets of the 4 samples relative to xx */
			int a=0;
			int b=((xpos+xInc)>>16) - xx;
			int c=((xpos+xInc*2)>>16) - xx;
			int d=((xpos+xInc*3)>>16) - xx;

1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376
			/* inverted 16 bit fraction of each position, reduced to 7 bits */
			filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
			filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
			filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
			filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
			filterPos[i/2]= xx;

			/* d+1 < 4: offsets d and d+1 both fit in a 2 bit selector, use fragment B */
			if(d+1<4)
			{
				int maxShift= 3-(d+1);
				int shift=0;

				memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);

				/* patch the two selector bytes of this copy: offsets+1 and offsets, 2 bits each */
				funnyCode[fragmentPos + imm8OfPShufW1B]=
					(a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
				funnyCode[fragmentPos + imm8OfPShufW2B]=
					a | (b<<2) | (c<<4) | (d<<6);

				if(i+3>=dstW) shift=maxShift; //avoid overread
				else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align

				if(shift && i>=shift)
				{
					/* 0x55*shift adds shift to each of the four 2 bit fields */
					funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
					funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
					filterPos[i/2]-=shift;
				}

				fragmentPos+= fragmentLengthB;
			}
			else
			{
				int maxShift= 3-d;
				int shift=0;

				memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1377

1378 1379 1380
				/* fragment A: both patched bytes get the same selector */
				funnyCode[fragmentPos + imm8OfPShufW1A]=
				funnyCode[fragmentPos + imm8OfPShufW2A]=
					a | (b<<2) | (c<<4) | (d<<6);
1381

1382 1383
				if(i+4>=dstW) shift=maxShift; //avoid overread
				else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1384

1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395
				if(shift && i>=shift)
				{
					funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
					funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
					filterPos[i/2]-=shift;
				}

				fragmentPos+= fragmentLengthA;
			}

			/* terminate the code generated so far; overwritten by the next fragment, if any */
			funnyCode[fragmentPos]= RET;
1396 1397 1398
		}
		xpos+=xInc;
	}
1399
	filterPos[i/2]= xpos>>16; // needed to jump to the next part
1400
}
1401
#endif // ARCH_X86 || ARCH_X86_64
1402

1403
static void globalInit(void){
1404 1405
    // generating tables:
    int i;
1406 1407 1408
    for(i=0; i<768; i++){
	int c= MIN(MAX(i-256, 0), 255);
	clip_table[i]=c;
M
Michael Niedermayer 已提交
1409
    }
1410
}
1411

1412 1413
static SwsFunc getSwsFunc(int flags){
    
1414
#ifdef RUNTIME_CPUDETECT
1415
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1416
	// ordered per speed fasterst first
1417 1418 1419 1420 1421 1422
	if(flags & SWS_CPU_CAPS_MMX2)
		return swScale_MMX2;
	else if(flags & SWS_CPU_CAPS_3DNOW)
		return swScale_3DNow;
	else if(flags & SWS_CPU_CAPS_MMX)
		return swScale_MMX;
1423
	else
1424
		return swScale_C;
1425 1426

#else
1427 1428 1429 1430 1431 1432
#ifdef ARCH_POWERPC
	if(flags & SWS_CPU_CAPS_ALTIVEC)
	  return swScale_altivec;
	else
	  return swScale_C;
#endif
1433
	return swScale_C;
1434 1435 1436
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
1437
	return swScale_MMX2;
1438
#elif defined (HAVE_3DNOW)
1439
	return swScale_3DNow;
1440
#elif defined (HAVE_MMX)
1441
	return swScale_MMX;
1442 1443
#elif defined (HAVE_ALTIVEC)
	return swScale_altivec;
1444
#else
1445
	return swScale_C;
1446 1447
#endif
#endif //!RUNTIME_CPUDETECT
1448
}
M
Michael Niedermayer 已提交
1449

1450
/**
 * Unscaled planar YUV -> NV12/NV21 conversion of one slice.
 * The luma plane is copied as is; the two source chroma planes are
 * interleaved into destination plane 1, in U,V order when
 * c->dstFormat is IMGFMT_NV12 and in V,U order otherwise.
 *
 * @param srcSliceY first line of the slice
 * @param srcSliceH number of luma lines in the slice
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
	uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
	/* Copy Y plane */
	if(dstStride[0]==srcStride[0] && srcStride[0] > 0)
		memcpy(dst, src[0], srcSliceH*dstStride[0]);
	else
	{
		int i;
		uint8_t *srcPtr= src[0];
		uint8_t *dstPtr= dst;
		for(i=0; i<srcSliceH; i++)
		{
			memcpy(dstPtr, srcPtr, c->srcW);
			srcPtr+= srcStride[0];
			dstPtr+= dstStride[0];
		}
	}
	/* Interleave chroma; the interleaved plane is plane 1, so its own stride
	   is used both for locating the slice and for stepping between lines */
	dst = dstParam[1] + dstStride[1]*srcSliceY/2;
	if (c->dstFormat == IMGFMT_NV12)
		interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[1] );
	else
		interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[1] );

	return srcSliceH;
}

1477
/**
 * Unscaled planar YUV -> YUY2 conversion of one slice via yv12toyuy2().
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
	/* first destination line of this slice */
	uint8_t * const sliceStart= dstParam[0] + dstStride[0]*srcSliceY;

	yv12toyuy2(src[0], src[1], src[2], sliceStart,
	           c->srcW, srcSliceH,
	           srcStride[0], srcStride[1], dstStride[0]);

	return srcSliceH;
}

A
Alex Beregszaszi 已提交
1486 1487 1488 1489 1490 1491 1492 1493 1494
/**
 * Unscaled planar YUV -> UYVY conversion of one slice via yv12touyvy().
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
	/* first destination line of this slice */
	uint8_t * const sliceStart= dstParam[0] + dstStride[0]*srcSliceY;

	yv12touyvy(src[0], src[1], src[2], sliceStart,
	           c->srcW, srcSliceH,
	           srcStride[0], srcStride[1], dstStride[0]);

	return srcSliceH;
}

1495
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1496
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1497 1498 1499 1500 1501 1502 1503
			   int srcSliceH, uint8_t* dst[], int dstStride[]){
	const int srcFormat= c->srcFormat;
	const int dstFormat= c->dstFormat;
	const int srcBpp= ((srcFormat&0xFF) + 7)>>3;
	const int dstBpp= ((dstFormat&0xFF) + 7)>>3;
	const int srcId= (srcFormat&0xFF)>>2; // 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 
	const int dstId= (dstFormat&0xFF)>>2;
1504
	void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1505 1506

	/* BGR -> BGR */
M
cleanup  
Michael Niedermayer 已提交
1507 1508
	if(   (isBGR(srcFormat) && isBGR(dstFormat))
	   || (isRGB(srcFormat) && isRGB(dstFormat))){
1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522
		switch(srcId | (dstId<<4)){
		case 0x34: conv= rgb16to15; break;
		case 0x36: conv= rgb24to15; break;
		case 0x38: conv= rgb32to15; break;
		case 0x43: conv= rgb15to16; break;
		case 0x46: conv= rgb24to16; break;
		case 0x48: conv= rgb32to16; break;
		case 0x63: conv= rgb15to24; break;
		case 0x64: conv= rgb16to24; break;
		case 0x68: conv= rgb32to24; break;
		case 0x83: conv= rgb15to32; break;
		case 0x84: conv= rgb16to32; break;
		case 0x86: conv= rgb24to32; break;
		default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1523
				 sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
M
Michael Niedermayer 已提交
1524
		}
M
cleanup  
Michael Niedermayer 已提交
1525 1526
	}else if(   (isBGR(srcFormat) && isRGB(dstFormat))
		 || (isRGB(srcFormat) && isBGR(dstFormat))){
1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544
		switch(srcId | (dstId<<4)){
		case 0x33: conv= rgb15tobgr15; break;
		case 0x34: conv= rgb16tobgr15; break;
		case 0x36: conv= rgb24tobgr15; break;
		case 0x38: conv= rgb32tobgr15; break;
		case 0x43: conv= rgb15tobgr16; break;
		case 0x44: conv= rgb16tobgr16; break;
		case 0x46: conv= rgb24tobgr16; break;
		case 0x48: conv= rgb32tobgr16; break;
		case 0x63: conv= rgb15tobgr24; break;
		case 0x64: conv= rgb16tobgr24; break;
		case 0x66: conv= rgb24tobgr24; break;
		case 0x68: conv= rgb32tobgr24; break;
		case 0x83: conv= rgb15tobgr32; break;
		case 0x84: conv= rgb16tobgr32; break;
		case 0x86: conv= rgb24tobgr32; break;
		case 0x88: conv= rgb32tobgr32; break;
		default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1545
				 sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
A
Arpi 已提交
1546
		}
M
cleanup  
Michael Niedermayer 已提交
1547 1548
	}else{
		MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1549
			 sws_format_name(srcFormat), sws_format_name(dstFormat));
1550
	}
M
cleanup  
Michael Niedermayer 已提交
1551

1552 1553
	if(dstStride[0]*srcBpp == srcStride[0]*dstBpp)
		conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
A
Arpi 已提交
1554 1555 1556 1557 1558 1559 1560 1561
	else
	{
		int i;
		uint8_t *srcPtr= src[0];
		uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

		for(i=0; i<srcSliceH; i++)
		{
1562
			conv(srcPtr, dstPtr, c->srcW*srcBpp);
A
Arpi 已提交
1563 1564 1565 1566
			srcPtr+= srcStride[0];
			dstPtr+= dstStride[0];
		}
	}     
1567
	return srcSliceH;
A
Arpi 已提交
1568 1569
}

1570
/**
 * Unscaled BGR24 -> planar YUV conversion of one slice via rgb24toyv12().
 * The chroma destination planes are addressed at half the slice line number.
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
	const int chromaLine= srcSliceY>>1;
	uint8_t * const lumaOut = dst[0]+ srcSliceY *dstStride[0];
	uint8_t * const plane1Out= dst[1]+ chromaLine*dstStride[1];
	uint8_t * const plane2Out= dst[2]+ chromaLine*dstStride[2];

	rgb24toyv12(src[0], lumaOut, plane1Out, plane2Out,
	            c->srcW, srcSliceH,
	            dstStride[0], dstStride[1], srcStride[0]);

	return srcSliceH;
}

1583
/**
 * Unscaled YVU9 -> YV12/I420 conversion.
 * The luma slice is copied; both chroma planes are enlarged with planar2x()
 * using the full chroma plane size stored in the context. The two chroma
 * planes are written in source order when c->dstFormat is IMGFMT_YV12 and
 * swapped otherwise.
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
	uint8_t *lumaOut= dst[0] + dstStride[0]*srcSliceY;
	/* 0: keep plane order, 1: exchange destination planes 1 and 2 */
	const int swap= c->dstFormat==IMGFMT_YV12 ? 0 : 1;

	/* copy Y */
	if(srcStride[0]==dstStride[0] && srcStride[0] > 0){
		memcpy(lumaOut, src[0], srcStride[0]*srcSliceH);
	}else{
		uint8_t *lumaIn= src[0];
		int line= 0;

		while(line < srcSliceH)
		{
			memcpy(lumaOut, lumaIn, c->srcW);
			lumaIn += srcStride[0];
			lumaOut+= dstStride[0];
			line++;
		}
	}

	planar2x(src[1], dst[1+swap], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1+swap]);
	planar2x(src[2], dst[2-swap], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2-swap]);

	return srcSliceH;
}

1612 1613 1614
/**
 * bring pointers in YUV order instead of YVU
 */
1615 1616
static inline void sws_orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
	if(format == IMGFMT_YV12 || format == IMGFMT_YVU9
1617
           || format == IMGFMT_444P || format == IMGFMT_422P || format == IMGFMT_411P){
1618
		sortedP[0]= p[0];
1619 1620
		sortedP[1]= p[2];
		sortedP[2]= p[1];
M
Michael Niedermayer 已提交
1621
		sortedStride[0]= stride[0];
1622 1623
		sortedStride[1]= stride[2];
		sortedStride[2]= stride[1];
1624
	}
1625
	else if(isPacked(format) || isGray(format) || format == IMGFMT_Y8)
1626 1627 1628 1629
	{
		sortedP[0]= p[0];
		sortedP[1]= 
		sortedP[2]= NULL;
M
Michael Niedermayer 已提交
1630
		sortedStride[0]= stride[0];
1631 1632 1633
		sortedStride[1]= 
		sortedStride[2]= 0;
	}
1634
	else if(format == IMGFMT_I420 || format == IMGFMT_IYUV)
1635 1636
	{
		sortedP[0]= p[0];
1637 1638
		sortedP[1]= p[1];
		sortedP[2]= p[2];
M
Michael Niedermayer 已提交
1639
		sortedStride[0]= stride[0];
1640 1641
		sortedStride[1]= stride[1];
		sortedStride[2]= stride[2];
V
Ville Syrjälä 已提交
1642 1643 1644 1645 1646 1647 1648 1649 1650
	}
	else if(format == IMGFMT_NV12 || format == IMGFMT_NV21)
	{
		sortedP[0]= p[0];
		sortedP[1]= p[1];
		sortedP[2]= NULL;
		sortedStride[0]= stride[0];
		sortedStride[1]= stride[1];
		sortedStride[2]= 0;
1651 1652
	}else{
		MSG_ERR("internal error in orderYUV\n");
1653 1654
	}
}
M
Michael Niedermayer 已提交
1655

1656
/* unscaled copy like stuff (assumes nearly identical formats) */
M
cleanup  
Michael Niedermayer 已提交
1657 1658
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
1659 1660 1661

	if(isPacked(c->srcFormat))
	{
1662
		if(dstStride[0]==srcStride[0] && srcStride[0] > 0)
1663 1664 1665 1666 1667 1668
			memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
		else
		{
			int i;
			uint8_t *srcPtr= src[0];
			uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1669 1670 1671
			int length=0;

			/* universal length finder */
1672 1673
			while(length+c->srcW <= ABS(dstStride[0]) 
			   && length+c->srcW <= ABS(srcStride[0])) length+= c->srcW;
1674
			ASSERT(length!=0);
1675 1676 1677 1678 1679 1680 1681 1682 1683 1684

			for(i=0; i<srcSliceH; i++)
			{
				memcpy(dstPtr, srcPtr, length);
				srcPtr+= srcStride[0];
				dstPtr+= dstStride[0];
			}
		}
	}
	else 
1685
	{ /* Planar YUV or gray */
1686 1687 1688
		int plane;
		for(plane=0; plane<3; plane++)
		{
1689 1690 1691
			int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
			int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
			int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1692 1693

			if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
A
Arpi 已提交
1694
			{
1695
				if(!isGray(c->dstFormat))
1696
					memset(dst[plane], 128, dstStride[plane]*height);
A
Arpi 已提交
1697
			}
1698 1699
			else
			{
1700
				if(dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1701 1702
					memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
				else
1703
				{
1704 1705 1706 1707 1708 1709 1710 1711 1712
					int i;
					uint8_t *srcPtr= src[plane];
					uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
					for(i=0; i<height; i++)
					{
						memcpy(dstPtr, srcPtr, length);
						srcPtr+= srcStride[plane];
						dstPtr+= dstStride[plane];
					}
1713 1714 1715 1716
				}
			}
		}
	}
1717
	return srcSliceH;
1718
}
1719

1720
static int remove_dup_fourcc(int fourcc)
A
Arpi 已提交
1721 1722 1723
{
	switch(fourcc)
	{
1724 1725
	    case IMGFMT_I420:
	    case IMGFMT_IYUV: return IMGFMT_YV12;
A
Arpi 已提交
1726
	    case IMGFMT_Y8  : return IMGFMT_Y800;
1727
	    case IMGFMT_IF09: return IMGFMT_YVU9;
A
Arpi 已提交
1728 1729 1730 1731
	    default: return fourcc;
	}
}

M
Michael Niedermayer 已提交
1732 1733
static void getSubSampleFactors(int *h, int *v, int format){
	switch(format){
M
Michael Niedermayer 已提交
1734
	case IMGFMT_UYVY:
M
Michael Niedermayer 已提交
1735 1736 1737 1738 1739
	case IMGFMT_YUY2:
		*h=1;
		*v=0;
		break;
	case IMGFMT_YV12:
1740
	case IMGFMT_Y800: //FIXME remove after different subsamplings are fully implemented
V
Ville Syrjälä 已提交
1741 1742
	case IMGFMT_NV12:
	case IMGFMT_NV21:
M
Michael Niedermayer 已提交
1743 1744 1745 1746 1747 1748 1749
		*h=1;
		*v=1;
		break;
	case IMGFMT_YVU9:
		*h=2;
		*v=2;
		break;
1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761
	case IMGFMT_444P:
		*h=0;
		*v=0;
		break;
	case IMGFMT_422P:
		*h=1;
		*v=0;
		break;
	case IMGFMT_411P:
		*h=2;
		*v=0;
		break;
M
Michael Niedermayer 已提交
1762 1763 1764 1765 1766 1767 1768
	default:
		*h=0;
		*v=0;
		break;
	}
}

1769 1770 1771 1772 1773
/**
 * Converts a 16.16 fixed point value to a rounded, saturated 16 bit value.
 * The result is the bit pattern of a signed 16 bit number: values below
 * -0x7FFF yield 0x8000, values above 0x7FFF yield 0x7FFF.
 * The clamp is done on the full 64 bit value so that large inputs saturate
 * instead of wrapping when narrowed.
 */
static uint16_t roundToInt16(int64_t f){
	const int64_t r= (f + (1<<15))>>16;

	if(r<-0x7FFF) return 0x8000;
	if(r> 0x7FFF) return 0x7FFF;
	return (uint16_t)r;
}

/**
1777
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1778
 * @param fullRange if 1 then the luma range is 0..255 if 0 its 16..235
1779
 * @return -1 if not supported
1780
 */
1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
	/* 16.16 fixed point working copies of the conversion coefficients */
	int64_t vToR   =  inv_table[0];
	int64_t uToB   =  inv_table[1];
	int64_t uToG   = -inv_table[2];
	int64_t vToG   = -inv_table[3];
	int64_t yGain  = 1<<16;
	int64_t yShift = 0;

	/* only contexts with a non-YUV, non-gray destination are handled here */
	if(isYUV(c->dstFormat) || isGray(c->dstFormat))
		return -1;

	/* remember everything so that sws_getColorspaceDetails() can report it */
	memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
	memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
	c->srcRange  = srcRange;
	c->dstRange  = dstRange;
	c->brightness= brightness;
	c->contrast  = contrast;
	c->saturation= saturation;

	c->uOffset= 0x0400040004000400LL;
	c->vOffset= 0x0400040004000400LL;

	/* srcRange==0: stretch luma by 255/219 and use a luma offset of 16 */
	if(!srcRange){
		yGain = (yGain*255) / 219;
		yShift= 16<<16;
	}

	/* contrast scales every coefficient, saturation only the chroma ones */
	yGain= (yGain*contrast             )>>16;
	vToR = (vToR *contrast * saturation)>>32;
	uToB = (uToB *contrast * saturation)>>32;
	uToG = (uToG *contrast * saturation)>>32;
	vToG = (vToG *contrast * saturation)>>32;

	yShift -= 256*brightness;

	/* each 16 bit value is replicated into all four lanes of a 64 bit word */
	c->yCoeff =   roundToInt16(yGain *8192) * 0x0001000100010001ULL;
	c->vrCoeff=   roundToInt16(vToR  *8192) * 0x0001000100010001ULL;
	c->ubCoeff=   roundToInt16(uToB  *8192) * 0x0001000100010001ULL;
	c->vgCoeff=   roundToInt16(vToG  *8192) * 0x0001000100010001ULL;
	c->ugCoeff=   roundToInt16(uToG  *8192) * 0x0001000100010001ULL;
	c->yOffset=   roundToInt16(yShift*   8) * 0x0001000100010001ULL;

	yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
	//FIXME factorize

#ifdef COMPILE_ALTIVEC
	if (c->flags & SWS_CPU_CAPS_ALTIVEC)
	    yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
#endif
	return 0;
}

/**
 * Reports the colorspace settings currently stored in the context.
 * The table pointers returned through inv_table / table point into the
 * context itself, they are not copies.
 * @return -1 if not supported (destination format is YUV or gray), 0 otherwise
 */
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
	if(isYUV(c->dstFormat) || isGray(c->dstFormat))
		return -1;

	/* source side */
	*inv_table = c->srcColorspaceTable;
	*srcRange  = c->srcRange;

	/* destination side */
	*table     = c->dstColorspaceTable;
	*dstRange  = c->dstRange;

	/* picture adjustments */
	*brightness= c->brightness;
	*contrast  = c->contrast;
	*saturation= c->saturation;

	return 0;
}

1849
/**
 * Allocates and initializes a scaler context.
 *
 * @param srcW,srcH      source dimensions, srcW must be >= 4 and srcH >= 1
 * @param origSrcFormat  source image format (duplicate fourccs are folded internally)
 * @param dstW,dstH      destination dimensions, dstW must be >= 8 and dstH >= 1
 * @param origDstFormat  destination image format
 * @param flags          SWS_* scaler selection, option and CPU capability flags
 * @param srcFilter      optional filter applied on the source side, may be NULL
 * @param dstFilter      optional filter applied on the destination side, may be NULL
 * @param param          optional scaler tuning, if non NULL param[0] and param[1]
 *                       are copied, otherwise SWS_PARAM_DEFAULT is used
 * @return the new context, or NULL if a format is unsupported, the dimensions
 *         are rejected or the context could not be allocated
 */
SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int dstH, int origDstFormat, int flags,
                         SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){

	SwsContext *c;
	int i;
	int usesVFilter, usesHFilter;
	int unscaled, needsDither;
	int srcFormat, dstFormat;
	SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#if defined(ARCH_X86) || defined(ARCH_X86_64)
	if(flags & SWS_CPU_CAPS_MMX)
		asm volatile("emms\n\t"::: "memory");
#endif

#ifndef RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
	flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
#ifdef HAVE_MMX2
	flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif defined (HAVE_3DNOW)
	flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif defined (HAVE_MMX)
	flags |= SWS_CPU_CAPS_MMX;
#elif defined (HAVE_ALTIVEC)
	flags |= SWS_CPU_CAPS_ALTIVEC;
#endif
#endif
	/* one time initialization of the global tables */
	if(clip_table[512] != 255) globalInit();
	if(rgb15to16 == NULL) sws_rgb2rgb_init(flags);

	/* avoid duplicate formats, so we don't need to check too much */
	srcFormat = remove_dup_fourcc(origSrcFormat);
	dstFormat = remove_dup_fourcc(origDstFormat);

	unscaled = (srcW == dstW && srcH == dstH);
	needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
		     && (dstFormat&0xFF)<24
		     && ((dstFormat&0xFF)<(srcFormat&0xFF) || (!(isRGB(srcFormat) || isBGR(srcFormat))));

	if(!isSupportedIn(srcFormat))
	{
		MSG_ERR("swScaler: %s is not supported as input format\n", sws_format_name(srcFormat));
		return NULL;
	}
	if(!isSupportedOut(dstFormat))
	{
		MSG_ERR("swScaler: %s is not supported as output format\n", sws_format_name(dstFormat));
		return NULL;
	}

	/* sanity check */
	if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
	{
		 MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
			srcW, srcH, dstW, dstH);
		return NULL;
	}

	if(!dstFilter) dstFilter= &dummyFilter;
	if(!srcFilter) srcFilter= &dummyFilter;

	c= av_malloc(sizeof(SwsContext));
	/* the context is cleared right below, so a failed allocation must be caught here */
	if(!c)
	{
		MSG_ERR("swScaler: out of memory\n");
		return NULL;
	}
	memset(c, 0, sizeof(SwsContext));

	c->srcW= srcW;
	c->srcH= srcH;
	c->dstW= dstW;
	c->dstH= dstH;
	c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
	c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
	c->flags= flags;
	c->dstFormat= dstFormat;
	c->srcFormat= srcFormat;
	c->origDstFormat= origDstFormat;
	c->origSrcFormat= origSrcFormat;
	c->vRounder= 4* 0x0001000100010001ULL;

	/* a filter vector only counts as "used" if it has more than one tap */
	usesHFilter= usesVFilter= 0;
	if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
	if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
	if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
	if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
	if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
	if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
	if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
	if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;

	getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
	getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);

	// reuse chroma for 2 pixels rgb/bgr unless user wants full chroma interpolation
	if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;

	// drop some chroma lines if the user wants it
	c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
	c->chrSrcVSubSample+= c->vChrDrop;

	// drop every 2. pixel for chroma calculation unless user wants full chroma
	if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP))
		c->chrSrcHSubSample=1;

	if(param){
		c->param[0] = param[0];
		c->param[1] = param[1];
	}else{
		c->param[0] =
		c->param[1] = SWS_PARAM_DEFAULT;
	}

	c->chrIntHSubSample= c->chrDstHSubSample;
	c->chrIntVSubSample= c->chrSrcVSubSample;

	// note the -((-x)>>y) is so that we always round toward +inf
	c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
	c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
	c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
	c->chrDstH= -((-dstH) >> c->chrDstVSubSample);

	sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], 0, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, 0, 0, 1<<16, 1<<16);

	/* unscaled special cases, later matches override earlier ones */
	if(unscaled && !usesHFilter && !usesVFilter)
	{
		/* yv12_to_nv12 */
		if(srcFormat == IMGFMT_YV12 && (dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21))
		{
			c->swScale= PlanarToNV12Wrapper;
		}
		/* yuv2bgr */
		if((srcFormat==IMGFMT_YV12 || srcFormat==IMGFMT_422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
		{
			c->swScale= yuv2rgb_get_func_ptr(c);
		}

		if( srcFormat==IMGFMT_YVU9 && dstFormat==IMGFMT_YV12 )
		{
			c->swScale= yvu9toyv12Wrapper;
		}

		/* bgr24toYV12 */
		if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
			c->swScale= bgr24toyv12Wrapper;

		/* rgb/bgr -> rgb/bgr (no dither needed forms) */
		if(   (isBGR(srcFormat) || isRGB(srcFormat))
		   && (isBGR(dstFormat) || isRGB(dstFormat))
		   && !needsDither)
			c->swScale= rgb2rgbWrapper;

		/* LQ converters if -sws 0 or -sws 4*/
		if(c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
			/* rgb/bgr -> rgb/bgr (dither needed forms) */
			if(  (isBGR(srcFormat) || isRGB(srcFormat))
			  && (isBGR(dstFormat) || isRGB(dstFormat))
			  && needsDither)
				c->swScale= rgb2rgbWrapper;

			/* yv12_to_yuy2 */
			if(srcFormat == IMGFMT_YV12 &&
			    (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY))
			{
				if (dstFormat == IMGFMT_YUY2)
				    c->swScale= PlanarToYuy2Wrapper;
				else
				    c->swScale= PlanarToUyvyWrapper;
			}
		}

#ifdef COMPILE_ALTIVEC
		if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
		    ((srcFormat == IMGFMT_YV12 &&
		      (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY)))) {
		  // unscaled YV12 -> packed YUV, we want speed
		  if (dstFormat == IMGFMT_YUY2)
		    c->swScale= yv12toyuy2_unscaled_altivec;
		  else
		    c->swScale= yv12touyvy_unscaled_altivec;
		}
#endif

		/* simple copy */
		if(   srcFormat == dstFormat
		   || (isPlanarYUV(srcFormat) && isGray(dstFormat))
		   || (isPlanarYUV(dstFormat) && isGray(srcFormat))
		  )
		{
			c->swScale= simpleCopy;
		}

		/* a special converter was found, none of the scaler setup below is needed */
		if(c->swScale){
			if(flags&SWS_PRINT_INFO)
				MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
					sws_format_name(srcFormat), sws_format_name(dstFormat));
			return c;
		}
	}

	if(flags & SWS_CPU_CAPS_MMX2)
	{
		c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
		if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
		{
			if(flags&SWS_PRINT_INFO)
				MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
		}
		if(usesHFilter) c->canMMX2BeUsed=0;
	}
	else
		c->canMMX2BeUsed=0;

	c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
	c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;

	// match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
	// but only for the FAST_BILINEAR mode otherwise do correct scaling
	// n-2 is the last chrominance sample available
	// this is not perfect, but no one should notice the difference, the more correct variant
	// would be like the vertical one, but that would require some special code for the
	// first and last pixel
	if(flags&SWS_FAST_BILINEAR)
	{
		if(c->canMMX2BeUsed)
		{
			c->lumXInc+= 20;
			c->chrXInc+= 20;
		}
		//we don't use the x86asm scaler if mmx is available
		else if(flags & SWS_CPU_CAPS_MMX)
		{
			c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
			// NOTE(review): this divides by zero if chrDstW can ever be 2 here
			// (e.g. dstW==8 with a chroma shift of 2) - confirm against getSubSampleFactors()
			c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
		}
	}

	/* precalculate horizontal scaler filter coefficients */
	{
		const int filterAlign=
		  (flags & SWS_CPU_CAPS_MMX) ? 4 :
		  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
		  1;

		initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
				 srcW      ,       dstW, filterAlign, 1<<14,
				 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
				 srcFilter->lumH, dstFilter->lumH, c->param);
		initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
				 c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
				 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
				 srcFilter->chrH, dstFilter->chrH, c->param);

#if defined(ARCH_X86) || defined(ARCH_X86_64)
// can't downscale !!!
		if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
		{
#define MAX_FUNNY_CODE_SIZE 10000
#ifdef MAP_ANONYMOUS
			/* anonymous mappings are requested with fd -1, some systems reject any other value */
			// NOTE(review): a MAP_FAILED result is not handled here - confirm the desired fallback
			c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
			c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#else
			c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
			c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
#endif

			c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
			c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
			c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
			c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));

			initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
			initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
		}
#endif
	} // Init Horizontal stuff



	/* precalculate vertical scaler filter coefficients */
	{
		const int filterAlign=
		  (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
		  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
		  1;

		initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
				srcH      ,        dstH, filterAlign, (1<<12)-4,
				(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
				srcFilter->lumV, dstFilter->lumV, c->param);
		initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
				c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
				(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
				srcFilter->chrV, dstFilter->chrV, c->param);

#ifdef HAVE_ALTIVEC
		c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
		c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);

		/* splat every vertical coefficient into all 8 lanes of a vector */
		for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
		  int j;
		  short *p = (short *)&c->vYCoeffsBank[i];
		  for (j=0;j<8;j++)
		    p[j] = c->vLumFilter[i];
		}

		for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
		  int j;
		  short *p = (short *)&c->vCCoeffsBank[i];
		  for (j=0;j<8;j++)
		    p[j] = c->vChrFilter[i];
		}
#endif
	}

	// Calculate Buffer Sizes so that they won't run out while handling these damn slices
	c->vLumBufSize= c->vLumFilterSize;
	c->vChrBufSize= c->vChrFilterSize;
	for(i=0; i<dstH; i++)
	{
		int chrI= i*c->chrDstH / dstH;
		int nextSlice= MAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
				 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));

		/* round down to a multiple of the vertical chroma subsampling */
		nextSlice>>= c->chrSrcVSubSample;
		nextSlice<<= c->chrSrcVSubSample;
		if(c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
			c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
		if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
			c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
	}

	// allocate pixbufs dynamically, every line buffer is referenced twice (at i and at i+bufSize)
	c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
	c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
	//Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
	/* align at 16 bytes for AltiVec */
	for(i=0; i<c->vLumBufSize; i++)
		c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_malloc(4000);
	for(i=0; i<c->vChrBufSize; i++)
		c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc(8000);

	//try to avoid drawing green stuff between the right end and the stride end
	for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
	for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);

	ASSERT(c->chrDstH <= dstH)

	if(flags&SWS_PRINT_INFO)
	{
#ifdef DITHER1XBPP
		char *dither= " dithered";
#else
		char *dither= "";
#endif
		if(flags&SWS_FAST_BILINEAR)
			MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
		else if(flags&SWS_BILINEAR)
			MSG_INFO("\nSwScaler: BILINEAR scaler, ");
		else if(flags&SWS_BICUBIC)
			MSG_INFO("\nSwScaler: BICUBIC scaler, ");
		else if(flags&SWS_X)
			MSG_INFO("\nSwScaler: Experimental scaler, ");
		else if(flags&SWS_POINT)
			MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
		else if(flags&SWS_AREA)
			MSG_INFO("\nSwScaler: Area Averageing scaler, ");
		else if(flags&SWS_BICUBLIN)
			MSG_INFO("\nSwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
		else if(flags&SWS_GAUSS)
			MSG_INFO("\nSwScaler: Gaussian scaler, ");
		else if(flags&SWS_SINC)
			MSG_INFO("\nSwScaler: Sinc scaler, ");
		else if(flags&SWS_LANCZOS)
			MSG_INFO("\nSwScaler: Lanczos scaler, ");
		else if(flags&SWS_SPLINE)
			MSG_INFO("\nSwScaler: Bicubic spline scaler, ");
		else
			MSG_INFO("\nSwScaler: ehh flags invalid?! ");

		if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
			MSG_INFO("from %s to%s %s ",
				sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
		else
			MSG_INFO("from %s to %s ",
				sws_format_name(srcFormat), sws_format_name(dstFormat));

		if(flags & SWS_CPU_CAPS_MMX2)
			MSG_INFO("using MMX2\n");
		else if(flags & SWS_CPU_CAPS_3DNOW)
			MSG_INFO("using 3DNOW\n");
		else if(flags & SWS_CPU_CAPS_MMX)
			MSG_INFO("using MMX\n");
		else if(flags & SWS_CPU_CAPS_ALTIVEC)
			MSG_INFO("using AltiVec\n");
		else
			MSG_INFO("using C\n");
	}

	if(flags & SWS_PRINT_INFO)
	{
		if(flags & SWS_CPU_CAPS_MMX)
		{
			if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
				MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
			else
			{
				if(c->hLumFilterSize==4)
					MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
				else if(c->hLumFilterSize==8)
					MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
				else
					MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");

				if(c->hChrFilterSize==4)
					MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
				else if(c->hChrFilterSize==8)
					MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
				else
					MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
			}
		}
		else
		{
#if defined(ARCH_X86) || defined(ARCH_X86_64)
			MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
#else
			if(flags & SWS_FAST_BILINEAR)
				MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
			else
				MSG_V("SwScaler: using C scaler for horizontal scaling\n");
#endif
		}
		if(isPlanarYUV(dstFormat))
		{
			if(c->vLumFilterSize==1)
				MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
			else
				MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
		}
		else
		{
			if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
				MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
				       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
			else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
				MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
			else
				MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
		}

		if(dstFormat==IMGFMT_BGR24)
			MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
				(flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
		else if(dstFormat==IMGFMT_BGR32)
			MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
		else if(dstFormat==IMGFMT_BGR16)
			MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
		else if(dstFormat==IMGFMT_BGR15)
			MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");

		MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
	}
	if(flags & SWS_PRINT_INFO)
	{
		MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
			c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
		MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
			c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
	}

	c->swScale= getSwsFunc(flags);
	return c;
}

2320
/**
 * swscale wrapper, so we don't need to export the SwsContext.
 * assumes planar YUV to be in YUV order instead of YVU
 *
 * The slice direction is latched in the context on the first call: a first
 * slice starting at line 0 selects top to bottom, anything else bottom to top.
 * @return 0 if the first slice neither starts at the top nor ends at the
 *         bottom of the picture, otherwise whatever the scaler function returns
 */
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
	int plane;
	int srcStride2[3];
	int dstStride2[3];
	uint8_t *src2[3];
	uint8_t *dst2[3];
	int lastSrcLine[3];
	int lastDstLine[3];

	if (c->sliceDir == 0) {
	    // direction not known yet, derive it from the first slice
	    if (srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
		MSG_ERR("swScaler: slices start in the middle!\n");
		return 0;
	    }
	    c->sliceDir = (srcSliceY == 0) ? 1 : -1;
	}

	if (c->sliceDir == 1) {
	    // slices go from top to bottom
	    // copy strides, so they can safely be modified
	    for (plane=0; plane<3; plane++) {
		srcStride2[plane]= srcStride[plane];
		dstStride2[plane]= dstStride[plane];
	    }
	    return c->swScale(c, src, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
	}

	// slices go from bottom to top => we flip the image internally:
	// start at the last line of every plane and walk with negated strides
	lastSrcLine[0]= srcSliceH-1;
	lastSrcLine[1]=
	lastSrcLine[2]= (srcSliceH>>c->chrSrcVSubSample)-1;
	lastDstLine[0]= c->dstH-1;
	lastDstLine[1]=
	lastDstLine[2]= (c->dstH>>c->chrDstVSubSample)-1;

	for (plane=0; plane<3; plane++) {
	    src2[plane]= src[plane] + lastSrcLine[plane]*srcStride[plane];
	    dst2[plane]= dst[plane] + lastDstLine[plane]*dstStride[plane];
	    srcStride2[plane]= -srcStride[plane];
	    dstStride2[plane]= -dstStride[plane];
	}

	return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
}

2356
/**
G
Gabucino 已提交
2357
 * swscale warper, so we don't need to export the SwsContext
2358
 */
M
cleanup  
Michael Niedermayer 已提交
2359 2360 2361 2362 2363 2364
int sws_scale(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
                           int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
	int srcStride[3];
	int dstStride[3];
	uint8_t *src[3];
	uint8_t *dst[3];
2365 2366
	sws_orderYUV(c->origSrcFormat, src, srcStride, srcParam, srcStrideParam);
	sws_orderYUV(c->origDstFormat, dst, dstStride, dstParam, dstStrideParam);
M
cleanup  
Michael Niedermayer 已提交
2367
//printf("sws: slice %d %d\n", srcSliceY, srcSliceH);
2368

M
10l  
Michael Niedermayer 已提交
2369
	return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2370 2371
}

2372 2373 2374 2375 2376
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, 
				float lumaSharpen, float chromaSharpen,
				float chromaHShift, float chromaVShift,
				int verbose)
{
2377
	SwsFilter *filter= av_malloc(sizeof(SwsFilter));
2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395

	if(lumaGBlur!=0.0){
		filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
		filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
	}else{
		filter->lumH= sws_getIdentityVec();
		filter->lumV= sws_getIdentityVec();
	}

	if(chromaGBlur!=0.0){
		filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
		filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
	}else{
		filter->chrH= sws_getIdentityVec();
		filter->chrV= sws_getIdentityVec();
	}

	if(chromaSharpen!=0.0){
2396 2397 2398 2399 2400
		SwsVector *id= sws_getIdentityVec();
                sws_scaleVec(filter->chrH, -chromaSharpen);
                sws_scaleVec(filter->chrV, -chromaSharpen);
		sws_addVec(filter->chrH, id);
		sws_addVec(filter->chrV, id);
2401 2402 2403 2404
		sws_freeVec(id);
	}

	if(lumaSharpen!=0.0){
2405 2406 2407 2408 2409
		SwsVector *id= sws_getIdentityVec();
                sws_scaleVec(filter->lumH, -lumaSharpen);
                sws_scaleVec(filter->lumV, -lumaSharpen);
		sws_addVec(filter->lumH, id);
		sws_addVec(filter->lumV, id);
2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429
		sws_freeVec(id);
	}

	if(chromaHShift != 0.0)
		sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));

	if(chromaVShift != 0.0)
		sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));

	sws_normalizeVec(filter->chrH, 1.0);
	sws_normalizeVec(filter->chrV, 1.0);
	sws_normalizeVec(filter->lumH, 1.0);
	sws_normalizeVec(filter->lumV, 1.0);

	if(verbose) sws_printVec(filter->chrH);
	if(verbose) sws_printVec(filter->lumH);

        return filter;
}

2430 2431 2432 2433
/**
 * returns a normalized gaussian curve used to filter stuff
 * quality=3 is high quality, lowwer is lowwer quality
 */
2434
SwsVector *sws_getGaussianVec(double variance, double quality){
2435 2436
	const int length= (int)(variance*quality + 0.5) | 1;
	int i;
2437
	double *coeff= av_malloc(length*sizeof(double));
2438
	double middle= (length-1)*0.5;
2439
	SwsVector *vec= av_malloc(sizeof(SwsVector));
2440 2441 2442

	vec->coeff= coeff;
	vec->length= length;
2443 2444 2445 2446 2447 2448 2449

	for(i=0; i<length; i++)
	{
		double dist= i-middle;
		coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
	}

2450
	sws_normalizeVec(vec, 1.0);
2451 2452

	return vec;
2453 2454
}

2455
SwsVector *sws_getConstVec(double c, int length){
2456
	int i;
2457
	double *coeff= av_malloc(length*sizeof(double));
2458
	SwsVector *vec= av_malloc(sizeof(SwsVector));
2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469

	vec->coeff= coeff;
	vec->length= length;

	for(i=0; i<length; i++)
		coeff[i]= c;

	return vec;
}


2470
/**
 * Returns a vector consisting of the single coefficient 1.0.
 */
SwsVector *sws_getIdentityVec(void){
	SwsVector *identity= sws_getConstVec(1.0, 1);

	return identity;
}

2474
/**
 * Returns the sum of all coefficients of a.
 */
double sws_dcVec(SwsVector *a){
	const double *cur= a->coeff;
	const double *end= a->coeff + a->length;
	double total= 0;

	/* accumulate front to back */
	while(cur < end)
		total+= *cur++;

	return total;
}

2484
/**
 * Multiplies every coefficient of a by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
	double *cur= a->coeff;
	double *end= a->coeff + a->length;

	for(; cur < end; cur++)
		*cur*= scalar;
}

2491 2492 2493 2494
/**
 * Scales a in place so that the sum of its coefficients becomes height.
 */
void sws_normalizeVec(SwsVector *a, double height){
	double factor= height/sws_dcVec(a);

	sws_scaleVec(a, factor);
}

2495
/**
 * Returns the convolution of a and b as a new vector of
 * a->length + b->length - 1 coefficients, or NULL if the allocation failed.
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
	int length= a->length + b->length - 1;
	int i, j;
	/* zero filled result, the products are accumulated into it */
	SwsVector *vec= sws_getConstVec(0.0, length);

	if(!vec) return NULL;

	for(i=0; i<a->length; i++)
	{
		for(j=0; j<b->length; j++)
		{
			vec->coeff[i+j]+= a->coeff[i]*b->coeff[j];
		}
	}

	return vec;
}

2517
/**
 * Returns a + b as a new vector which is as long as the longer operand,
 * both operands are centered in it. Returns NULL if the allocation failed.
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
	int length= MAX(a->length, b->length);
	int i;
	/* zero filled result, both operands are accumulated into it */
	SwsVector *vec= sws_getConstVec(0.0, length);

	if(!vec) return NULL;

	for(i=0; i<a->length; i++) vec->coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
	for(i=0; i<b->length; i++) vec->coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];

	return vec;
}
2533

2534
/**
 * Returns a - b as a new vector which is as long as the longer operand,
 * both operands are centered in it. Returns NULL if the allocation failed.
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
	int length= MAX(a->length, b->length);
	int i;
	/* zero filled result, a is added to it and b subtracted from it */
	SwsVector *vec= sws_getConstVec(0.0, length);

	if(!vec) return NULL;

	for(i=0; i<a->length; i++) vec->coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
	for(i=0; i<b->length; i++) vec->coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];

	return vec;
}

/* shift left / or right if "shift" is negative */
2552
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2553
	int length= a->length + ABS(shift)*2;
2554
	double *coeff= av_malloc(length*sizeof(double));
2555
	int i;
2556
	SwsVector *vec= av_malloc(sizeof(SwsVector));
2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570

	vec->coeff= coeff;
	vec->length= length;

	for(i=0; i<length; i++) coeff[i]= 0.0;

	for(i=0; i<a->length; i++)
	{
		coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
	}

	return vec;
}

2571 2572
/**
 * Replaces the contents of a by a shifted by "shift" (see sws_getShiftedVec()).
 * a is left unchanged if the shifted vector could not be created.
 */
void sws_shiftVec(SwsVector *a, int shift){
	SwsVector *shifted= sws_getShiftedVec(a, shift);

	if(!shifted) return;

	/* a takes over the coefficient array, only the temporary struct is freed */
	av_free(a->coeff);
	a->coeff= shifted->coeff;
	a->length= shifted->length;
	av_free(shifted);
}

2579 2580
/**
 * Replaces the contents of a by a + b (see sws_sumVec()).
 * a is left unchanged if the sum could not be created.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
	SwsVector *sum= sws_sumVec(a, b);

	if(!sum) return;

	/* a takes over the coefficient array, only the temporary struct is freed */
	av_free(a->coeff);
	a->coeff= sum->coeff;
	a->length= sum->length;
	av_free(sum);
}

2587 2588
/**
 * Replaces the contents of a by a - b (see sws_diffVec()).
 * a is left unchanged if the difference could not be created.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
	SwsVector *diff= sws_diffVec(a, b);

	if(!diff) return;

	/* a takes over the coefficient array, only the temporary struct is freed */
	av_free(a->coeff);
	a->coeff= diff->coeff;
	a->length= diff->length;
	av_free(diff);
}

2595 2596
/**
 * Replaces the contents of a by the convolution of a and b (see sws_getConvVec()).
 * a is left unchanged if the convolution could not be created.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
	SwsVector *conv= sws_getConvVec(a, b);

	if(!conv) return;

	/* a takes over the coefficient array, only the temporary struct is freed */
	av_free(a->coeff);
	a->coeff= conv->coeff;
	a->length= conv->length;
	av_free(conv);
}

2603
/**
 * Returns a newly allocated copy of a, or NULL if an allocation failed.
 */
SwsVector *sws_cloneVec(SwsVector *a){
	int i;
	double *coeff= av_malloc(a->length*sizeof(double));
	SwsVector *vec= av_malloc(sizeof(SwsVector));

	/* do not write through a failed allocation, release whichever half succeeded */
	if(!coeff || !vec){
		av_free(coeff);
		av_free(vec);
		return NULL;
	}

	vec->coeff= coeff;
	vec->length= a->length;

	for(i=0; i<a->length; i++) coeff[i]= a->coeff[i];

	return vec;
}

2616
/**
 * Prints a through MSG_DBG2, one line per coefficient: the value followed by
 * a bar whose position (0..60 columns) shows the value relative to the
 * min/max of the vector. Both min and max start at 0, so 0 is always part of
 * the displayed range.
 */
void sws_printVec(SwsVector *a){
	int i;
	double max=0;
	double min=0;
	double range;

	for(i=0; i<a->length; i++)
	{
		if(a->coeff[i]>max) max= a->coeff[i];
		if(a->coeff[i]<min) min= a->coeff[i];
	}

	range= max - min;

	for(i=0; i<a->length; i++)
	{
		/* range is 0 for an all zero vector, avoid converting 0/0 to int */
		int x= range > 0.0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
		MSG_DBG2("%1.3f ", a->coeff[i]);
		for(;x>0; x--) MSG_DBG2(" ");
		MSG_DBG2("|\n");
	}
}

2639
/**
 * Frees a vector together with its coefficient array, NULL is accepted.
 */
void sws_freeVec(SwsVector *a){
	if(a)
	{
		/* release the coefficients and clear the fields before the struct goes away */
		av_free(a->coeff);
		a->length=0;
		a->coeff=NULL;
		av_free(a);
	}
}

2647 2648 2649 2650 2651 2652 2653
/**
 * Frees a filter together with its four vectors, NULL is accepted.
 */
void sws_freeFilter(SwsFilter *filter){
	if(filter)
	{
		/* sws_freeVec() ignores NULL itself, so unset vectors need no extra test */
		sws_freeVec(filter->lumH);
		sws_freeVec(filter->lumV);
		sws_freeVec(filter->chrH);
		sws_freeVec(filter->chrV);
		av_free(filter);
	}
}


2658
/**
 * Releases a context and every buffer it references, NULL is accepted.
 */
void sws_freeContext(SwsContext *c){
	int line;

	if(!c) return;

	/* line buffers: only the first vLumBufSize / vChrBufSize entries are freed here */
	if(c->lumPixBuf)
	{
		for(line=0; line<c->vLumBufSize; line++)
		{
			av_free(c->lumPixBuf[line]);
			c->lumPixBuf[line]=NULL;
		}
		av_free(c->lumPixBuf);
		c->lumPixBuf=NULL;
	}

	if(c->chrPixBuf)
	{
		for(line=0; line<c->vChrBufSize; line++)
		{
			av_free(c->chrPixBuf[line]);
			c->chrPixBuf[line]=NULL;
		}
		av_free(c->chrPixBuf);
		c->chrPixBuf=NULL;
	}

	/* filter coefficients */
	av_free(c->vLumFilter);    c->vLumFilter = NULL;
	av_free(c->vChrFilter);    c->vChrFilter = NULL;
	av_free(c->hLumFilter);    c->hLumFilter = NULL;
	av_free(c->hChrFilter);    c->hChrFilter = NULL;
#ifdef HAVE_ALTIVEC
	av_free(c->vYCoeffsBank);  c->vYCoeffsBank = NULL;
	av_free(c->vCCoeffsBank);  c->vCCoeffsBank = NULL;
#endif

	/* filter positions */
	av_free(c->vLumFilterPos); c->vLumFilterPos = NULL;
	av_free(c->vChrFilterPos); c->vChrFilterPos = NULL;
	av_free(c->hLumFilterPos); c->hLumFilterPos = NULL;
	av_free(c->hChrFilterPos); c->hChrFilterPos = NULL;

	/* code buffers of the MMX2 horizontal scaler, released the same way they were obtained */
#if defined(ARCH_X86) || defined(ARCH_X86_64)
#ifdef MAP_ANONYMOUS
	if(c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
	if(c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
#else
	av_free(c->funnyYCode);
	av_free(c->funnyUVCode);
#endif
	c->funnyYCode=NULL;
	c->funnyUVCode=NULL;
#endif

	/* MMX2 filter tables and the yuv table */
	av_free(c->lumMmx2Filter);    c->lumMmx2Filter=NULL;
	av_free(c->chrMmx2Filter);    c->chrMmx2Filter=NULL;
	av_free(c->lumMmx2FilterPos); c->lumMmx2FilterPos=NULL;
	av_free(c->chrMmx2FilterPos); c->chrMmx2FilterPos=NULL;
	av_free(c->yuvTable);         c->yuvTable=NULL;

	av_free(c);
}