rounding fixes

Originally committed as revision 11123 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
2003-10-15 11:21:54 +00:00 · 2003-10-15 11:21:54 +00:00 · 379a20360c
commit 379a20360c
parent 93cb9d7f73
3 changed files with 27 additions and 22 deletions
--- a/postproc/swscale.c
+++ b/postproc/swscale.c
@@ -222,7 +222,7 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
 	int i;
 	for(i=0; i<dstW; i++)
 	{
-		int val=0;
+		int val=1<<18;
 		int j;
 		for(j=0; j<lumFilterSize; j++)
 			val += lumSrc[j][i] * lumFilter[j];
@@ -233,8 +233,8 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
 	if(uDest != NULL)
 		for(i=0; i<chrDstW; i++)
 		{
-			int u=0;
-			int v=0;
+			int u=1<<18;
+			int v=1<<18;
 			int j;
 			for(j=0; j<chrFilterSize; j++)
 			{
@@ -251,10 +251,10 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
 #define YSCALE_YUV_2_PACKEDX_C(type) \
 		for(i=0; i<(dstW>>1); i++){\
 			int j;\
-			int Y1=0;\
-			int Y2=0;\
-			int U=0;\
-			int V=0;\
+			int Y1=1<<18;\
+			int Y2=1<<18;\
+			int U=1<<18;\
+			int V=1<<18;\
 			type *r, *b, *g;\
 			const int i2= 2*i;\
 			\
@@ -621,8 +621,8 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l
 			int acc=0;
 			for(i=0; i<dstW-1; i+=2){
 				int j;
-				int Y1=0;
-				int Y2=0;
+				int Y1=1<<18;
+				int Y2=1<<18;

 				for(j=0; j<lumFilterSize; j++)
 				{
@@ -1093,7 +1093,7 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
 		scale/= sum;
 		for(j=0; j<*outFilterSize; j++)
 		{
-			(*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale);
+			(*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale + 0.5);
 		}
 	}
 	
@@ -1772,6 +1772,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
 	c->srcFormat= srcFormat;
 	c->origDstFormat= origDstFormat;
 	c->origSrcFormat= origSrcFormat;
+        c->vRounder= 4* 0x0001000100010001ULL;

 	usesFilter=0;
 	if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
--- a/postproc/swscale_internal.h
+++ b/postproc/swscale_internal.h
@@ -109,10 +109,11 @@ typedef struct SwsContext{
 #define Y_OFFSET     "8*8"
 #define U_OFFSET     "9*8"
 #define V_OFFSET     "10*8"
-#define LUM_MMX_FILTER_OFFSET "11*8"
-#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
-#define DSTW_OFFSET  "11*8+4*4*256*2"
-#define ESP_OFFSET  "11*8+4*4*256*2+4"
+#define VROUNDER_OFFSET "11*8"  
+#define LUM_MMX_FILTER_OFFSET "12*8"
+#define CHR_MMX_FILTER_OFFSET "12*8+4*4*256"
+#define DSTW_OFFSET  "12*8+4*4*256*2"
+#define ESP_OFFSET  "12*8+4*4*256*2+4"
                  
 	uint64_t redDither   __attribute__((aligned(8)));
 	uint64_t greenDither __attribute__((aligned(8)));
@@ -126,6 +127,7 @@ typedef struct SwsContext{
 	uint64_t yOffset     __attribute__((aligned(8)));
 	uint64_t uOffset     __attribute__((aligned(8)));
 	uint64_t vOffset     __attribute__((aligned(8)));
+	uint64_t vRounder     __attribute__((aligned(8)));
 	int32_t  lumMmxFilter[4*MAX_FILTER_SIZE];
 	int32_t  chrMmxFilter[4*MAX_FILTER_SIZE];
 	int dstW;
--- a/postproc/swscale_template.c
+++ b/postproc/swscale_template.c
@@ -61,8 +61,8 @@

 #define YSCALEYUV2YV12X(x, offset) \
 			"xorl %%eax, %%eax		\n\t"\
-			"pxor %%mm3, %%mm3		\n\t"\
-			"pxor %%mm4, %%mm4		\n\t"\
+			"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+			"movq %%mm3, %%mm4		\n\t"\
 			"leal " offset "(%0), %%edx	\n\t"\
 			"movl (%%edx), %%esi		\n\t"\
 			".balign 16			\n\t" /* FIXME Unroll? */\
@@ -84,8 +84,8 @@
 			MOVNTQ(%%mm3, (%1, %%eax))\
 			"addl $8, %%eax			\n\t"\
 			"cmpl %2, %%eax			\n\t"\
-			"pxor %%mm3, %%mm3		\n\t"\
-			"pxor %%mm4, %%mm4		\n\t"\
+			"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+			"movq %%mm3, %%mm4		\n\t"\
 			"leal " offset "(%0), %%edx	\n\t"\
 			"movl (%%edx), %%esi		\n\t"\
 			"jb 1b				\n\t"
@@ -117,8 +117,8 @@
 		"1:				\n\t"\
 		"leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx	\n\t"\
 		"movl (%%edx), %%esi		\n\t"\
-		"pxor %%mm3, %%mm3		\n\t"\
-		"pxor %%mm4, %%mm4		\n\t"\
+		"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+		"movq %%mm3, %%mm4		\n\t"\
 		".balign 16			\n\t"\
 		"2:				\n\t"\
 		"movq 8(%%edx), %%mm0		\n\t" /* filterCoeff */\
@@ -135,8 +135,8 @@
 \
 		"leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx	\n\t"\
 		"movl (%%edx), %%esi		\n\t"\
-		"pxor %%mm1, %%mm1		\n\t"\
-		"pxor %%mm7, %%mm7		\n\t"\
+		"movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
+		"movq %%mm1, %%mm7		\n\t"\
 		".balign 16			\n\t"\
 		"2:				\n\t"\
 		"movq 8(%%edx), %%mm0		\n\t" /* filterCoeff */\
@@ -2611,6 +2611,8 @@ i--;
 		const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
 		const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

+//printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
+// dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize,  c->chrSrcVSubSample);
 		//handle holes (FAST_BILINEAR & weird filters)
 		if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
 		if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;