Snow mmx + sse2 part 2
Patch by Robert Edele, yartrebo <<at>> earthlink <<dot>> net. Originally committed as revision 5184 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
1956e16653
commit
561a18d3ba
@ -230,57 +230,57 @@ static const uint8_t obmc16[256]={
|
||||
};
|
||||
#elif 1 // 64*linear
|
||||
static const uint8_t obmc32[1024]={
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
|
||||
0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
|
||||
1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||
1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
|
||||
1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
|
||||
1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
|
||||
1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
|
||||
1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
|
||||
1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
|
||||
1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
|
||||
2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
|
||||
2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
|
||||
2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
|
||||
2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
|
||||
2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
|
||||
2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
|
||||
2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
|
||||
2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
|
||||
1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
|
||||
1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
|
||||
1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
|
||||
1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
|
||||
1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
|
||||
1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
|
||||
1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
|
||||
1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
|
||||
0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
|
||||
0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
|
||||
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
|
||||
0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
|
||||
0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
|
||||
4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
|
||||
4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
|
||||
4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
|
||||
4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
|
||||
4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
|
||||
4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
|
||||
4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
|
||||
4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
|
||||
8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
|
||||
8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
|
||||
8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
|
||||
8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
|
||||
8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
|
||||
8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
|
||||
8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
|
||||
8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
|
||||
4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
|
||||
4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
|
||||
4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
|
||||
4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
|
||||
4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
|
||||
4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
|
||||
4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
|
||||
4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
|
||||
0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
|
||||
0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
|
||||
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
|
||||
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
|
||||
//error:0.000020
|
||||
};
|
||||
static const uint8_t obmc16[256]={
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
|
||||
1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
|
||||
1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
|
||||
2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
|
||||
2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
|
||||
3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
|
||||
3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
|
||||
4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
|
||||
4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
|
||||
3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
|
||||
3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
|
||||
2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
|
||||
2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
|
||||
1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
|
||||
1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
|
||||
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
|
||||
4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
|
||||
4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
|
||||
8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
|
||||
8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
|
||||
12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
|
||||
12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
|
||||
16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
|
||||
16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
|
||||
12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
|
||||
12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
|
||||
8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
|
||||
8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
|
||||
4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
|
||||
4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
|
||||
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
|
||||
//error:0.000015
|
||||
};
|
||||
#else //64*cos
|
||||
@ -342,23 +342,23 @@ static const uint8_t obmc16[256]={
|
||||
|
||||
//linear *64
|
||||
static const uint8_t obmc8[64]={
|
||||
1, 3, 5, 7, 7, 5, 3, 1,
|
||||
3, 9,15,21,21,15, 9, 3,
|
||||
5,15,25,35,35,25,15, 5,
|
||||
7,21,35,49,49,35,21, 7,
|
||||
7,21,35,49,49,35,21, 7,
|
||||
5,15,25,35,35,25,15, 5,
|
||||
3, 9,15,21,21,15, 9, 3,
|
||||
1, 3, 5, 7, 7, 5, 3, 1,
|
||||
4, 12, 20, 28, 28, 20, 12, 4,
|
||||
12, 36, 60, 84, 84, 60, 36, 12,
|
||||
20, 60,100,140,140,100, 60, 20,
|
||||
28, 84,140,196,196,140, 84, 28,
|
||||
28, 84,140,196,196,140, 84, 28,
|
||||
20, 60,100,140,140,100, 60, 20,
|
||||
12, 36, 60, 84, 84, 60, 36, 12,
|
||||
4, 12, 20, 28, 28, 20, 12, 4,
|
||||
//error:0.000000
|
||||
};
|
||||
|
||||
//linear *64
|
||||
static const uint8_t obmc4[16]={
|
||||
4,12,12, 4,
|
||||
12,36,36,12,
|
||||
12,36,36,12,
|
||||
4,12,12, 4,
|
||||
16, 48, 48, 16,
|
||||
48,144,144, 48,
|
||||
48,144,144, 48,
|
||||
16, 48, 48, 16,
|
||||
//error:0.000000
|
||||
};
|
||||
|
||||
@ -2962,7 +2962,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
|
||||
}
|
||||
*b= backup;
|
||||
|
||||
return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
|
||||
return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
|
||||
}
|
||||
|
||||
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
|
||||
@ -3022,10 +3022,10 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con
|
||||
const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
|
||||
int sx= block_w*mb_x - block_w/2;
|
||||
int sy= block_w*mb_y - block_w/2;
|
||||
const int x0= FFMAX(0,-sx);
|
||||
const int y0= FFMAX(0,-sy);
|
||||
const int x1= FFMIN(block_w*2, w-sx);
|
||||
const int y1= FFMIN(block_w*2, h-sy);
|
||||
int x0= FFMAX(0,-sx);
|
||||
int y0= FFMAX(0,-sy);
|
||||
int x1= FFMIN(block_w*2, w-sx);
|
||||
int y1= FFMIN(block_w*2, h-sy);
|
||||
int i,x,y;
|
||||
|
||||
pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
|
||||
@ -3043,6 +3043,22 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con
|
||||
}
|
||||
}
|
||||
|
||||
/* copy the regions where obmc[] = (uint8_t)256, i.e. where a weight of 256 has wrapped to 0 in the uint8_t table */
|
||||
if(LOG2_OBMC_MAX == 8
|
||||
&& (mb_x == 0 || mb_x == b_stride-1)
|
||||
&& (mb_y == 0 || mb_y == b_height-1)){
|
||||
if(mb_x == 0)
|
||||
x1 = block_w;
|
||||
else
|
||||
x0 = block_w;
|
||||
if(mb_y == 0)
|
||||
y1 = block_w;
|
||||
else
|
||||
y0 = block_w;
|
||||
for(y=y0; y<y1; y++)
|
||||
memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
|
||||
}
|
||||
|
||||
//FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
|
||||
if(block_w==16){
|
||||
distortion = 0;
|
||||
|
@ -31,7 +31,7 @@
|
||||
#define LOSSLESS_QLOG -128
|
||||
#define FRAC_BITS 8
|
||||
|
||||
#define LOG2_OBMC_MAX 6
|
||||
#define LOG2_OBMC_MAX 8
|
||||
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
|
||||
|
||||
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
|
||||
|
Loading…
x
Reference in New Issue
Block a user