fastdiv patch by (BERO <bero at geocities dot co dot jp>) with fixes & cleanup by me
Originally committed as revision 1879 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
b82cdc7278
commit
d4961b3523
@ -197,6 +197,25 @@ inline void dprintf(const char* fmt,...) {}
|
|||||||
#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
|
#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
|
||||||
#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
|
#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
|
||||||
|
|
||||||
|
extern const uint32_t inverse[256];
|
||||||
|
|
||||||
|
#ifdef ARCH_X86
|
||||||
|
# define FASTDIV(a,b) \
|
||||||
|
({\
|
||||||
|
int ret,dmy;\
|
||||||
|
asm volatile(\
|
||||||
|
"mull %3"\
|
||||||
|
:"=d"(ret),"=a"(dmy)\
|
||||||
|
:"1"(a),"g"(inverse[b])\
|
||||||
|
);\
|
||||||
|
ret;\
|
||||||
|
})
|
||||||
|
#elif defined(CONFIG_FASTDIV)
|
||||||
|
# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a)*inverse[b])>>32))
|
||||||
|
#else
|
||||||
|
# define FASTDIV(a,b) ((a)/(b))
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
// avoid +32 for shift optimization (gcc should do that ...)
|
// avoid +32 for shift optimization (gcc should do that ...)
|
||||||
static inline int32_t NEG_SSR32( int32_t a, int8_t s){
|
static inline int32_t NEG_SSR32( int32_t a, int8_t s){
|
||||||
|
@ -73,8 +73,6 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
|
|||||||
static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
|
static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
|
||||||
static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr);
|
static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr);
|
||||||
|
|
||||||
extern uint32_t inverse[256];
|
|
||||||
|
|
||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
static uint8_t uni_DCtab_lum_len[512];
|
static uint8_t uni_DCtab_lum_len[512];
|
||||||
static uint8_t uni_DCtab_chrom_len[512];
|
static uint8_t uni_DCtab_chrom_len[512];
|
||||||
@ -1823,7 +1821,6 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_
|
|||||||
{
|
{
|
||||||
int a, b, c, wrap, pred, scale;
|
int a, b, c, wrap, pred, scale;
|
||||||
uint16_t *dc_val;
|
uint16_t *dc_val;
|
||||||
int dummy;
|
|
||||||
|
|
||||||
/* find prediction */
|
/* find prediction */
|
||||||
if (n < 4) {
|
if (n < 4) {
|
||||||
@ -1859,16 +1856,7 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_
|
|||||||
*dir_ptr = 0; /* left */
|
*dir_ptr = 0; /* left */
|
||||||
}
|
}
|
||||||
/* we assume pred is positive */
|
/* we assume pred is positive */
|
||||||
#ifdef ARCH_X86
|
pred = FASTDIV((pred + (scale >> 1)), scale);
|
||||||
asm volatile (
|
|
||||||
"xorl %%edx, %%edx \n\t"
|
|
||||||
"mul %%ecx \n\t"
|
|
||||||
: "=d" (pred), "=a"(dummy)
|
|
||||||
: "a" (pred + (scale >> 1)), "c" (inverse[scale])
|
|
||||||
);
|
|
||||||
#else
|
|
||||||
pred = (pred + (scale >> 1)) / scale;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* prepare address for prediction update */
|
/* prepare address for prediction update */
|
||||||
*dc_val_ptr = &dc_val[0];
|
*dc_val_ptr = &dc_val[0];
|
||||||
@ -3668,8 +3656,8 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
|||||||
/* DC coef */
|
/* DC coef */
|
||||||
if(s->partitioned_frame){
|
if(s->partitioned_frame){
|
||||||
level = s->dc_val[0][ s->block_index[n] ];
|
level = s->dc_val[0][ s->block_index[n] ];
|
||||||
if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs
|
if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
|
||||||
else level= (level + (s->c_dc_scale>>1))/s->c_dc_scale;
|
else level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale);
|
||||||
dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32;
|
dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32;
|
||||||
}else{
|
}else{
|
||||||
level = mpeg4_decode_dc(s, n, &dc_pred_dir);
|
level = mpeg4_decode_dc(s, n, &dc_pred_dir);
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
|
|
||||||
extern uint8_t zigzag_direct_noperm[64];
|
extern uint8_t zigzag_direct_noperm[64];
|
||||||
extern uint16_t inv_zigzag_direct16[64];
|
extern uint16_t inv_zigzag_direct16[64];
|
||||||
extern uint32_t inverse[256];
|
|
||||||
|
|
||||||
static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
|
static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
|
||||||
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
|
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
|
||||||
|
@ -78,8 +78,6 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
|
|||||||
static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
|
static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
|
||||||
static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
|
static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
|
||||||
|
|
||||||
extern uint32_t inverse[256];
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
int intra_count = 0;
|
int intra_count = 0;
|
||||||
@ -699,7 +697,7 @@ static int get_dc(uint8_t *src, int stride, int scale)
|
|||||||
sum+=src[x + y*stride];
|
sum+=src[x + y*stride];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (sum + (scale>>1))/scale;
|
return FASTDIV((sum + (scale>>1)), scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* dir = 0: left, dir = 1: top prediction */
|
/* dir = 0: left, dir = 1: top prediction */
|
||||||
@ -763,9 +761,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
|
|||||||
b = (b + (8 >> 1)) / 8;
|
b = (b + (8 >> 1)) / 8;
|
||||||
c = (c + (8 >> 1)) / 8;
|
c = (c + (8 >> 1)) / 8;
|
||||||
} else {
|
} else {
|
||||||
a = (a + (scale >> 1)) / scale;
|
a = FASTDIV((a + (scale >> 1)), scale);
|
||||||
b = (b + (scale >> 1)) / scale;
|
b = FASTDIV((b + (scale >> 1)), scale);
|
||||||
c = (c + (scale >> 1)) / scale;
|
c = FASTDIV((c + (scale >> 1)), scale);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* XXX: WARNING: they did not choose the same test as MPEG4. This
|
/* XXX: WARNING: they did not choose the same test as MPEG4. This
|
||||||
|
Loading…
x
Reference in New Issue
Block a user