dnxhdenc: Optimize get_pixels_8x4_sym for 10-bit
This reverts commit 628e6d0164 and uses a better fix.
Before:
4483 decicycles in get_pixels_8x4_sym, 131032 runs, 40 skips
After:
2569 decicycles in get_pixels_8x4_sym, 131054 runs, 18 skips
This commit is contained in:
parent
00ae5b401b
commit
bc22cd244e
@ -87,22 +87,14 @@ void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block,
|
||||
const uint8_t *pixels,
|
||||
ptrdiff_t line_size)
|
||||
{
|
||||
int i;
|
||||
const uint16_t* pixels16 = (const uint16_t*)pixels;
|
||||
line_size >>= 1;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
block[0] = pixels16[0]; block[1] = pixels16[1];
|
||||
block[2] = pixels16[2]; block[3] = pixels16[3];
|
||||
block[4] = pixels16[4]; block[5] = pixels16[5];
|
||||
block[6] = pixels16[6]; block[7] = pixels16[7];
|
||||
pixels16 += line_size;
|
||||
block += 8;
|
||||
}
|
||||
memcpy(block, block - 8, sizeof(*block) * 8);
|
||||
memcpy(block + 8, block - 16, sizeof(*block) * 8);
|
||||
memcpy(block + 16, block - 24, sizeof(*block) * 8);
|
||||
memcpy(block + 24, block - 32, sizeof(*block) * 8);
|
||||
memcpy(block + 0 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
|
||||
memcpy(block + 7 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
|
||||
memcpy(block + 1 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
|
||||
memcpy(block + 6 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
|
||||
memcpy(block + 2 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
|
||||
memcpy(block + 5 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
|
||||
memcpy(block + 3 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
|
||||
memcpy(block + 4 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
|
||||
}
|
||||
|
||||
static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
|
||||
|
Loading…
Reference in New Issue
Block a user