Move vpx_plane_add_noise from the codecs to vpx_dsp and dedup.
Change-Id: I12218d8331c0558c0587a66321e3ca46da7e5cc7
This commit is contained in:
parent
021105e3ac
commit
fce3cee8dd
@ -13,6 +13,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "./vp10_rtcd.h"
|
||||
|
||||
@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
|
||||
state->last_noise = a;
|
||||
}
|
||||
|
||||
void vp10_plane_add_noise_c(uint8_t *start, char *noise,
|
||||
char blackclamp[16],
|
||||
char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
unsigned int width, unsigned int height, int pitch) {
|
||||
unsigned int i, j;
|
||||
|
||||
// TODO(jbb): why does simd code use both but c doesn't, normalize and
|
||||
// fix..
|
||||
(void) bothclamp;
|
||||
for (i = 0; i < height; i++) {
|
||||
uint8_t *pos = start + i * pitch;
|
||||
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
|
||||
|
||||
for (j = 0; j < width; j++) {
|
||||
if (pos[j] < blackclamp[0])
|
||||
pos[j] = blackclamp[0];
|
||||
|
||||
if (pos[j] > 255 + whiteclamp[0])
|
||||
pos[j] = 255 + whiteclamp[0];
|
||||
|
||||
pos[j] += ref[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
|
||||
// Current mip will be the prev_mip for the next frame.
|
||||
MODE_INFO *temp = cm->postproc_state.prev_mip;
|
||||
@ -727,7 +702,7 @@ int vp10_post_proc_frame(struct VP10Common *cm,
|
||||
fillrd(ppstate, 63 - q, noise_level);
|
||||
}
|
||||
|
||||
vp10_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
ppstate->whiteclamp, ppstate->bothclamp,
|
||||
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
|
||||
}
|
||||
|
@ -70,10 +70,6 @@ add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint
|
||||
specialize qw/vp10_post_proc_down_and_across sse2/;
|
||||
$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
|
||||
|
||||
add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vp10_plane_add_noise sse2/;
|
||||
$vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
|
||||
|
||||
add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
|
||||
specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
|
||||
|
||||
@ -326,9 +322,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
|
||||
specialize qw/vp10_highbd_post_proc_down_and_across/;
|
||||
|
||||
add_proto qw/void vp10_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vp10_highbd_plane_add_noise/;
|
||||
}
|
||||
|
||||
#
|
||||
|
@ -624,68 +624,6 @@ sym(vp10_mbpost_proc_across_ip_xmm):
|
||||
%undef flimit4
|
||||
|
||||
|
||||
;void vp10_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
|
||||
; unsigned char blackclamp[16],
|
||||
; unsigned char whiteclamp[16],
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int width, unsigned int height, int pitch)
|
||||
global sym(vp10_plane_add_noise_wmt) PRIVATE
|
||||
sym(vp10_plane_add_noise_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
.addnoise_loop:
|
||||
call sym(LIBVPX_RAND) WRT_PLT
|
||||
mov rcx, arg(1) ;noise
|
||||
and rax, 0xff
|
||||
add rcx, rax
|
||||
|
||||
; we rely on the fact that the clamping vectors are stored contiguously
|
||||
; in black/white/both order. Note that we have to reload this here because
|
||||
; rdx could be trashed by rand()
|
||||
mov rdx, arg(2) ; blackclamp
|
||||
|
||||
|
||||
mov rdi, rcx
|
||||
movsxd rcx, dword arg(5) ;[Width]
|
||||
mov rsi, arg(0) ;Pos
|
||||
xor rax,rax
|
||||
|
||||
.addnoise_nextset:
|
||||
movdqu xmm1,[rsi+rax] ; get the source
|
||||
|
||||
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
|
||||
paddusb xmm1, [rdx+32] ;bothclamp
|
||||
psubusb xmm1, [rdx+16] ;whiteclamp
|
||||
|
||||
movdqu xmm2,[rdi+rax] ; get the noise for this line
|
||||
paddb xmm1,xmm2 ; add it in
|
||||
movdqu [rsi+rax],xmm1 ; store the result
|
||||
|
||||
add rax,16 ; move to the next line
|
||||
|
||||
cmp rax, rcx
|
||||
jl .addnoise_nextset
|
||||
|
||||
movsxd rax, dword arg(7) ; Pitch
|
||||
add arg(0), rax ; Start += Pitch
|
||||
sub dword arg(6), 1 ; Height -= 1
|
||||
jg .addnoise_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
rd42:
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
static const int16_t vp8_rv_msa[] =
|
||||
@ -798,54 +799,3 @@ void vp8_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
|
||||
char blackclamp[16], char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
uint32_t width, uint32_t height,
|
||||
int32_t pitch)
|
||||
{
|
||||
uint32_t i, j;
|
||||
|
||||
for (i = 0; i < height / 2; ++i)
|
||||
{
|
||||
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
|
||||
int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff));
|
||||
uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
|
||||
int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff));
|
||||
for (j = width / 16; j--;)
|
||||
{
|
||||
v16i8 temp00_s, temp01_s;
|
||||
v16u8 temp00, temp01, black_clamp, white_clamp;
|
||||
v16u8 pos0, ref0, pos1, ref1;
|
||||
v16i8 const127 = __msa_ldi_b(127);
|
||||
|
||||
pos0 = LD_UB(pos0_ptr);
|
||||
ref0 = LD_UB(ref0_ptr);
|
||||
pos1 = LD_UB(pos1_ptr);
|
||||
ref1 = LD_UB(ref1_ptr);
|
||||
black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
|
||||
white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
|
||||
temp00 = (pos0 < black_clamp);
|
||||
pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
|
||||
temp01 = (pos1 < black_clamp);
|
||||
pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
|
||||
XORI_B2_128_UB(pos0, pos1);
|
||||
temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
|
||||
temp00 = (v16u8)(temp00_s < pos0);
|
||||
pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
|
||||
temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
|
||||
temp01 = (temp01_s < pos1);
|
||||
pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
|
||||
XORI_B2_128_UB(pos0, pos1);
|
||||
pos0 += ref0;
|
||||
ST_UB(pos0, pos0_ptr);
|
||||
pos1 += ref1;
|
||||
ST_UB(pos1, pos1_ptr);
|
||||
pos0_ptr += 16;
|
||||
pos1_ptr += 16;
|
||||
ref0_ptr += 16;
|
||||
ref1_ptr += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_dsp_rtcd.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_scale_rtcd.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
@ -490,54 +491,6 @@ static void fillrd(struct postproc_state *state, int q, int a)
|
||||
state->last_noise = a;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : plane_add_noise_c
|
||||
*
|
||||
* INPUTS : unsigned char *Start starting address of buffer to add gaussian
|
||||
* noise to
|
||||
* unsigned int Width width of plane
|
||||
* unsigned int Height height of plane
|
||||
* int Pitch distance between subsequent lines of frame
|
||||
* int q quantizer used to determine amount of noise
|
||||
* to add
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void.
|
||||
*
|
||||
* FUNCTION : adds gaussian noise to a plane of pixels
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
|
||||
char blackclamp[16],
|
||||
char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
unsigned int Width, unsigned int Height, int Pitch)
|
||||
{
|
||||
unsigned int i, j;
|
||||
(void)bothclamp;
|
||||
|
||||
for (i = 0; i < Height; i++)
|
||||
{
|
||||
unsigned char *Pos = Start + i * Pitch;
|
||||
char *Ref = (char *)(noise + (rand() & 0xff));
|
||||
|
||||
for (j = 0; j < Width; j++)
|
||||
{
|
||||
if (Pos[j] < blackclamp[0])
|
||||
Pos[j] = blackclamp[0];
|
||||
|
||||
if (Pos[j] > 255 + whiteclamp[0])
|
||||
Pos[j] = 255 + whiteclamp[0];
|
||||
|
||||
Pos[j] += Ref[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Blend the macro block with a solid colored square. Leave the
|
||||
* edges unblended to give distinction to macro blocks in areas
|
||||
* filled with the same color block.
|
||||
@ -828,7 +781,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
fillrd(&oci->postproc_state, 63 - q, noise_level);
|
||||
}
|
||||
|
||||
vp8_plane_add_noise
|
||||
vpx_plane_add_noise
|
||||
(oci->post_proc_buffer.y_buffer,
|
||||
oci->postproc_state.noise,
|
||||
oci->postproc_state.blackclamp,
|
||||
|
@ -167,10 +167,6 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") {
|
||||
add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
|
||||
specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;
|
||||
|
||||
add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
|
||||
specialize qw/vp8_plane_add_noise mmx sse2 msa/;
|
||||
$vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
|
||||
|
||||
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
|
||||
# no asm yet
|
||||
|
||||
|
@ -241,68 +241,6 @@ sym(vp8_mbpost_proc_down_mmx):
|
||||
%undef flimit2
|
||||
|
||||
|
||||
;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
|
||||
; unsigned char blackclamp[16],
|
||||
; unsigned char whiteclamp[16],
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int Width, unsigned int Height, int Pitch)
|
||||
global sym(vp8_plane_add_noise_mmx) PRIVATE
|
||||
sym(vp8_plane_add_noise_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
.addnoise_loop:
|
||||
call sym(LIBVPX_RAND) WRT_PLT
|
||||
mov rcx, arg(1) ;noise
|
||||
and rax, 0xff
|
||||
add rcx, rax
|
||||
|
||||
; we rely on the fact that the clamping vectors are stored contiguously
|
||||
; in black/white/both order. Note that we have to reload this here because
|
||||
; rdx could be trashed by rand()
|
||||
mov rdx, arg(2) ; blackclamp
|
||||
|
||||
|
||||
mov rdi, rcx
|
||||
movsxd rcx, dword arg(5) ;[Width]
|
||||
mov rsi, arg(0) ;Pos
|
||||
xor rax,rax
|
||||
|
||||
.addnoise_nextset:
|
||||
movq mm1,[rsi+rax] ; get the source
|
||||
|
||||
psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
|
||||
paddusb mm1, [rdx+32] ;bothclamp
|
||||
psubusb mm1, [rdx+16] ;whiteclamp
|
||||
|
||||
movq mm2,[rdi+rax] ; get the noise for this line
|
||||
paddb mm1,mm2 ; add it in
|
||||
movq [rsi+rax],mm1 ; store the result
|
||||
|
||||
add rax,8 ; move to the next line
|
||||
|
||||
cmp rax, rcx
|
||||
jl .addnoise_nextset
|
||||
|
||||
movsxd rax, dword arg(7) ; Pitch
|
||||
add arg(0), rax ; Start += Pitch
|
||||
sub dword arg(6), 1 ; Height -= 1
|
||||
jg .addnoise_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
Blur:
|
||||
|
@ -655,68 +655,6 @@ sym(vp8_mbpost_proc_across_ip_xmm):
|
||||
%undef flimit4
|
||||
|
||||
|
||||
;void vp8_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise,
|
||||
; unsigned char blackclamp[16],
|
||||
; unsigned char whiteclamp[16],
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int Width, unsigned int Height, int Pitch)
|
||||
global sym(vp8_plane_add_noise_wmt) PRIVATE
|
||||
sym(vp8_plane_add_noise_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
.addnoise_loop:
|
||||
call sym(LIBVPX_RAND) WRT_PLT
|
||||
mov rcx, arg(1) ;noise
|
||||
and rax, 0xff
|
||||
add rcx, rax
|
||||
|
||||
; we rely on the fact that the clamping vectors are stored contiguously
|
||||
; in black/white/both order. Note that we have to reload this here because
|
||||
; rdx could be trashed by rand()
|
||||
mov rdx, arg(2) ; blackclamp
|
||||
|
||||
|
||||
mov rdi, rcx
|
||||
movsxd rcx, dword arg(5) ;[Width]
|
||||
mov rsi, arg(0) ;Pos
|
||||
xor rax,rax
|
||||
|
||||
.addnoise_nextset:
|
||||
movdqu xmm1,[rsi+rax] ; get the source
|
||||
|
||||
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
|
||||
paddusb xmm1, [rdx+32] ;bothclamp
|
||||
psubusb xmm1, [rdx+16] ;whiteclamp
|
||||
|
||||
movdqu xmm2,[rdi+rax] ; get the noise for this line
|
||||
paddb xmm1,xmm2 ; add it in
|
||||
movdqu [rsi+rax],xmm1 ; store the result
|
||||
|
||||
add rax,16 ; move to the next line
|
||||
|
||||
cmp rax, rcx
|
||||
jl .addnoise_nextset
|
||||
|
||||
movsxd rax, dword arg(7) ; Pitch
|
||||
add arg(0), rax ; Start += Pitch
|
||||
sub dword arg(6), 1 ; Height -= 1
|
||||
jg .addnoise_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
four8s:
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
|
||||
state->last_noise = a;
|
||||
}
|
||||
|
||||
void vp9_plane_add_noise_c(uint8_t *start, char *noise,
|
||||
char blackclamp[16],
|
||||
char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
unsigned int width, unsigned int height, int pitch) {
|
||||
unsigned int i, j;
|
||||
|
||||
// TODO(jbb): why does simd code use both but c doesn't, normalize and
|
||||
// fix..
|
||||
(void) bothclamp;
|
||||
for (i = 0; i < height; i++) {
|
||||
uint8_t *pos = start + i * pitch;
|
||||
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
|
||||
|
||||
for (j = 0; j < width; j++) {
|
||||
if (pos[j] < blackclamp[0])
|
||||
pos[j] = blackclamp[0];
|
||||
|
||||
if (pos[j] > 255 + whiteclamp[0])
|
||||
pos[j] = 255 + whiteclamp[0];
|
||||
|
||||
pos[j] += ref[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
|
||||
// Current mip will be the prev_mip for the next frame.
|
||||
MODE_INFO *temp = cm->postproc_state.prev_mip;
|
||||
@ -726,8 +701,7 @@ int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
ppstate->last_noise != noise_level) {
|
||||
fillrd(ppstate, 63 - q, noise_level);
|
||||
}
|
||||
|
||||
vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
ppstate->whiteclamp, ppstate->bothclamp,
|
||||
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
|
||||
}
|
||||
|
@ -70,10 +70,6 @@ add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8
|
||||
specialize qw/vp9_post_proc_down_and_across sse2/;
|
||||
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
|
||||
|
||||
add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vp9_plane_add_noise sse2/;
|
||||
$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
|
||||
|
||||
add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
|
||||
specialize qw/vp9_filter_by_weight16x16 sse2 msa/;
|
||||
|
||||
@ -169,9 +165,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
|
||||
specialize qw/vp9_highbd_post_proc_down_and_across/;
|
||||
|
||||
add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vp9_highbd_plane_add_noise/;
|
||||
}
|
||||
|
||||
#
|
||||
|
@ -624,68 +624,6 @@ sym(vp9_mbpost_proc_across_ip_xmm):
|
||||
%undef flimit4
|
||||
|
||||
|
||||
;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
|
||||
; unsigned char blackclamp[16],
|
||||
; unsigned char whiteclamp[16],
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int width, unsigned int height, int pitch)
|
||||
global sym(vp9_plane_add_noise_wmt) PRIVATE
|
||||
sym(vp9_plane_add_noise_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
.addnoise_loop:
|
||||
call sym(LIBVPX_RAND) WRT_PLT
|
||||
mov rcx, arg(1) ;noise
|
||||
and rax, 0xff
|
||||
add rcx, rax
|
||||
|
||||
; we rely on the fact that the clamping vectors are stored contiguously
|
||||
; in black/white/both order. Note that we have to reload this here because
|
||||
; rdx could be trashed by rand()
|
||||
mov rdx, arg(2) ; blackclamp
|
||||
|
||||
|
||||
mov rdi, rcx
|
||||
movsxd rcx, dword arg(5) ;[Width]
|
||||
mov rsi, arg(0) ;Pos
|
||||
xor rax,rax
|
||||
|
||||
.addnoise_nextset:
|
||||
movdqu xmm1,[rsi+rax] ; get the source
|
||||
|
||||
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
|
||||
paddusb xmm1, [rdx+32] ;bothclamp
|
||||
psubusb xmm1, [rdx+16] ;whiteclamp
|
||||
|
||||
movdqu xmm2,[rdi+rax] ; get the noise for this line
|
||||
paddb xmm1,xmm2 ; add it in
|
||||
movdqu [rsi+rax],xmm1 ; store the result
|
||||
|
||||
add rax,16 ; move to the next line
|
||||
|
||||
cmp rax, rcx
|
||||
jl .addnoise_nextset
|
||||
|
||||
movsxd rax, dword arg(7) ; Pitch
|
||||
add arg(0), rax ; Start += Pitch
|
||||
sub dword arg(6), 1 ; Height -= 1
|
||||
jg .addnoise_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
rd42:
|
||||
|
59
vpx_dsp/mips/postproc_msa.c
Normal file
59
vpx_dsp/mips/postproc_msa.c
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./macros_msa.h"
|
||||
|
||||
void vpx_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
|
||||
char blackclamp[16], char whiteclamp[16],
|
||||
char bothclamp[16], uint32_t width,
|
||||
uint32_t height, int32_t pitch) {
|
||||
uint32_t i, j;
|
||||
|
||||
for (i = 0; i < height / 2; ++i) {
|
||||
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
|
||||
int8_t *ref0_ptr = (int8_t *)(noise + (rand() & 0xff));
|
||||
uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
|
||||
int8_t *ref1_ptr = (int8_t *)(noise + (rand() & 0xff));
|
||||
for (j = width / 16; j--;) {
|
||||
v16i8 temp00_s, temp01_s;
|
||||
v16u8 temp00, temp01, black_clamp, white_clamp;
|
||||
v16u8 pos0, ref0, pos1, ref1;
|
||||
v16i8 const127 = __msa_ldi_b(127);
|
||||
|
||||
pos0 = LD_UB(pos0_ptr);
|
||||
ref0 = LD_UB(ref0_ptr);
|
||||
pos1 = LD_UB(pos1_ptr);
|
||||
ref1 = LD_UB(ref1_ptr);
|
||||
black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
|
||||
white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
|
||||
temp00 = (pos0 < black_clamp);
|
||||
pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
|
||||
temp01 = (pos1 < black_clamp);
|
||||
pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
|
||||
XORI_B2_128_UB(pos0, pos1);
|
||||
temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
|
||||
temp00 = (v16u8)(temp00_s < pos0);
|
||||
pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
|
||||
temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
|
||||
temp01 = (temp01_s < pos1);
|
||||
pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
|
||||
XORI_B2_128_UB(pos0, pos1);
|
||||
pos0 += ref0;
|
||||
ST_UB(pos0, pos0_ptr);
|
||||
pos1 += ref1;
|
||||
ST_UB(pos1, pos1_ptr);
|
||||
pos0_ptr += 16;
|
||||
pos1_ptr += 16;
|
||||
ref0_ptr += 16;
|
||||
ref1_ptr += 16;
|
||||
}
|
||||
}
|
||||
}
|
43
vpx_dsp/postproc.c
Normal file
43
vpx_dsp/postproc.c
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
// Adds pre-generated noise to every pixel of a plane, in place (C reference).
//
//   start       first pixel of the first row to process
//   noise       noise table; each row reads `width` bytes starting at a
//               random offset in [0, 255], so it must hold at least
//               255 + width bytes
//   blackclamp  only element 0 is read: pixels below it are raised to it
//   whiteclamp  only element 0 is read: pixels above 255 + whiteclamp[0] are
//               lowered to that value
//   bothclamp   unused by this version
//   width       pixels per row
//   height      number of rows
//   pitch       byte distance between the starts of consecutive rows; may be
//               negative for a plane stored bottom-up
//
// The clamps are read through plain `char`, so the comparisons depend on
// whether `char` is signed on the target.
void vpx_plane_add_noise_c(uint8_t *start, char *noise,
                           char blackclamp[16],
                           char whiteclamp[16],
                           char bothclamp[16],
                           unsigned int width, unsigned int height, int pitch) {
  // Running row pointer. The previous `start + i * pitch` multiplied an
  // unsigned index by a signed pitch, which wraps a negative pitch into a
  // huge positive offset on 64-bit targets.
  uint8_t *pos = start;
  unsigned int i, j;

  // TODO(jbb): why does simd code use both but c doesn't, normalize and
  // fix..
  (void) bothclamp;
  for (i = 0; i < height; i++) {
    // A fresh random starting offset into the noise table for every row.
    char *ref = (char *)(noise + (rand() & 0xff));  // NOLINT

    for (j = 0; j < width; j++) {
      if (pos[j] < blackclamp[0]) {
        pos[j] = blackclamp[0];
      }

      if (pos[j] > 255 + whiteclamp[0]) {
        pos[j] = 255 + whiteclamp[0];
      }

      // uint8_t arithmetic: the sum wraps modulo 256, which is why the pixel
      // is clamped away from the extremes first.
      pos[j] += ref[j];
    }

    pos += pitch;
  }
}
|
@ -53,6 +53,13 @@ DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm
|
||||
endif # CONFIG_USE_X86INC
|
||||
endif # CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
ifneq ($(filter yes,$(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),)
|
||||
DSP_SRCS-yes += postproc.c
|
||||
DSP_SRCS-$(HAVE_MSA) += mips/postproc_msa.c
|
||||
DSP_SRCS-$(HAVE_MMX) += x86/postproc_mmx.asm
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/postproc_sse2.asm
|
||||
endif  # CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
|
||||
|
||||
DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM)
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/intrapred_neon.c
|
||||
DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c
|
||||
|
@ -1907,6 +1907,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
|
||||
|
||||
} # CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#
|
||||
# Post Processing
|
||||
#
|
||||
if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
|
||||
add_proto qw/void vpx_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vpx_plane_add_noise mmx sse2 msa/;
|
||||
}
|
||||
|
||||
} # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
|
||||
|
||||
1;
|
||||
|
84
vpx_dsp/x86/postproc_mmx.asm
Normal file
84
vpx_dsp/x86/postproc_mmx.asm
Normal file
@ -0,0 +1,84 @@
|
||||
;
|
||||
; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vpx_plane_add_noise_mmx(unsigned char *Start, unsigned char *noise,
;                             unsigned char blackclamp[16],
;                             unsigned char whiteclamp[16],
;                             unsigned char bothclamp[16],
;                             unsigned int Width, unsigned int Height,
;                             int Pitch)
;
; Adds noise to a plane in place, 8 pixels per step.
;   arg(0) Start   arg(1) noise   arg(2) blackclamp
;   arg(5) Width   arg(6) Height  arg(7) Pitch
; whiteclamp and bothclamp are not read through their own arguments: they are
; addressed as blackclamp+16 and blackclamp+32, so the three vectors must be
; laid out back to back in that order.
; Both loops test their condition at the bottom, so at least one row and at
; least one 8-byte group per row are always processed; a Width that is not a
; multiple of 8 is effectively rounded up.
global sym(vpx_plane_add_noise_mmx) PRIVATE
sym(vpx_plane_add_noise_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.next_row:
    ; pick this row's starting offset into the noise table: rand() & 0xff
    call        sym(LIBVPX_RAND) WRT_PLT
    and         rax, 0xff
    mov         rdi, arg(1)                 ; noise
    add         rdi, rax                    ; rdi = noise + offset

    ; every register is (re)loaded after the call because rand() is free to
    ; clobber them
    mov         rdx, arg(2)                 ; blackclamp (white at +16, both at +32)
    mov         rsi, arg(0)                 ; first pixel of the current row
    movsxd      rcx, dword arg(5)           ; Width
    xor         rax, rax                    ; byte offset within the row

.next_group:
    movq        mm1, [rsi+rax]              ; 8 source pixels

    ; saturating -black, +both, -white squeezes the pixels into a range that
    ; leaves headroom for the wrapping add below
    psubusb     mm1, [rdx]                  ; blackclamp
    paddusb     mm1, [rdx+32]               ; bothclamp
    psubusb     mm1, [rdx+16]               ; whiteclamp

    movq        mm2, [rdi+rax]              ; 8 noise bytes
    paddb       mm1, mm2                    ; non-saturating add
    movq        [rsi+rax], mm1              ; write the pixels back

    add         rax, 8                      ; next 8-byte group
    cmp         rax, rcx
    jl          .next_group

    ; advance the Start argument by one row and count the row off
    movsxd      rax, dword arg(7)           ; Pitch
    add         arg(0), rax                 ; Start += Pitch
    sub         dword arg(6), 1             ; Height -= 1
    jg          .next_row

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
Blur:
|
||||
times 16 dw 16
|
||||
times 8 dw 64
|
||||
times 16 dw 16
|
||||
times 8 dw 0
|
||||
|
||||
rd:
|
||||
times 4 dw 0x40
|
82
vpx_dsp/x86/postproc_sse2.asm
Normal file
82
vpx_dsp/x86/postproc_sse2.asm
Normal file
@ -0,0 +1,82 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vpx_plane_add_noise_sse2(unsigned char *start, unsigned char *noise,
;                              unsigned char blackclamp[16],
;                              unsigned char whiteclamp[16],
;                              unsigned char bothclamp[16],
;                              unsigned int width, unsigned int height,
;                              int pitch)
;
; Adds noise to a plane in place, 16 pixels per step.
;   arg(0) start   arg(1) noise   arg(2) blackclamp
;   arg(5) width   arg(6) height  arg(7) pitch
; whiteclamp and bothclamp are not read through their own arguments: they are
; addressed as blackclamp+16 and blackclamp+32, so the three vectors must be
; laid out back to back in that order. They are used as direct memory operands
; of psubusb/paddusb, which SSE2 requires to be 16-byte aligned.
; Both loops test their condition at the bottom, so at least one row and at
; least one 16-byte group per row are always processed; a width that is not a
; multiple of 16 is effectively rounded up.
global sym(vpx_plane_add_noise_sse2) PRIVATE
sym(vpx_plane_add_noise_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.next_row:
    ; pick this row's starting offset into the noise table: rand() & 0xff
    call        sym(LIBVPX_RAND) WRT_PLT
    and         rax, 0xff
    mov         rdi, arg(1)                 ; noise
    add         rdi, rax                    ; rdi = noise + offset

    ; every register is (re)loaded after the call because rand() is free to
    ; clobber them
    mov         rdx, arg(2)                 ; blackclamp (white at +16, both at +32)
    mov         rsi, arg(0)                 ; first pixel of the current row
    movsxd      rcx, dword arg(5)           ; width
    xor         rax, rax                    ; byte offset within the row

.next_group:
    movdqu      xmm1, [rsi+rax]             ; 16 source pixels (unaligned load)

    ; saturating -black, +both, -white squeezes the pixels into a range that
    ; leaves headroom for the wrapping add below
    psubusb     xmm1, [rdx]                 ; blackclamp
    paddusb     xmm1, [rdx+32]              ; bothclamp
    psubusb     xmm1, [rdx+16]              ; whiteclamp

    movdqu      xmm2, [rdi+rax]             ; 16 noise bytes (unaligned load)
    paddb       xmm1, xmm2                  ; non-saturating add
    movdqu      [rsi+rax], xmm1             ; write the pixels back

    add         rax, 16                     ; next 16-byte group
    cmp         rax, rcx
    jl          .next_group

    ; advance the start argument by one row and count the row off
    movsxd      rax, dword arg(7)           ; pitch
    add         arg(0), rax                 ; start += pitch
    sub         dword arg(6), 1             ; height -= 1
    jg          .next_row

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
rd42:
|
||||
times 8 dw 0x04
|
||||
four8s:
|
||||
times 4 dd 8
|
Loading…
x
Reference in New Issue
Block a user