dsp/mips: cosmetics: add whitespace around XSTR macro

normalizes formatting after:
eebaf97 dsp/mips: add whitespace around stringizing operator

Change-Id: I1e3986b6d08195d79072747eb99d7e0549aece72
This commit is contained in:
James Zern 2015-09-03 23:09:13 -07:00
parent eebaf97f5a
commit 29377d55b6
5 changed files with 218 additions and 218 deletions

View File

@ -391,7 +391,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
"sra %[temp7], %[temp7], 3 \n\t"
"sra %[temp4], %[temp4], 3 \n\t"
"addiu %[temp6], $zero, 255 \n\t"
"lbu %[temp1], 0+0*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp1], %[temp1], %[temp5] \n\t"
"sra %[temp5], %[temp1], 8 \n\t"
"sra %[temp18], %[temp1], 31 \n\t"
@ -399,8 +399,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
"xor %[temp1], %[temp1], %[temp1] \n\t"
"movz %[temp1], %[temp6], %[temp18] \n\t"
"1: \n\t"
"lbu %[temp18], 1+0*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp1], 0+0*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp18], %[temp18], %[temp11] \n\t"
"sra %[temp11], %[temp18], 8 \n\t"
"sra %[temp1], %[temp18], 31 \n\t"
@ -408,8 +408,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
"xor %[temp18], %[temp18], %[temp18] \n\t"
"movz %[temp18], %[temp6], %[temp1] \n\t"
"2: \n\t"
"lbu %[temp1], 2+0*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp18], 1+0*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp1], %[temp1], %[temp8] \n\t"
"sra %[temp8], %[temp1], 8 \n\t"
"sra %[temp18], %[temp1], 31 \n\t"
@ -417,8 +417,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
"xor %[temp1], %[temp1], %[temp1] \n\t"
"movz %[temp1], %[temp6], %[temp18] \n\t"
"3: \n\t"
"lbu %[temp18], 3+0*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp1], 2+0*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp18], %[temp18], %[temp16] \n\t"
"sra %[temp16], %[temp18], 8 \n\t"
"sra %[temp1], %[temp18], 31 \n\t"
@ -426,11 +426,11 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
"xor %[temp18], %[temp18], %[temp18] \n\t"
"movz %[temp18], %[temp6], %[temp1] \n\t"
"4: \n\t"
"sb %[temp18], 3+0*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp5], 0+1*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp8], 1+1*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp11], 2+1*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp16], 3+1*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp17] \n\t"
"addu %[temp8], %[temp8], %[temp15] \n\t"
"addu %[temp11], %[temp11], %[temp12] \n\t"
@ -459,14 +459,14 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp15] \n\t"
"8: \n\t"
"sb %[temp5], 0+1*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp8], 1+1*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp11], 2+1*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp16], 3+1*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp5], 0+2*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp8], 1+2*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp11], 2+2*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp16], 3+2*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp9] \n\t"
"addu %[temp8], %[temp8], %[temp3] \n\t"
"addu %[temp11], %[temp11], %[temp0] \n\t"
@ -495,14 +495,14 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp3] \n\t"
"12: \n\t"
"sb %[temp5], 0+2*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp8], 1+2*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp11], 2+2*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp16], 3+2*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp5], 0+3*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp8], 1+3*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp11], 2+3*"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp16], 3+3*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp13] \n\t"
"addu %[temp8], %[temp8], %[temp7] \n\t"
"addu %[temp11], %[temp11], %[temp4] \n\t"
@ -531,10 +531,10 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp3] \n\t"
"16: \n\t"
"sb %[temp5], 0+3*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp8], 1+3*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp11], 2+3*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp16], 3+3*"XSTR(BPS)"(%[dst]) \n\t"
"sb %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),

View File

@ -548,10 +548,10 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
// TEMP3 = SRC[D + D1 * BPS]
#define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3, \
A, A1, B, B1, C, C1, D, D1, SRC) \
"lbu %[" #TEMP0 "], " #A "+" #A1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP1 "], " #B "+" #B1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP2 "], " #C "+" #C1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP3 "], " #D "+" #D1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP0 "], " #A "+" #A1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP1 "], " #B "+" #B1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP2 "], " #C "+" #C1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP3 "], " #D "+" #D1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
int i;
@ -623,8 +623,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
// DST[A * BPS] = TEMP0
// DST[B + C * BPS] = TEMP1
#define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST) \
"usw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #DST "]) \n\t" \
"usw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #DST "]) \n\t"
"usw %[" #TEMP0 "], " #A "*" XSTR(BPS) "(%[" #DST "]) \n\t" \
"usw %[" #TEMP1 "], " #B "+" #C "*" XSTR(BPS) "(%[" #DST "]) \n\t"
static void VE4(uint8_t* dst) { // vertical
const uint8_t* top = dst - BPS;
@ -659,7 +659,7 @@ static void VE4(uint8_t* dst) { // vertical
static void DC4(uint8_t* dst) { // DC
int temp0, temp1, temp2, temp3, temp4;
__asm__ volatile (
"ulw %[temp0], -1*"XSTR(BPS)"(%[dst]) \n\t"
"ulw %[temp0], -1*" XSTR(BPS) "(%[dst]) \n\t"
LOAD_4_BYTES(temp1, temp2, temp3, temp4, -1, 0, -1, 1, -1, 2, -1, 3, dst)
"ins %[temp1], %[temp2], 8, 8 \n\t"
"ins %[temp1], %[temp3], 16, 8 \n\t"
@ -683,7 +683,7 @@ static void RD4(uint8_t* dst) { // Down-right
int temp5, temp6, temp7, temp8;
__asm__ volatile (
LOAD_4_BYTES(temp0, temp1, temp2, temp3, -1, 0, -1, 1, -1, 2, -1, 3, dst)
"ulw %[temp7], -1-"XSTR(BPS)"(%[dst]) \n\t"
"ulw %[temp7], -1-" XSTR(BPS) "(%[dst]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"preceu.ph.qbr %[temp5], %[temp7] \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
@ -702,7 +702,7 @@ static void RD4(uint8_t* dst) { // Down-right
"shll.ph %[temp0], %[temp0], 1 \n\t"
"shra_r.ph %[temp1], %[temp1], 2 \n\t"
"addq.ph %[temp8], %[temp0], %[temp8] \n\t"
"lbu %[temp5], 3-"XSTR(BPS)"(%[dst]) \n\t"
"lbu %[temp5], 3-" XSTR(BPS) "(%[dst]) \n\t"
"precrq.ph.w %[temp7], %[temp7], %[temp7] \n\t"
"shra_r.ph %[temp8], %[temp8], 2 \n\t"
"ins %[temp7], %[temp5], 0, 8 \n\t"
@ -725,8 +725,8 @@ static void RD4(uint8_t* dst) { // Down-right
// TEMP0 = SRC[A * BPS]
// TEMP1 = SRC[B + C * BPS]
#define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC) \
"ulw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
"ulw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #SRC "]) \n\t"
"ulw %[" #TEMP0 "], " #A "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"ulw %[" #TEMP1 "], " #B "+" #C "*" XSTR(BPS) "(%[" #SRC "]) \n\t"
static void LD4(uint8_t* dst) { // Down-Left
int temp0, temp1, temp2, temp3, temp4;

View File

@ -81,10 +81,10 @@ static const int kC2 = 35468;
"sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
"lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
"lbu %[temp17], 1+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
"lbu %[temp18], 2+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
"lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
"lbu %[temp16], 0+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
"lbu %[temp17], 1+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
"lbu %[temp18], 2+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
"lbu %[temp19], 3+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
"addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
"addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
@ -107,10 +107,10 @@ static const int kC2 = 35468;
"lw %[temp16], 8(%[args]) \n\t" \
"movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
"movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
"sb %[" #TEMP0 "], 0+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP4 "], 1+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP8 "], 2+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP12 "], 3+"XSTR(BPS)"*" #A "(%[temp16]) \n\t"
"sb %[" #TEMP0 "], 0+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP4 "], 1+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP8 "], 2+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
// Does one or two inverse transforms.
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
@ -254,14 +254,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
// E..H - offsets in bytes to store first results to tmp buffer
// E1..H1 - offsets in bytes to store second results to tmp buffer
#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
"lbu %[temp0], 0+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
"lbu %[temp1], 1+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
"lbu %[temp2], 2+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
"lbu %[temp3], 3+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
"lbu %[temp4], 0+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
"lbu %[temp5], 1+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
"lbu %[temp6], 2+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
"lbu %[temp7], 3+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
"lbu %[temp0], 0+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp1], 1+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp2], 2+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp3], 3+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp4], 0+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
"lbu %[temp5], 1+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
"lbu %[temp6], 2+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
"lbu %[temp7], 3+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
"addu %[temp8], %[temp0], %[temp2] \n\t" \
"subu %[temp0], %[temp0], %[temp2] \n\t" \
"addu %[temp2], %[temp1], %[temp3] \n\t" \
@ -415,16 +415,16 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
"lw %[" #TEMP1 "], 0(%[args]) \n\t" \
"lw %[" #TEMP2 "], 4(%[args]) \n\t" \
"lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], 0+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
"lbu %[temp18], 1+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], 1+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
"lbu %[temp16], 0+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], 0+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
"lbu %[temp18], 1+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], 1+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
"subu %[temp20], %[temp16], %[temp17] \n\t" \
"lbu %[temp16], 2+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], 2+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
"lbu %[temp16], 2+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], 2+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
"subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
"lbu %[temp18], 3+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
"lbu %[temp18], 3+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], 3+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
"subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
"subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
"addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \

View File

@ -79,8 +79,8 @@ static const int kC2 = 35468;
#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
"lw %[" #TEMP0 "], 0(%[args]) \n\t" \
"lw %[" #TEMP1 "], 4(%[args]) \n\t" \
"lw %[" #TEMP2 "], "XSTR(BPS)"*" #A "(%[" #TEMP0 "]) \n\t" \
"lw %[" #TEMP3 "], "XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
"lw %[" #TEMP2 "], " XSTR(BPS) "*" #A "(%[" #TEMP0 "]) \n\t" \
"lw %[" #TEMP3 "], " XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"preceu.ph.qbl %[" #TEMP0 "], %[" #TEMP2 "] \n\t" \
"preceu.ph.qbl %[" #TEMP1 "], %[" #TEMP3 "] \n\t" \
"preceu.ph.qbr %[" #TEMP2 "], %[" #TEMP2 "] \n\t" \
@ -329,11 +329,11 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
// Intra predictions
#define FILL_PART(J, SIZE) \
"usw %[value], 0+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
"usw %[value], 4+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
"usw %[value], 0+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \
"usw %[value], 4+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \
".if " #SIZE " == 16 \n\t" \
"usw %[value], 8+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
"usw %[value], 12+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
"usw %[value], 8+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \
"usw %[value], 12+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \
".endif \n\t"
#define FILL_8_OR_16(DST, VALUE, SIZE) do { \
@ -597,10 +597,10 @@ static void DC4(uint8_t* dst, const uint8_t* top) {
"addiu %[temp0], %[temp0], 4 \n\t"
"srl %[temp0], %[temp0], 3 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
"usw %[temp0], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp0], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp0], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp0], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp0], 0*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp0], 1*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp0], 2*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp0], 3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
: [top]"r"(top), [dst]"r"(dst)
: "memory"
@ -650,10 +650,10 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
"shll_s.ph %[temp5], %[temp5], 7 \n\t"
"precrqu_s.qb.ph %[temp2], %[temp3], %[temp2] \n\t"
"precrqu_s.qb.ph %[temp3], %[temp4], %[temp5] \n\t"
"usw %[temp1], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp0], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp3], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp2], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp1], 0*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp0], 1*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp3], 2*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp2], 3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[a10]"=&r"(a10), [a32]"=&r"(a32)
@ -681,10 +681,10 @@ static void VE4(uint8_t* dst, const uint8_t* top) {
"shra_r.ph %[temp2], %[temp2], 2 \n\t"
"shra_r.ph %[temp6], %[temp6], 2 \n\t"
"precr.qb.ph %[temp4], %[temp6], %[temp2] \n\t"
"usw %[temp4], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp4], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp4], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp4], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp4], 0*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp4], 1*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp4], 2*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp4], 3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6)
@ -717,10 +717,10 @@ static void HE4(uint8_t* dst, const uint8_t* top) {
"srl %[temp2], %[temp2], 16 \n\t"
"replv.qb %[temp3], %[temp3] \n\t"
"replv.qb %[temp2], %[temp2] \n\t"
"usw %[temp3], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp0], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp2], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp1], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp3], 0*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp0], 1*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp2], 2*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp1], 3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6)
@ -763,12 +763,12 @@ static void RD4(uint8_t* dst, const uint8_t* top) {
"precr.qb.ph %[temp9], %[temp10], %[temp9] \n\t"
"shra_r.w %[temp0], %[temp0], 2 \n\t"
"precr.qb.ph %[temp10], %[temp11], %[temp10] \n\t"
"usw %[temp9], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp10], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp9], 3*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp10], 1*" XSTR(BPS) "(%[dst]) \n\t"
"prepend %[temp9], %[temp11], 8 \n\t"
"prepend %[temp10], %[temp0], 8 \n\t"
"usw %[temp9], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp10], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp9], 2*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp10], 0*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
@ -812,13 +812,13 @@ static void VR4(uint8_t* dst, const uint8_t* top) {
"append %[temp3], %[temp1], 16 \n\t"
"precr.qb.ph %[temp8], %[temp8], %[temp4] \n\t"
"precr.qb.ph %[temp3], %[temp2], %[temp3] \n\t"
"usw %[temp8], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp3], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp8], 0*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp3], 1*" XSTR(BPS) "(%[dst]) \n\t"
"append %[temp3], %[temp6], 8 \n\t"
"srl %[temp6], %[temp6], 16 \n\t"
"append %[temp8], %[temp6], 8 \n\t"
"usw %[temp3], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp8], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp3], 3*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp8], 2*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
@ -860,12 +860,12 @@ static void LD4(uint8_t* dst, const uint8_t* top) {
"precr.qb.ph %[temp10], %[temp11], %[temp10] \n\t"
"addu %[temp1], %[temp1], %[temp5] \n\t"
"shra_r.w %[temp1], %[temp1], 2 \n\t"
"usw %[temp9], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp10], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp9], 0*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp10], 2*" XSTR(BPS) "(%[dst]) \n\t"
"prepend %[temp9], %[temp11], 8 \n\t"
"prepend %[temp10], %[temp1], 8 \n\t"
"usw %[temp9], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp10], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp9], 1*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp10], 3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
@ -908,13 +908,13 @@ static void VL4(uint8_t* dst, const uint8_t* top) {
"append %[temp2], %[temp0], 16 \n\t"
"precr.qb.ph %[temp8], %[temp8], %[temp5] \n\t"
"precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
"usw %[temp8], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp8], 0*" XSTR(BPS) "(%[dst]) \n\t"
"prepend %[temp8], %[temp6], 8 \n\t"
"usw %[temp3], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp3], 1*" XSTR(BPS) "(%[dst]) \n\t"
"srl %[temp6], %[temp6], 16 \n\t"
"prepend %[temp3], %[temp6], 8 \n\t"
"usw %[temp8], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp3], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp8], 2*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp3], 3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
@ -955,14 +955,14 @@ static void HD4(uint8_t* dst, const uint8_t* top) {
"precrq.ph.w %[temp3], %[temp0], %[temp4] \n\t"
"precr.qb.ph %[temp7], %[temp6], %[temp1] \n\t"
"precr.qb.ph %[temp6], %[temp1], %[temp3] \n\t"
"usw %[temp7], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp6], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp7], 0*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp6], 1*" XSTR(BPS) "(%[dst]) \n\t"
"append %[temp2], %[temp5], 16 \n\t"
"append %[temp0], %[temp4], 16 \n\t"
"precr.qb.ph %[temp5], %[temp3], %[temp2] \n\t"
"precr.qb.ph %[temp4], %[temp2], %[temp0] \n\t"
"usw %[temp5], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp4], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp5], 2*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp4], 3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
@ -994,12 +994,12 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
"precrq.ph.w %[temp2], %[temp6], %[temp4] \n\t"
"append %[temp0], %[temp5], 16 \n\t"
"precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
"usw %[temp3], 0*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp3], 0*" XSTR(BPS) "(%[dst]) \n\t"
"precr.qb.ph %[temp1], %[temp7], %[temp0] \n\t"
"usw %[temp7], 3*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp7], 3*" XSTR(BPS) "(%[dst]) \n\t"
"packrl.ph %[temp2], %[temp1], %[temp3] \n\t"
"usw %[temp1], 2*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp2], 1*"XSTR(BPS)"(%[dst]) \n\t"
"usw %[temp1], 2*" XSTR(BPS) "(%[dst]) \n\t"
"usw %[temp2], 1*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7)

View File

@ -40,10 +40,10 @@
// I1..I9 - offsets in bytes
#define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3, \
I0, I1, I2, I3, I4, I5, I6, I7, I8, I9) \
"ulw %[" #O0 "], " #I1 "+"XSTR(I9)"*" #I5 "(%[" #I0 "]) \n\t" \
"ulw %[" #O1 "], " #I2 "+"XSTR(I9)"*" #I6 "(%[" #I0 "]) \n\t" \
"ulw %[" #O2 "], " #I3 "+"XSTR(I9)"*" #I7 "(%[" #I0 "]) \n\t" \
"ulw %[" #O3 "], " #I4 "+"XSTR(I9)"*" #I8 "(%[" #I0 "]) \n\t"
"ulw %[" #O0 "], " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "]) \n\t" \
"ulw %[" #O1 "], " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "]) \n\t" \
"ulw %[" #O2 "], " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "]) \n\t" \
"ulw %[" #O3 "], " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "]) \n\t"
// O - output
// IO - input/output
@ -180,10 +180,10 @@
"precrqu_s.qb.ph %[" #IO2 "], %[" #IO3 "], %[" #IO2 "] \n\t" \
"precrqu_s.qb.ph %[" #IO4 "], %[" #IO5 "], %[" #IO4 "] \n\t" \
"precrqu_s.qb.ph %[" #IO6 "], %[" #IO7 "], %[" #IO6 "] \n\t" \
"usw %[" #IO0 "], "XSTR(I13)"*" #I9 "(%[" #I8 "]) \n\t" \
"usw %[" #IO2 "], "XSTR(I13)"*" #I10 "(%[" #I8 "]) \n\t" \
"usw %[" #IO4 "], "XSTR(I13)"*" #I11 "(%[" #I8 "]) \n\t" \
"usw %[" #IO6 "], "XSTR(I13)"*" #I12 "(%[" #I8 "]) \n\t"
"usw %[" #IO0 "], " XSTR(I13) "*" #I9 "(%[" #I8 "]) \n\t" \
"usw %[" #IO2 "], " XSTR(I13) "*" #I10 "(%[" #I8 "]) \n\t" \
"usw %[" #IO4 "], " XSTR(I13) "*" #I11 "(%[" #I8 "]) \n\t" \
"usw %[" #IO6 "], " XSTR(I13) "*" #I12 "(%[" #I8 "]) \n\t"
#define OUTPUT_EARLY_CLOBBER_REGS_10() \
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), \