209 lines
6.6 KiB
C
209 lines
6.6 KiB
C
/* YUV -> RGB conversion code. (YUV420 to RGB888)
|
|
*
|
|
* Copyright (C) 2008-9 Robin Watts (robin@wss.co.uk) for Pinknoise
|
|
* Productions Ltd.
|
|
*
|
|
* Licensed under the GNU GPL. If you need it under another license, contact
|
|
* me and ask.
|
|
*
|
|
* This program is free software ; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation ; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program ; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*
|
|
*
|
|
* The algorithm used here is based heavily on one created by Sophie Wilson
|
|
* of Acorn/e-14/Broadcom. Many thanks.
|
|
*
|
|
* Additional tweaks (in the fast fixup code) are from Paul Gardiner.
|
|
*
|
|
* The old implementation of YUV -> RGB did:
|
|
*
|
|
* R = CLAMP((Y-16)*1.164 + 1.596*V)
|
|
* G = CLAMP((Y-16)*1.164 - 0.391*U - 0.813*V)
|
|
* B = CLAMP((Y-16)*1.164 + 2.018*U )
|
|
*
|
|
* We're going to bend that here as follows:
|
|
*
|
|
* R = CLAMP(y + 1.596*V)
|
|
* G = CLAMP(y - 0.383*U - 0.813*V)
|
|
* B = CLAMP(y + 1.976*U )
|
|
*
|
|
* where y = 0 for Y <= 16,
|
|
* y = ( Y-16)*1.164, for 16 < Y <= 239,
|
|
* y = (239-16)*1.164, for 239 < Y
|
|
*
|
|
* i.e. We clamp Y to the 16 to 239 range (which it is supposed to be in
|
|
* anyway). We then pick the B_U factor so that B never exceeds 511. We then
|
|
* shrink the G_U factor in line with that to avoid a colour shift as much as
|
|
* possible.
|
|
*
|
|
* We're going to use tables to do it faster, but rather than doing it using
|
|
* 5 tables as the above suggests, we're going to do it using just 3.
|
|
*
|
|
* We do this by working in parallel within a 32 bit word, and using one
|
|
* table each for Y U and V.
|
|
*
|
|
* Source Y values are 0 to 255, so 0.. 260 after scaling
|
|
* Source U values are -128 to 127, so -49.. 49(G), -253..251(B) after scaling
|
|
* Source V values are -128 to 127, so -204..203(R), -104..103(G) after scaling
|
|
*
|
|
* So total summed values:
|
|
* -223 <= R <= 481, -173 <= G <= 431, -253 <= B < 511
|
|
*
|
|
* We need to pack R G and B into a 32 bit word, and because of Bs range we
|
|
* need 2 bits above the valid range of B to detect overflow, and another one
|
|
* to detect the sense of the overflow. We therefore adopt the following
|
|
* representation:
|
|
*
|
|
* osGGGGGgggggosBBBBBbbbosRRRRRrrr
|
|
*
|
|
* Each such word breaks down into 3 ranges.
|
|
*
|
|
* osGGGGGggggg osBBBBBbbb osRRRRRrrr
|
|
*
|
|
* Thus we have 8 bits for each B and R table entry, and 10 bits for G (good
|
|
* as G is the most noticeable one). The s bit for each represents the sign,
|
|
* and o represents the overflow.
|
|
*
|
|
* For R and B we pack the table by taking the 11 bit representation of their
|
|
* values, and toggling bit 10 in the U and V tables.
|
|
*
|
|
* For the green case we calculate 4*G (thus effectively using 10 bits for the
|
|
* valid range) truncate to 12 bits. We toggle bit 11 in the Y table.
|
|
*/
|
|
|
|
#include "yuv2rgb.h"
|
|
|
|
/* Flag bits tested by FIXUP. Each one sits directly above one of the 8-bit
 * fields that STORE extracts from a packed word: bit 8 (above bits 0-7),
 * bit 19 (above bits 11-18) and bit 30 (above bits 22-29). */
enum
{
    FLAGS = 0x40080100
};

/* Table lookups. Both macros expect a `const uint32_t *tables` in scope:
 * entries [0..255] are indexed by Y, [256..511] by U and [512..767] by V. */
#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)])
#define READY(Y)    (tables[(Y)])

/* Clamp the packed channels of Y (a uint32_t lvalue) in place.
 *
 * If any FLAGS bit is set, the 8-bit field below each set flag is first
 * forced to all ones. Then, for every flag position whose next-higher bit is
 * clear, 1 is added at the bottom of that field; on a field that was just
 * filled with ones this wraps it to zero. The net effect on a flagged field
 * is "saturate to 255" when the bit above the flag is set and "clamp to 0"
 * when it is clear.
 * NOTE(review): this presumably relies on the tables keeping the bit above
 * each flag set for in-range values - confirm against the table generator. */
#define FIXUP(Y)                                        \
do {                                                    \
    uint32_t fixup_bits = (Y) & FLAGS;                  \
    if (fixup_bits != 0)                                \
    {                                                   \
        fixup_bits -= fixup_bits >> 8;                  \
        (Y) |= fixup_bits;                              \
        fixup_bits = FLAGS & ~((Y) >> 1);               \
        (Y) += fixup_bits >> 8;                         \
    }                                                   \
} while (0 == 1)

/* Write the three 8-bit fields of packed word Y to DSTPTR[0..2]:
 * bits 0-7 first, then bits 22-29, then bits 11-18 (R, G, B in the packing
 * described in the file header). */
#define STORE(Y,DSTPTR)                                 \
do {                                                    \
    uint32_t store_word = (Y);                          \
    uint8_t *store_dst = (DSTPTR);                      \
    store_dst[0] = (uint8_t)(store_word);               \
    store_dst[1] = (uint8_t)(store_word >> 22);         \
    store_dst[2] = (uint8_t)(store_word >> 11);         \
} while (0 == 1)

/* Convert one pixel: add the Y table entry for `luma` to the shared chroma
 * word `uv`, clamp the result and write three bytes at `dst`. */
static void yuv888_put_pixel(uint8_t *dst,
                             const uint32_t *tables,
                             uint32_t uv,
                             uint8_t luma)
{
    uint32_t word = uv + READY(luma);

    FIXUP(word);
    STORE(word, dst);
}

/* Convert a planar YUV 4:2:0 image to packed 3-bytes-per-pixel RGB.
 *
 * One U sample and one V sample are shared by each 2x2 block of Y samples.
 * Odd widths and heights are handled: the final column / row reuses the
 * chroma sample of its (incomplete) block.
 *
 *   dst_ptr   destination, 3 bytes per pixel, rows dst_span bytes apart
 *   y_ptr     luma plane, rows y_span bytes apart
 *   u_ptr     U plane, rows uv_span bytes apart
 *   v_ptr     V plane, rows uv_span bytes apart
 *   width     image width in pixels
 *   height    image height in pixels
 *   tables    768-entry lookup table (Y, U, V sections of 256 entries each)
 *   dither    unused by this implementation
 *
 * Nothing is written when width or height is not positive.
 */
void yuv420_2_rgb888(uint8_t *dst_ptr,
                     const uint8_t *y_ptr,
                     const uint8_t *u_ptr,
                     const uint8_t *v_ptr,
                     int32_t width,
                     int32_t height,
                     int32_t y_span,
                     int32_t uv_span,
                     int32_t dst_span,
                     const uint32_t *tables,
                     int32_t dither)
{
    int32_t row;

    (void)dither;

    if (width <= 0 || height <= 0)
    {
        return;
    }

    /* Full row pairs: each chroma row feeds two luma rows. */
    for (row = 0; row + 1 < height; row += 2)
    {
        uint8_t *dst_top = dst_ptr;
        uint8_t *dst_bot = dst_ptr + dst_span;
        const uint8_t *y_top = y_ptr;
        const uint8_t *y_bot = y_ptr + y_span;
        const uint8_t *u = u_ptr;
        const uint8_t *v = v_ptr;
        int32_t col;

        for (col = 0; col + 1 < width; col += 2)
        {
            /* One chroma word for a 2x2 block of pixels. */
            uint32_t uv = READUV(*u++, *v++);

            yuv888_put_pixel(dst_bot,     tables, uv, y_bot[0]);
            yuv888_put_pixel(dst_top,     tables, uv, y_top[0]);
            yuv888_put_pixel(dst_bot + 3, tables, uv, y_bot[1]);
            yuv888_put_pixel(dst_top + 3, tables, uv, y_top[1]);

            dst_top += 6;
            dst_bot += 6;
            y_top += 2;
            y_bot += 2;
        }
        if (col < width)
        {
            /* Odd width: last column of this row pair. The lower-row luma
             * goes to the lower destination row, matching the main loop. */
            uint32_t uv = READUV(*u, *v);

            yuv888_put_pixel(dst_bot, tables, uv, *y_bot);
            yuv888_put_pixel(dst_top, tables, uv, *y_top);
        }

        /* Step to the next row pair (added span by span to avoid forming
         * 2*span in 32-bit arithmetic). */
        dst_ptr += dst_span;
        dst_ptr += dst_span;
        y_ptr += y_span;
        y_ptr += y_span;
        u_ptr += uv_span;
        v_ptr += uv_span;
    }

    if (row < height)
    {
        /* Odd height: a single trailing row. */
        int32_t col;

        for (col = 0; col + 1 < width; col += 2)
        {
            uint32_t uv = READUV(*u_ptr++, *v_ptr++);

            yuv888_put_pixel(dst_ptr,     tables, uv, y_ptr[0]);
            yuv888_put_pixel(dst_ptr + 3, tables, uv, y_ptr[1]);

            dst_ptr += 6;
            y_ptr += 2;
        }
        if (col < width)
        {
            /* Odd width as well: the final single pixel. */
            uint32_t uv = READUV(*u_ptr, *v_ptr);

            yuv888_put_pixel(dst_ptr, tables, uv, *y_ptr);
        }
    }
}
|