Added multithreaded implementation for RGB to YUV420p color conversion

This commit is contained in:
Daniil Osokin 2013-02-24 23:17:40 +04:00
parent d8f538d67b
commit cf5e272878

View File

@ -2744,6 +2744,16 @@ const int ITUR_BT_601_CVG = -852492;
const int ITUR_BT_601_CVR = 1673527;
const int ITUR_BT_601_SHIFT = 20;
// Coefficients for RGB to YUV420p conversion
const int ITUR_BT_601_CRY = 269484;
const int ITUR_BT_601_CGY = 528482;
const int ITUR_BT_601_CBY = 102760;
const int ITUR_BT_601_CRU = -155188;
const int ITUR_BT_601_CGU = -305135;
const int ITUR_BT_601_CBU = 460324;
const int ITUR_BT_601_CGV = -385875;
const int ITUR_BT_601_CBV = -74448;
template<int bIdx, int uIdx>
struct YUV420sp2RGB888Invoker
{
@ -3078,54 +3088,80 @@ inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const ucha
///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
template<int bIdx>
struct RGB888toYUV420pInvoker: public ParallelLoopBody
{
RGB888toYUV420pInvoker( const Mat& src, Mat* dst, const int uIdx )
: src_(src),
dst_(dst),
uIdx_(uIdx) { }
void operator()(const Range& rowRange) const
{
const int w = src_.cols;
const int h = src_.rows;
const int cn = src_.channels();
for( int i = rowRange.start; i < rowRange.end; i++ )
{
const uchar* row0 = src_.ptr<uchar>(2 * i);
const uchar* row1 = src_.ptr<uchar>(2 * i + 1);
uchar* y = dst_->ptr<uchar>(2*i);
uchar* u = dst_->ptr<uchar>(h + i/2) + (i % 2) * (w/2);
uchar* v = dst_->ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2);
if( uIdx_ == 2 ) std::swap(u, v);
for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ )
{
int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j];
int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j];
int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; int b10 = row1[bIdx + j];
int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j];
const int shifted16 = (16 << ITUR_BT_601_SHIFT);
const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1));
int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16;
int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16;
int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16;
int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16;
y[2*k + 0] = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT);
y[2*k + 1] = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT);
y[2*k + dst_->step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
y[2*k + dst_->step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);
const int shifted128 = (128 << ITUR_BT_601_SHIFT);
int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128;
int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128;
u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT);
v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT);
}
}
}
static bool isFit( const Mat& src )
{
return (src.total() >= 320*240);
}
private:
RGB888toYUV420pInvoker& operator=(const RGB888toYUV420pInvoker&);
const Mat& src_;
Mat* const dst_;
const int uIdx_;
};
template<int bIdx, int uIdx>
static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
{
//const float coeffs[] = { 0.257f, 0.504f, 0.098f,
// -0.148f, -0.291f, 0.439f,
// -0.368f, -0.071f };
const int coeffs[] = { 269484, 528482, 102760,
-155188, -305135, 460324,
-385875, -74448 };
const int w = src.cols;
const int h = src.rows;
const int cn = src.channels();
for( int i = 0; i < h / 2; i++ )
{
const uchar* row0 = src.ptr<uchar>(2*i);
const uchar* row1 = src.ptr<uchar>(2*i + 1);
uchar* y = dst.ptr<uchar>(2*i);
uchar* u = dst.ptr<uchar>(h + i/2) + (i % 2) * (w/2);
uchar* v = dst.ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2);
if( uIdx == 2 ) std::swap(u, v);
for( int j = 0, k = 0; j < w * cn; j += 2*cn, k++ )
{
int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j];
int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j];
int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; int b10 = row1[bIdx + j];
int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j];
int y00 = coeffs[0]*r00 + coeffs[1]*g00 + coeffs[2]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
int y01 = coeffs[0]*r01 + coeffs[1]*g01 + coeffs[2]*b01 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
int y10 = coeffs[0]*r10 + coeffs[1]*g10 + coeffs[2]*b10 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
int y11 = coeffs[0]*r11 + coeffs[1]*g11 + coeffs[2]*b11 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
y[2*k + 0] = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT);
y[2*k + 1] = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT);
y[2*k + dst.step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
y[2*k + dst.step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);
int u00 = coeffs[3]*r00 + coeffs[4]*g00 + coeffs[5]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (128 << ITUR_BT_601_SHIFT);
int v00 = coeffs[5]*r00 + coeffs[6]*g00 + coeffs[7]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (128 << ITUR_BT_601_SHIFT);
u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT);
v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT);
}
}
RGB888toYUV420pInvoker<bIdx> colorConverter(src, &dst, uIdx);
if( RGB888toYUV420pInvoker<bIdx>::isFit(src) )
parallel_for_(Range(0, src.rows/2), colorConverter);
else
colorConverter(Range(0, src.rows/2));
}
///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////