d3d11-nv12 interop

fixed issues with ocl nv12 cvt kernel

finished ocl nv12-to-rgba kernel, update dx-interop samples. (ocl rgba-to-nv12 kernel will be added later)

an attempt to fix build issue

fix for non opencl build issue

fix typo

fix compilation warnings

fix compile issue for Mac (OpenCL)

add conversion from rgba to nv12 (still need to debug kernel)

remove empty line at the EOF

fixed compilation warning
This commit is contained in:
Vladimir Dudnik
2015-07-29 19:52:03 +03:00
parent c0b544af70
commit d4774ead43
7 changed files with 646 additions and 139 deletions

View File

@@ -135,7 +135,7 @@ public:
if (!m_cap.read(m_frame_bgr))
return -1;
cv::cvtColor(m_frame_bgr, m_frame_rgba, CV_RGB2BGRA);
cv::cvtColor(m_frame_bgr, m_frame_rgba, CV_BGR2RGBA);
UINT subResource = ::D3D10CalcSubresource(0, 0, 1);
@@ -166,6 +166,9 @@ public:
if (m_shutdown)
return 0;
// capture user input once
MODE mode = (m_mode == MODE_GPU_NV12) ? MODE_GPU_RGBA : m_mode;
HRESULT r;
ID3D10Texture2D* pSurface;
@@ -177,7 +180,7 @@ public:
m_timer.start();
switch (m_mode)
switch (mode)
{
case MODE_CPU:
{
@@ -214,7 +217,7 @@ public:
break;
}
case MODE_GPU:
case MODE_GPU_RGBA:
{
// process video frame on GPU
cv::UMat u;
@@ -227,7 +230,7 @@ public:
cv::blur(u, u, cv::Size(15, 15), cv::Point(-7, -7));
}
cv::String strMode = cv::format("mode: %s", m_modeStr[MODE_GPU].c_str());
cv::String strMode = cv::format("mode: %s", m_modeStr[MODE_GPU_RGBA].c_str());
cv::String strProcessing = m_demo_processing ? "blur frame" : "copy frame";
cv::String strTime = cv::format("time: %4.1f msec", m_timer.time(Timer::UNITS::MSEC));
cv::String strDevName = cv::format("OpenCL device: %s", m_oclDevName.c_str());

View File

@@ -57,23 +57,31 @@ public:
scd.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; // allow full-screen switching
r = ::D3D11CreateDeviceAndSwapChain(
NULL,
D3D_DRIVER_TYPE_HARDWARE,
NULL,
0,
NULL,
0,
D3D11_SDK_VERSION,
&scd,
&m_pD3D11SwapChain,
&m_pD3D11Dev,
NULL,
&m_pD3D11Ctx);
NULL,
D3D_DRIVER_TYPE_HARDWARE,
NULL,
0,
NULL,
0,
D3D11_SDK_VERSION,
&scd,
&m_pD3D11SwapChain,
&m_pD3D11Dev,
NULL,
&m_pD3D11Ctx);
if (FAILED(r))
{
throw std::runtime_error("D3D11CreateDeviceAndSwapChain() failed!");
}
m_nv12_available = true;
UINT fmt = 0;
r = m_pD3D11Dev->CheckFormatSupport(DXGI_FORMAT_NV12, &fmt);
if (FAILED(r))
{
m_nv12_available = false;
}
r = m_pD3D11SwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&m_pBackBuffer);
if (FAILED(r))
{
@@ -98,24 +106,70 @@ public:
m_pD3D11Ctx->RSSetViewports(1, &viewport);
D3D11_TEXTURE2D_DESC desc;
m_pSurfaceRGBA = 0;
m_pSurfaceNV12 = 0;
desc.Width = m_width;
desc.Height = m_height;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc.Usage = D3D11_USAGE_DYNAMIC;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
desc.MiscFlags = 0;
D3D11_TEXTURE2D_DESC desc_rgba;
r = m_pD3D11Dev->CreateTexture2D(&desc, NULL, &m_pSurface);
desc_rgba.Width = m_width;
desc_rgba.Height = m_height;
desc_rgba.MipLevels = 1;
desc_rgba.ArraySize = 1;
desc_rgba.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
desc_rgba.SampleDesc.Count = 1;
desc_rgba.SampleDesc.Quality = 0;
desc_rgba.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc_rgba.Usage = D3D11_USAGE_DYNAMIC;
desc_rgba.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
desc_rgba.MiscFlags = 0;
r = m_pD3D11Dev->CreateTexture2D(&desc_rgba, 0, &m_pSurfaceRGBA);
if (FAILED(r))
{
throw std::runtime_error("Can't create texture with input image");
throw std::runtime_error("Can't create DX texture");
}
if(m_nv12_available)
{
D3D11_TEXTURE2D_DESC desc_nv12;
desc_nv12.Width = m_width;
desc_nv12.Height = m_height;
desc_nv12.MipLevels = 1;
desc_nv12.ArraySize = 1;
desc_nv12.Format = DXGI_FORMAT_NV12;
desc_nv12.SampleDesc.Count = 1;
desc_nv12.SampleDesc.Quality = 0;
desc_nv12.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc_nv12.Usage = D3D11_USAGE_DEFAULT;
desc_nv12.CPUAccessFlags = 0;
desc_nv12.MiscFlags = D3D11_RESOURCE_MISC_SHARED;
r = m_pD3D11Dev->CreateTexture2D(&desc_nv12, 0, &m_pSurfaceNV12);
if (FAILED(r))
{
throw std::runtime_error("Can't create DX NV12 texture");
}
D3D11_TEXTURE2D_DESC desc_nv12_cpu_copy;
desc_nv12_cpu_copy.Width = m_width;
desc_nv12_cpu_copy.Height = m_height;
desc_nv12_cpu_copy.MipLevels = 1;
desc_nv12_cpu_copy.ArraySize = 1;
desc_nv12_cpu_copy.Format = DXGI_FORMAT_NV12;
desc_nv12_cpu_copy.SampleDesc.Count = 1;
desc_nv12_cpu_copy.SampleDesc.Quality = 0;
desc_nv12_cpu_copy.BindFlags = 0;
desc_nv12_cpu_copy.Usage = D3D11_USAGE_STAGING;
desc_nv12_cpu_copy.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE | D3D11_CPU_ACCESS_READ;
desc_nv12_cpu_copy.MiscFlags = 0;
r = m_pD3D11Dev->CreateTexture2D(&desc_nv12_cpu_copy, 0, &m_pSurfaceNV12_cpu_copy);
if (FAILED(r))
{
throw std::runtime_error("Can't create DX NV12 texture");
}
}
// initialize OpenCL context of OpenCV lib from DirectX
@@ -133,31 +187,42 @@ public:
// get media data on DX surface for further processing
int get_surface(ID3D11Texture2D** ppSurface)
int get_surface(ID3D11Texture2D** ppSurface, bool use_nv12)
{
HRESULT r;
if (!m_cap.read(m_frame_bgr))
throw std::runtime_error("Can't get frame");
return -1;
cv::cvtColor(m_frame_bgr, m_frame_rgba, CV_RGB2BGRA);
UINT subResource = ::D3D11CalcSubresource(0, 0, 1);
D3D11_MAPPED_SUBRESOURCE mappedTex;
r = m_pD3D11Ctx->Map(m_pSurface, subResource, D3D11_MAP_WRITE_DISCARD, 0, &mappedTex);
if (FAILED(r))
if (use_nv12)
{
throw std::runtime_error("surface mapping failed!");
cv::cvtColor(m_frame_bgr, m_frame_i420, CV_BGR2YUV_I420);
convert_I420_to_NV12(m_frame_i420, m_frame_nv12, m_width, m_height);
m_pD3D11Ctx->UpdateSubresource(m_pSurfaceNV12, 0, 0, m_frame_nv12.data, (UINT)m_frame_nv12.step[0], (UINT)m_frame_nv12.total());
}
else
{
cv::cvtColor(m_frame_bgr, m_frame_rgba, CV_BGR2RGBA);
// process video frame on CPU
UINT subResource = ::D3D11CalcSubresource(0, 0, 1);
D3D11_MAPPED_SUBRESOURCE mappedTex;
r = m_pD3D11Ctx->Map(m_pSurfaceRGBA, subResource, D3D11_MAP_WRITE_DISCARD, 0, &mappedTex);
if (FAILED(r))
{
throw std::runtime_error("surface mapping failed!");
}
cv::Mat m(m_height, m_width, CV_8UC4, mappedTex.pData, mappedTex.RowPitch);
m_frame_rgba.copyTo(m);
m_pD3D11Ctx->Unmap(m_pSurfaceRGBA, subResource);
}
cv::Mat m(m_height, m_width, CV_8UC4, mappedTex.pData, (int)mappedTex.RowPitch);
// copy video frame data to surface
m_frame_rgba.copyTo(m);
m_pD3D11Ctx->Unmap(m_pSurface, subResource);
*ppSurface = m_pSurface;
*ppSurface = use_nv12 ? m_pSurfaceNV12 : m_pSurfaceRGBA;
return 0;
} // get_surface()
@@ -171,10 +236,13 @@ public:
if (m_shutdown)
return 0;
// capture user input once
MODE mode = (m_mode == MODE_GPU_NV12 && !m_nv12_available) ? MODE_GPU_RGBA : m_mode;
HRESULT r;
ID3D11Texture2D* pSurface = 0;
r = get_surface(&pSurface);
r = get_surface(&pSurface, mode == MODE_GPU_NV12);
if (FAILED(r))
{
throw std::runtime_error("get_surface() failed!");
@@ -182,70 +250,112 @@ public:
m_timer.start();
switch (m_mode)
switch (mode)
{
case MODE_CPU:
case MODE_CPU:
{
// process video frame on CPU
UINT subResource = ::D3D11CalcSubresource(0, 0, 1);
D3D11_MAPPED_SUBRESOURCE mappedTex;
r = m_pD3D11Ctx->Map(pSurface, subResource, D3D11_MAP_WRITE_DISCARD, 0, &mappedTex);
if (FAILED(r))
{
throw std::runtime_error("surface mapping failed!");
}
cv::Mat m(m_height, m_width, CV_8UC4, mappedTex.pData, (int)mappedTex.RowPitch);
if (m_demo_processing)
{
// blur data from D3D11 surface with OpenCV on CPU
cv::blur(m, m, cv::Size(15, 15), cv::Point(-7, -7));
}
cv::String strMode = cv::format("mode: %s", m_modeStr[MODE_CPU].c_str());
cv::String strProcessing = m_demo_processing ? "blur frame" : "copy frame";
cv::String strTime = cv::format("time: %4.1f msec", m_timer.time(Timer::UNITS::MSEC));
cv::String strDevName = cv::format("OpenCL device: %s", m_oclDevName.c_str());
cv::putText(m, strMode, cv::Point(0, 16), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(m, strProcessing, cv::Point(0, 32), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(m, strTime, cv::Point(0, 48), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(m, strDevName, cv::Point(0, 64), 1, 0.8, cv::Scalar(0, 0, 0));
m_pD3D11Ctx->Unmap(pSurface, subResource);
break;
}
case MODE_GPU_RGBA:
case MODE_GPU_NV12:
{
// process video frame on GPU
cv::UMat u;
cv::directx::convertFromD3D11Texture2D(pSurface, u);
if (m_demo_processing)
{
// blur data from D3D11 surface with OpenCV on GPU with OpenCL
cv::blur(u, u, cv::Size(15, 15), cv::Point(-7, -7));
}
cv::String strMode = cv::format("mode: %s", m_modeStr[mode].c_str());
cv::String strProcessing = m_demo_processing ? "blur frame" : "copy frame";
cv::String strTime = cv::format("time: %4.1f msec", m_timer.time(Timer::UNITS::MSEC));
cv::String strDevName = cv::format("OpenCL device: %s", m_oclDevName.c_str());
cv::putText(u, strMode, cv::Point(0, 16), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(u, strProcessing, cv::Point(0, 32), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(u, strTime, cv::Point(0, 48), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(u, strDevName, cv::Point(0, 64), 1, 0.8, cv::Scalar(0, 0, 0));
cv::directx::convertToD3D11Texture2D(u, pSurface);
if (mode == MODE_GPU_NV12)
{
// just for rendering, we need to convert NV12 to RGBA.
m_pD3D11Ctx->CopyResource(m_pSurfaceNV12_cpu_copy, m_pSurfaceNV12);
// process video frame on CPU
UINT subResource = ::D3D11CalcSubresource(0, 0, 1);
D3D11_MAPPED_SUBRESOURCE mappedTex;
r = m_pD3D11Ctx->Map(pSurface, subResource, D3D11_MAP_WRITE_DISCARD, 0, &mappedTex);
if (FAILED(r))
{
throw std::runtime_error("surface mapping failed!");
UINT subResource = ::D3D11CalcSubresource(0, 0, 1);
D3D11_MAPPED_SUBRESOURCE mappedTex;
r = m_pD3D11Ctx->Map(m_pSurfaceNV12_cpu_copy, subResource, D3D11_MAP_READ, 0, &mappedTex);
if (FAILED(r))
{
throw std::runtime_error("surface mapping failed!");
}
cv::Mat frame_nv12(m_height + (m_height / 2), m_width, CV_8UC1, mappedTex.pData, mappedTex.RowPitch);
cv::cvtColor(frame_nv12, m_frame_rgba, CV_YUV2RGBA_NV12);
m_pD3D11Ctx->Unmap(m_pSurfaceNV12_cpu_copy, subResource);
}
cv::Mat m(m_height, m_width, CV_8UC4, mappedTex.pData, (int)mappedTex.RowPitch);
if (m_demo_processing)
{
// blur data from D3D11 surface with OpenCV on CPU
cv::blur(m, m, cv::Size(15, 15), cv::Point(-7, -7));
UINT subResource = ::D3D11CalcSubresource(0, 0, 1);
D3D11_MAPPED_SUBRESOURCE mappedTex;
r = m_pD3D11Ctx->Map(m_pSurfaceRGBA, subResource, D3D11_MAP_WRITE_DISCARD, 0, &mappedTex);
if (FAILED(r))
{
throw std::runtime_error("surface mapping failed!");
}
cv::Mat m(m_height, m_width, CV_8UC4, mappedTex.pData, mappedTex.RowPitch);
m_frame_rgba.copyTo(m);
m_pD3D11Ctx->Unmap(m_pSurfaceRGBA, subResource);
}
cv::String strMode = cv::format("mode: %s", m_modeStr[MODE_CPU].c_str());
cv::String strProcessing = m_demo_processing ? "blur frame" : "copy frame";
cv::String strTime = cv::format("time: %4.1f msec", m_timer.time(Timer::UNITS::MSEC));
cv::String strDevName = cv::format("OpenCL device: %s", m_oclDevName.c_str());
cv::putText(m, strMode, cv::Point(0, 16), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(m, strProcessing, cv::Point(0, 32), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(m, strTime, cv::Point(0, 48), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(m, strDevName, cv::Point(0, 64), 1, 0.8, cv::Scalar(0, 0, 0));
m_pD3D11Ctx->Unmap(pSurface, subResource);
break;
pSurface = m_pSurfaceRGBA;
}
case MODE_GPU:
{
// process video frame on GPU
cv::UMat u;
cv::directx::convertFromD3D11Texture2D(pSurface, u);
if (m_demo_processing)
{
// blur data from D3D11 surface with OpenCV on GPU with OpenCL
cv::blur(u, u, cv::Size(15, 15), cv::Point(-7, -7));
}
cv::String strMode = cv::format("mode: %s", m_modeStr[MODE_GPU].c_str());
cv::String strProcessing = m_demo_processing ? "blur frame" : "copy frame";
cv::String strTime = cv::format("time: %4.1f msec", m_timer.time(Timer::UNITS::MSEC));
cv::String strDevName = cv::format("OpenCL device: %s", m_oclDevName.c_str());
cv::putText(u, strMode, cv::Point(0, 16), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(u, strProcessing, cv::Point(0, 32), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(u, strTime, cv::Point(0, 48), 1, 0.8, cv::Scalar(0, 0, 0));
cv::putText(u, strDevName, cv::Point(0, 64), 1, 0.8, cv::Scalar(0, 0, 0));
cv::directx::convertToD3D11Texture2D(u, pSurface);
break;
}
break;
}
} // switch
@@ -267,12 +377,14 @@ public:
catch (cv::Exception& e)
{
std::cerr << "Exception: " << e.what() << std::endl;
cleanup();
return 10;
}
catch (const std::exception& e)
{
std::cerr << "Exception: " << e.what() << std::endl;
cleanup();
return 11;
}
@@ -282,7 +394,9 @@ public:
int cleanup(void)
{
SAFE_RELEASE(m_pSurface);
SAFE_RELEASE(m_pSurfaceRGBA);
SAFE_RELEASE(m_pSurfaceNV12);
SAFE_RELEASE(m_pSurfaceNV12_cpu_copy);
SAFE_RELEASE(m_pBackBuffer);
SAFE_RELEASE(m_pD3D11SwapChain);
SAFE_RELEASE(m_pRenderTarget);
@@ -292,16 +406,74 @@ public:
return 0;
} // cleanup()
protected:
// Repack a planar I420 frame into the semi-planar NV12 layout.
//
// i420   - source frame, read as 8-bit single-channel data: `height` rows of
//          luma followed by the U plane and then the V plane, each made of
//          height/2 rows of width/2 bytes stored back-to-back.
// nv12   - destination; (re)allocated here as CV_8UC1 with the same rows/cols
//          as `i420`. Receives the luma rows unchanged, followed by height/2
//          rows of interleaved U,V byte pairs.
// width  - luma width in pixels (expected to be even).
// height - luma height in pixels (expected to be even).
//
// Every source address is derived from i420.step[0], so a source whose rows
// carry padding is read correctly. For a tightly packed source (step == width)
// the addresses are the same ones the plain `height*width` arithmetic yields.
void convert_I420_to_NV12(cv::Mat& i420, cv::Mat& nv12, int width, int height)
{
    nv12.create(i420.rows, i420.cols, CV_8UC1);

    // Nothing to copy; also keeps the divisions below well defined.
    if (width <= 0 || height <= 0)
        return;

    const unsigned char* const srcBase = i420.data;
    unsigned char* const dstBase = nv12.data;

    const size_t srcStep = i420.step[0];
    const size_t dstStep = nv12.step[0];

    // Work in size_t so plane offsets cannot overflow int on large frames.
    const size_t lumaW = static_cast<size_t>(width);
    const size_t lumaH = static_cast<size_t>(height);
    const size_t chromaW = lumaW / 2;
    const size_t chromaH = lumaH / 2;

    // Y plane: straight row-by-row copy, each side using its own stride.
    for (size_t row = 0; row < lumaH; row++)
    {
        const unsigned char* srcRow = srcBase + row * srcStep;
        unsigned char* dstRow = dstBase + row * dstStep;

        for (size_t col = 0; col < lumaW; col++)
        {
            dstRow[col] = srcRow[col];
        }
    }

    // U and V planes -> single interleaved UV plane.
    // The chroma planes are packed linearly behind the luma rows, so a chroma
    // row's linear byte offset is split into (matrix row, column) before the
    // source stride is applied.
    const size_t vPlaneStart = chromaH * chromaW; // linear offset of V relative to U

    for (size_t row = 0; row < chromaH; row++)
    {
        const size_t uOffset = row * chromaW;
        const size_t vOffset = vPlaneStart + uOffset;

        const unsigned char* srcU = srcBase + (lumaH + uOffset / lumaW) * srcStep + (uOffset % lumaW);
        const unsigned char* srcV = srcBase + (lumaH + vOffset / lumaW) * srcStep + (vOffset % lumaW);
        unsigned char* dstUV = dstBase + (lumaH + row) * dstStep;

        for (size_t col = 0; col < chromaW; col++)
        {
            dstUV[col * 2 + 0] = srcU[col];
            dstUV[col * 2 + 1] = srcV[col];
        }
    }
}
private:
ID3D11Device* m_pD3D11Dev;
IDXGISwapChain* m_pD3D11SwapChain;
ID3D11DeviceContext* m_pD3D11Ctx;
ID3D11Texture2D* m_pBackBuffer;
ID3D11Texture2D* m_pSurface;
ID3D11Texture2D* m_pSurfaceRGBA;
ID3D11Texture2D* m_pSurfaceNV12;
ID3D11Texture2D* m_pSurfaceNV12_cpu_copy;
ID3D11RenderTargetView* m_pRenderTarget;
cv::ocl::Context m_oclCtx;
cv::String m_oclPlatformName;
cv::String m_oclDevName;
bool m_nv12_available;
cv::Mat m_frame_i420;
cv::Mat m_frame_nv12;
};

View File

@@ -108,7 +108,7 @@ public:
if (!m_cap.read(m_frame_bgr))
return -1;
cv::cvtColor(m_frame_bgr, m_frame_rgba, CV_RGB2RGBA);
cv::cvtColor(m_frame_bgr, m_frame_rgba, CV_BGR2BGRA);
D3DLOCKED_RECT memDesc = { 0, NULL };
RECT rc = { 0, 0, m_width, m_height };
@@ -143,6 +143,9 @@ public:
if (m_shutdown)
return 0;
// capture user input once
MODE mode = (m_mode == MODE_GPU_NV12) ? MODE_GPU_RGBA : m_mode;
HRESULT r;
LPDIRECT3DSURFACE9 pSurface;
@@ -154,7 +157,7 @@ public:
m_timer.start();
switch (m_mode)
switch (mode)
{
case MODE_CPU:
{
@@ -185,7 +188,7 @@ public:
break;
}
case MODE_GPU:
case MODE_GPU_RGBA:
{
// process video frame on GPU
cv::UMat u;
@@ -207,7 +210,7 @@ public:
m_timer.stop();
print_info(pSurface, m_mode, m_timer.time(Timer::UNITS::MSEC), m_oclDevName);
print_info(pSurface, mode, m_timer.time(Timer::UNITS::MSEC), m_oclDevName);
// traditional DX render pipeline:
// BitBlt surface to backBuffer and flip backBuffer to frontBuffer

View File

@@ -108,7 +108,7 @@ public:
if (!m_cap.read(m_frame_bgr))
return -1;
cv::cvtColor(m_frame_bgr, m_frame_rgba, CV_RGB2RGBA);
cv::cvtColor(m_frame_bgr, m_frame_rgba, CV_BGR2BGRA);
D3DLOCKED_RECT memDesc = { 0, NULL };
RECT rc = { 0, 0, m_width, m_height };
@@ -143,6 +143,9 @@ public:
if (m_shutdown)
return 0;
// capture user input once
MODE mode = m_mode == MODE_GPU_NV12 ? MODE_GPU_RGBA : m_mode;
HRESULT r;
LPDIRECT3DSURFACE9 pSurface;
@@ -154,7 +157,7 @@ public:
m_timer.start();
switch (m_mode)
switch (mode)
{
case MODE_CPU:
{
@@ -185,7 +188,7 @@ public:
break;
}
case MODE_GPU:
case MODE_GPU_RGBA:
{
// process video frame on GPU
cv::UMat u;

View File

@@ -67,7 +67,8 @@ public:
enum MODE
{
MODE_CPU,
MODE_GPU
MODE_GPU_RGBA,
MODE_GPU_NV12
};
D3DSample(int width, int height, std::string& window_name, cv::VideoCapture& cap) :
@@ -76,7 +77,8 @@ public:
m_shutdown = false;
m_mode = MODE_CPU;
m_modeStr[0] = cv::String("Processing on CPU");
m_modeStr[1] = cv::String("Processing on GPU");
m_modeStr[1] = cv::String("Processing on GPU RGBA");
m_modeStr[2] = cv::String("Processing on GPU NV12");
m_demo_processing = false;
m_cap = cap;
}
@@ -104,7 +106,12 @@ protected:
}
if (wParam == '2')
{
m_mode = MODE_GPU;
m_mode = MODE_GPU_RGBA;
return 0;
}
if (wParam == '3')
{
m_mode = MODE_GPU_NV12;
return 0;
}
else if (wParam == VK_SPACE)
@@ -136,7 +143,7 @@ protected:
bool m_shutdown;
bool m_demo_processing;
MODE m_mode;
cv::String m_modeStr[2];
cv::String m_modeStr[3];
cv::VideoCapture m_cap;
cv::Mat m_frame_bgr;
cv::Mat m_frame_rgba;
@@ -151,7 +158,8 @@ static void help()
"Hot keys: \n"
" SPACE - turn processing on/off\n"
" 1 - process DX surface through OpenCV on CPU\n"
" 2 - process DX surface through OpenCV on GPU (via OpenCL)\n"
" 2 - process DX RGBA surface through OpenCV on GPU (via OpenCL)\n"
" 3 - process DX NV12 surface through OpenCV on GPU (via OpenCL)\n"
" ESC - exit\n\n");
}