diff --git a/modules/gpu/test/main.cpp b/modules/gpu/test/main.cpp
index 6a8c67d79..6df7db0a1 100644
--- a/modules/gpu/test/main.cpp
+++ b/modules/gpu/test/main.cpp
@@ -49,35 +49,39 @@ using namespace cv::gpu;
 using namespace cvtest;
 using namespace testing;
 
-void print_info()
+void printInfo()
 {
-    printf("\n");
 #if defined _WIN32
 #   if defined _WIN64
-        puts("OS: Windows 64");
+        puts("OS: Windows x64");
 #   else
-        puts("OS: Windows 32");
+        puts("OS: Windows x32");
 #   endif
 #elif defined linux
 #   if defined _LP64
-        puts("OS: Linux 64");
+        puts("OS: Linux x64");
 #   else
-        puts("OS: Linux 32");
+        puts("OS: Linux x32");
 #   endif
 #elif defined __APPLE__
 #   if defined _LP64
-        puts("OS: Apple 64");
+        puts("OS: Apple x64");
 #   else
-        puts("OS: Apple 32");
+        puts("OS: Apple x32");
 #   endif
 #endif
 
-    int deviceCount = getCudaEnabledDeviceCount();
     int driver;
     cudaDriverGetVersion(&driver);
 
     printf("CUDA Driver  version: %d\n", driver);
     printf("CUDA Runtime version: %d\n", CUDART_VERSION);
+
+    puts("GPU module was compiled for the following GPU archs:");
+    printf("    BIN: %s\n", CUDA_ARCH_BIN);
+    printf("    PTX: %s\n\n", CUDA_ARCH_PTX);
+
+    int deviceCount = getCudaEnabledDeviceCount();
     printf("CUDA device count: %d\n\n", deviceCount);
 
     for (int i = 0; i < deviceCount; ++i)
@@ -87,17 +91,13 @@ void print_info()
         printf("Device %d:\n", i);
         printf("    Name: %s\n", info.name().c_str());
         printf("    Compute capability version: %d.%d\n", info.majorVersion(), info.minorVersion());
+        printf("    Multi Processor Count: %d\n", info.multiProcessorCount());
         printf("    Total memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0));
         printf("    Free  memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0));
-        if (info.isCompatible())
-            puts("    This device is compatible with current GPU module build\n");
-        else
-            puts("    This device is NOT compatible with current GPU module build\n");
+        if (!info.isCompatible())
+            puts("    !!! This device is NOT compatible with current GPU module build\n");
+        printf("\n");
     }
-
-    puts("GPU module was compiled for the following GPU archs:");
-    printf("    BIN: %s\n", CUDA_ARCH_BIN);
-    printf("    PTX: %s\n\n", CUDA_ARCH_PTX);
 }
 
 enum OutputLevel
@@ -111,25 +111,56 @@ extern OutputLevel nvidiaTestOutputLevel;
 
 int main(int argc, char** argv)
 {
-    TS::ptr()->init("gpu");
-    InitGoogleTest(&argc, argv);
+    try
+    {
+        CommandLineParser parser(argc, (const char**)argv,
+                                 "{ print_info_only | print_info_only | false | Print information about system and exit }"
+                                 "{ device | device | -1 | Device on which tests will be executed (-1 means all devices) }"
+                                 "{ nvtest_output_level | nvtest_output_level | compact | NVidia test verbosity level }");
 
-    const char* keys ="{ nvtest_output_level | nvtest_output_level | compact | NVidia test verbosity level }";
+        printInfo();
 
-    CommandLineParser parser(argc, (const char**)argv, keys);
+        if (parser.get<bool>("print_info_only"))
+            return 0;
 
-    string outputLevel = parser.get<string>("nvtest_output_level", "none");
+        int device = parser.get<int>("device");
+        if (device < 0)
+        {
+            DeviceManager::instance().loadAll();
+            std::cout << "Run tests on all supported devices\n" << std::endl;
+        }
+        else
+        {
+            DeviceManager::instance().load(device);
+            std::cout << "Run tests on device " << device << '\n' << std::endl;
+        }
 
-    if (outputLevel == "none")
-        nvidiaTestOutputLevel = OutputLevelNone;
-    else if (outputLevel == "compact")
-        nvidiaTestOutputLevel = OutputLevelCompact;
-    else if (outputLevel == "full")
-        nvidiaTestOutputLevel = OutputLevelFull;
+        string outputLevel = parser.get<string>("nvtest_output_level");
 
-    print_info();
+        if (outputLevel == "none")
+            nvidiaTestOutputLevel = OutputLevelNone;
+        else if (outputLevel == "compact")
+            nvidiaTestOutputLevel = OutputLevelCompact;
+        else if (outputLevel == "full")
+            nvidiaTestOutputLevel = OutputLevelFull;
 
-    return RUN_ALL_TESTS();
+        TS::ptr()->init("gpu");
+        InitGoogleTest(&argc, argv);
+
+        return RUN_ALL_TESTS();
+    }
+    catch (const exception& e)
+    {
+        cerr << e.what() << endl;
+        return -1;
+    }
+    catch (...)
+    {
+        cerr << "Unknown error" << endl;
+        return -1;
+    }
+
+    return 0;
 }
 
 #else // HAVE_CUDA
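
The new main() delegates device selection to the DeviceManager introduced in utility.hpp/utility.cpp below and leans on its error contract: load(i) throws std::runtime_error for an out-of-range or incompatible device, loadAll() silently keeps only compatible devices, and the try/catch in main() turns any failure into a message on stderr and a non-zero exit code. A minimal sketch of that contract, not part of the patch (the selectDevices() helper is hypothetical and assumes the DeviceManager API added by this patch):

    #include <iostream>
    #include <stdexcept>
    #include "utility.hpp"

    // Hypothetical helper mirroring main(): returns the number of devices the
    // tests will run on, or -1 on error (same as main()'s catch blocks).
    int selectDevices(int device)
    {
        try
        {
            if (device < 0)
                DeviceManager::instance().loadAll();    // keep every compatible device
            else
                DeviceManager::instance().load(device); // throws std::runtime_error if invalid or incompatible
        }
        catch (const std::exception& e)
        {
            std::cerr << e.what() << std::endl;
            return -1;
        }

        return static_cast<int>(DeviceManager::instance().values().size());
    }

The keys declared in main() use the 2.x-style CommandLineParser format "{ short | long | default | description }"; the exact flag spelling on the command line (e.g. --device=0 or --print_info_only) depends on that parser version and is an assumption here, not something this patch defines.
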
diff --git a/modules/gpu/test/precomp.hpp b/modules/gpu/test/precomp.hpp
index afc3be855..753367cce 100644
--- a/modules/gpu/test/precomp.hpp
+++ b/modules/gpu/test/precomp.hpp
@@ -56,6 +56,7 @@
 #include <limits>
 #include <algorithm>
 #include <iterator>
+#include <stdexcept>
 
 #include "cvconfig.h"
 #include "opencv2/core/core.hpp"
diff --git a/modules/gpu/test/utility.cpp b/modules/gpu/test/utility.cpp
index bc73d3003..148c9d202 100644
--- a/modules/gpu/test/utility.cpp
+++ b/modules/gpu/test/utility.cpp
@@ -46,6 +46,7 @@ using namespace cv;
 using namespace cv::gpu;
 using namespace cvtest;
 using namespace testing;
+using namespace testing::internal;
 
 //////////////////////////////////////////////////////////////////////
 // random generators
@@ -108,12 +109,12 @@ GpuMat loadMat(const Mat& m, bool useRoi)
 //////////////////////////////////////////////////////////////////////
 // Image load
 
-Mat readImage(const string& fileName, int flags)
+Mat readImage(const std::string& fileName, int flags)
 {
-    return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
+    return imread(TS::ptr()->get_data_path() + fileName, flags);
 }
 
-Mat readImageType(const string& fname, int type)
+Mat readImageType(const std::string& fname, int type)
 {
     Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
     if (CV_MAT_CN(type) == 4)
@@ -134,50 +135,150 @@ bool supportFeature(const DeviceInfo& info, FeatureSet feature)
     return TargetArchs::builtWith(feature) && info.supports(feature);
 }
 
-const vector<DeviceInfo>& devices()
+DeviceManager& DeviceManager::instance()
 {
-    static vector<DeviceInfo> devs;
-    static bool first = true;
-
-    if (first)
-    {
-        int deviceCount = getCudaEnabledDeviceCount();
-
-        devs.reserve(deviceCount);
-
-        for (int i = 0; i < deviceCount; ++i)
-        {
-            DeviceInfo info(i);
-            if (info.isCompatible())
-                devs.push_back(info);
-        }
-
-        first = false;
-    }
-
-    return devs;
+    static DeviceManager obj;
+    return obj;
 }
 
-vector<DeviceInfo> devices(FeatureSet feature)
+void DeviceManager::load(int i)
 {
-    const vector<DeviceInfo>& d = devices();
+    devices_.clear();
+    devices_.reserve(1);
 
-    vector<DeviceInfo> devs_filtered;
+    ostringstream msg;
 
-    if (TargetArchs::builtWith(feature))
+    if (i < 0 || i >= getCudaEnabledDeviceCount())
     {
-        devs_filtered.reserve(d.size());
-
-        for (size_t i = 0, size = d.size(); i < size; ++i)
-        {
-            const DeviceInfo& info = d[i];
-
-            if (info.supports(feature))
-                devs_filtered.push_back(info);
-        }
+        msg << "Incorrect device number - " << i;
+        throw runtime_error(msg.str());
     }
 
-    return devs_filtered;
+    DeviceInfo info(i);
+
+    if (!info.isCompatible())
+    {
+        msg << "Device " << i << " [" << info.name() << "] is NOT compatible with current GPU module build";
+        throw runtime_error(msg.str());
+    }
+
+    devices_.push_back(info);
+}
+
+void DeviceManager::loadAll()
+{
+    int deviceCount = getCudaEnabledDeviceCount();
+
+    devices_.clear();
+    devices_.reserve(deviceCount);
+
+    for (int i = 0; i < deviceCount; ++i)
+    {
+        DeviceInfo info(i);
+        if (info.isCompatible())
+        {
+            devices_.push_back(info);
+        }
+    }
+}
+
+class DevicesGenerator : public ParamGeneratorInterface<DeviceInfo>
+{
+public:
+    ~DevicesGenerator();
+
+    ParamIteratorInterface<DeviceInfo>* Begin() const;
+    ParamIteratorInterface<DeviceInfo>* End() const;
+
+private:
+    class Iterator : public ParamIteratorInterface<DeviceInfo>
+    {
+    public:
+        Iterator(const ParamGeneratorInterface<DeviceInfo>* base, vector<DeviceInfo>::const_iterator iterator);
+
+        virtual ~Iterator();
+
+        virtual const ParamGeneratorInterface<DeviceInfo>* BaseGenerator() const;
+
+        virtual void Advance();
+
+        virtual ParamIteratorInterface<DeviceInfo>* Clone() const;
+
+        virtual const DeviceInfo* Current() const;
+
+        virtual bool Equals(const ParamIteratorInterface<DeviceInfo>& other) const;
+
+    private:
+        Iterator(const Iterator& other);
+
+        const ParamGeneratorInterface<DeviceInfo>* const base_;
+        vector<DeviceInfo>::const_iterator iterator_;
+
+        mutable DeviceInfo value_;
+    };
+};
+
+DevicesGenerator::~DevicesGenerator()
+{
+}
+
+ParamIteratorInterface<DeviceInfo>* DevicesGenerator::Begin() const
+{
+    return new Iterator(this, DeviceManager::instance().values().begin());
+}
+
+ParamIteratorInterface<DeviceInfo>* DevicesGenerator::End() const
+{
+    return new Iterator(this, DeviceManager::instance().values().end());
+}
+
+DevicesGenerator::Iterator::Iterator(const ParamGeneratorInterface<DeviceInfo>* base, vector<DeviceInfo>::const_iterator iterator)
+    : base_(base), iterator_(iterator)
+{
+}
+
+DevicesGenerator::Iterator::~Iterator()
+{
+}
+
+const ParamGeneratorInterface<DeviceInfo>* DevicesGenerator::Iterator::BaseGenerator() const
+{
+    return base_;
+}
+
+void DevicesGenerator::Iterator::Advance()
+{
+    ++iterator_;
+}
+
+ParamIteratorInterface<DeviceInfo>* DevicesGenerator::Iterator::Clone() const
+{
+    return new Iterator(*this);
+}
+
+const DeviceInfo* DevicesGenerator::Iterator::Current() const
+{
+    value_ = *iterator_;
+    return &value_;
+}
+
+bool DevicesGenerator::Iterator::Equals(const ParamIteratorInterface<DeviceInfo>& other) const
+{
+    GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+        << "The program attempted to compare iterators "
+        << "from different generators." << endl;
+
+    return iterator_ == CheckedDowncastToActualType<const Iterator>(&other)->iterator_;
+}
+
+DevicesGenerator::Iterator::Iterator(const Iterator& other) :
+    ParamIteratorInterface<DeviceInfo>(), base_(other.base_), iterator_(other.iterator_)
+{
+}
+
+ParamGenerator<DeviceInfo> DevicesGenerator_()
+{
+    return ParamGenerator<DeviceInfo>(new DevicesGenerator);
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -250,7 +351,7 @@ void minMaxLocGold(const Mat& src, double* minVal_, double* maxVal_, Point* minL
 
 namespace
 {
-    template <typename T, typename OutT> string printMatValImpl(const Mat& m, Point p)
+    template <typename T, typename OutT> std::string printMatValImpl(const Mat& m, Point p)
     {
         const int cn = m.channels();
 
@@ -269,9 +370,9 @@ namespace
         return ostr.str();
     }
 
-    string printMatVal(const Mat& m, Point p)
+    std::string printMatVal(const Mat& m, Point p)
     {
-        typedef string (*func_t)(const Mat& m, Point p);
+        typedef std::string (*func_t)(const Mat& m, Point p);
 
         static const func_t funcs[] =
         {
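
DevicesGenerator above is the small piece of Google Test plumbing that keeps the device list dynamic: Begin()/End() read DeviceManager::instance().values() only when gtest actually walks the generator, so whatever main() loaded via load()/loadAll() is what the parameterized tests see. A rough sketch of how such a generator is consumed, not part of the patch (listLoadedDevices() is a hypothetical function; ParamGenerator/ParamIterator come from gtest's testing::internal namespace):

    #include <cstdio>
    #include "utility.hpp"

    void listLoadedDevices()
    {
        testing::internal::ParamGenerator<cv::gpu::DeviceInfo> gen = DevicesGenerator_();

        // begin()/end() wrap the Begin()/End() implemented above; each element
        // is one DeviceInfo that will become one test instantiation.
        for (testing::internal::ParamGenerator<cv::gpu::DeviceInfo>::iterator it = gen.begin(); it != gen.end(); ++it)
            std::printf("will test on: %s\n", (*it).name().c_str());
    }
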
diff --git a/modules/gpu/test/utility.hpp b/modules/gpu/test/utility.hpp
index 3ad02decb..b36f177f6 100644
--- a/modules/gpu/test/utility.hpp
+++ b/modules/gpu/test/utility.hpp
@@ -80,14 +80,23 @@ cv::Mat readImageType(const std::string& fname, int type);
 //! return true if device supports specified feature and gpu module was built with support the feature.
 bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
 
-//! return all devices compatible with current gpu module build.
-const std::vector<cv::gpu::DeviceInfo>& devices();
+class DeviceManager
+{
+public:
+    static DeviceManager& instance();
 
-//! return all devices compatible with current gpu module build which support specified feature.
-std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
+    void load(int i);
+    void loadAll();
 
-#define ALL_DEVICES testing::ValuesIn(devices())
-#define DEVICES(feature) testing::ValuesIn(devices(feature))
+    const std::vector<cv::gpu::DeviceInfo>& values() const { return devices_; }
+
+private:
+    std::vector<cv::gpu::DeviceInfo> devices_;
+};
+
+testing::internal::ParamGenerator<cv::gpu::DeviceInfo> DevicesGenerator_();
+
+#define ALL_DEVICES DevicesGenerator_()
 
 //////////////////////////////////////////////////////////////////////
 // Additional assertion
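
With ALL_DEVICES now expanding to DevicesGenerator_(), a parameterized test suite gets one instantiation per device that DeviceManager loaded in main(). A hypothetical example of the intended usage, not taken from this patch (the SanityCheck fixture and the include are illustrative; real test sources pull their headers in through the module's precomp.hpp):

    #include "precomp.hpp"

    struct SanityCheck : testing::TestWithParam<cv::gpu::DeviceInfo>
    {
        virtual void SetUp()
        {
            // make the parameter the active CUDA device for this instantiation
            cv::gpu::setDevice(GetParam().deviceID());
        }
    };

    TEST_P(SanityCheck, UploadDownload)
    {
        cv::Mat src(64, 64, CV_8UC1, cv::Scalar::all(127));

        cv::gpu::GpuMat d_src(src);   // upload
        cv::Mat dst;
        d_src.download(dst);          // download

        EXPECT_EQ(0, cv::countNonZero(src != dst));
    }

    // one SanityCheck.UploadDownload per loaded device
    INSTANTIATE_TEST_CASE_P(GPU_Utility, SanityCheck, ALL_DEVICES);
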