prepared GpuMat for moving to core module

This commit is contained in:
Vladislav Vinogradov 2011-10-31 08:51:00 +00:00
parent 0b4e7d6057
commit 09ba133ddf
4 changed files with 739 additions and 597 deletions

View File

@ -48,21 +48,51 @@
namespace cv { namespace gpu
{
class Stream;
class CudaMem;
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
class CV_EXPORTS GpuMat
{
public:
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
// Contains just image size, data ptr and step.
template <class T> operator DevMem2D_<T>() const;
template <class T> operator PtrStep_<T>() const;
template <class T> operator PtrStep<T>() const;
//! builds GpuMat from Mat. Performs blocking upload to device.
explicit GpuMat(const Mat& m);
//! performs blocking upload of data to GpuMat.
void upload(const cv::Mat& m);
//! downloads data from device to host memory. Blocking calls.
void download(cv::Mat& m) const;
operator Mat() const
{
Mat m;
download(m);
return m;
}
//! default constructor
GpuMat();
//! constructs GpuMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
GpuMat(int rows, int cols, int type);
GpuMat(Size size, int type);
//! constructs GpuMatrix and fills it with the specified value _s.
GpuMat(int rows, int cols, int type, const Scalar& s);
GpuMat(Size size, int type, const Scalar& s);
//! copy constructor
GpuMat(const GpuMat& m);
@ -74,35 +104,11 @@ namespace cv { namespace gpu
GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange);
GpuMat(const GpuMat& m, const Rect& roi);
//! builds GpuMat from Mat. Performs blocking upload to device.
explicit GpuMat (const Mat& m);
//! destructor - calls release()
~GpuMat();
//! assignment operators
GpuMat& operator = (const GpuMat& m);
//! assignment operator. Performs blocking upload to device.
GpuMat& operator = (const Mat& m);
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
// Contains just image size, data ptr and step.
template <class T> operator DevMem2D_<T>() const;
template <class T> operator PtrStep_<T>() const;
template <class T> operator PtrStep<T>() const;
//! performs blocking upload of data to GpuMat.
void upload(const cv::Mat& m);
//! upload async
void upload(const CudaMem& m, Stream& stream);
//! downloads data from device to host memory. Blocking calls.
operator Mat() const;
void download(cv::Mat& m) const;
//! download async
void download(CudaMem& m, Stream& stream) const;
//! returns a new GpuMatrix header for the specified row
GpuMat row(int y) const;
@ -119,13 +125,13 @@ namespace cv { namespace gpu
GpuMat clone() const;
//! copies the GpuMatrix content to "m".
// It calls m.create(this->size(), this->type()).
void copyTo( GpuMat& m ) const;
void copyTo(GpuMat& m) const;
//! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements.
void copyTo( GpuMat& m, const GpuMat& mask ) const;
void copyTo(GpuMat& m, const GpuMat& mask) const;
//! converts GpuMatrix to another datatype with optional scaling. See cvConvertScale.
void convertTo( GpuMat& m, int rtype, double alpha=1, double beta=0 ) const;
void convertTo(GpuMat& m, int rtype, double alpha = 1, double beta = 0) const;
void assignTo( GpuMat& m, int type=-1 ) const;
void assignTo(GpuMat& m, int type=-1) const;
//! sets every GpuMatrix element to s
GpuMat& operator = (const Scalar& s);
@ -147,13 +153,13 @@ namespace cv { namespace gpu
void swap(GpuMat& mat);
//! locates GpuMatrix header within a parent GpuMatrix. See below
void locateROI( Size& wholeSize, Point& ofs ) const;
void locateROI(Size& wholeSize, Point& ofs) const;
//! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix.
GpuMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
//! extracts a rectangular sub-GpuMatrix
// (this is a generalized form of row, rowRange etc.)
GpuMat operator()( Range rowRange, Range colRange ) const;
GpuMat operator()( const Rect& roi ) const;
GpuMat operator()(Range rowRange, Range colRange) const;
GpuMat operator()(const Rect& roi) const;
//! returns true iff the GpuMatrix data is continuous
// (i.e. when there are no gaps between successive rows).
@ -186,9 +192,6 @@ namespace cv { namespace gpu
template<typename _Tp> _Tp* ptr(int y = 0);
template<typename _Tp> const _Tp* ptr(int y = 0) const;
//! matrix transposition
GpuMat t() const;
/*! includes several bit-fields:
- the magic signature
- continuity flag
@ -196,10 +199,13 @@ namespace cv { namespace gpu
- number of channels
*/
int flags;
//! the number of rows and columns
int rows, cols;
//! a distance between successive rows in bytes; includes the gap if any
size_t step;
//! pointer to the data
uchar* data;
@ -223,23 +229,60 @@ namespace cv { namespace gpu
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
CV_EXPORTS void ensureSizeIsEnough(Size size, int type, GpuMat& m);
////////////////////////////////////////////////////////////////////////
//////////////////////////////// GpuMat ////////////////////////////////
////////////////////////////////////////////////////////////////////////
inline GpuMat::GpuMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
template <class T> inline GpuMat::operator DevMem2D_<T>() const { return DevMem2D_<T>(rows, cols, (T*)data, step); }
template <class T> inline GpuMat::operator PtrStep_<T>() const { return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this)); }
template <class T> inline GpuMat::operator PtrStep<T>() const { return PtrStep<T>((T*)data, step); }
inline GpuMat::GpuMat(int rows_, int cols_, int type_) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
inline GpuMat::GpuMat()
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
}
inline GpuMat::GpuMat(int rows_, int cols_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline GpuMat::~GpuMat() { release(); }
inline GpuMat::GpuMat(Size size_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
template <class T> inline GpuMat::operator DevMem2D_<T>() const { return DevMem2D_<T>(rows, cols, (T*)data, step); }
template <class T> inline GpuMat::operator PtrStep_<T>() const { return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this)); }
template <class T> inline GpuMat::operator PtrStep<T>() const { return PtrStep<T>((T*)data, step); }
inline GpuMat::GpuMat(int rows_, int cols_, int type_, const Scalar& s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
{
create(rows_, cols_, type_);
setTo(s_);
}
}
inline GpuMat::GpuMat(Size size_, int type_, const Scalar& s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
{
create(size_.height, size_.width, type_);
setTo(s_);
}
}
inline GpuMat::~GpuMat()
{
release();
}
inline GpuMat GpuMat::clone() const
{
@ -250,15 +293,21 @@ namespace cv { namespace gpu
inline void GpuMat::assignTo(GpuMat& m, int type) const
{
if( type < 0 )
if (type < 0)
m = *this;
else
convertTo(m, type);
}
inline size_t GpuMat::step1() const { return step/elemSize1(); }
inline size_t GpuMat::step1() const
{
return step / elemSize1();
}
inline bool GpuMat::empty() const { return data == 0; }
inline bool GpuMat::empty() const
{
return data == 0;
}
template<typename _Tp> inline _Tp* GpuMat::ptr(int y)
{
@ -270,7 +319,148 @@ namespace cv { namespace gpu
return (const _Tp*)ptr(y);
}
inline void swap(GpuMat& a, GpuMat& b) { a.swap(b); }
inline void swap(GpuMat& a, GpuMat& b)
{
a.swap(b);
}
inline GpuMat GpuMat::row(int y) const
{
return GpuMat(*this, Range(y, y+1), Range::all());
}
inline GpuMat GpuMat::col(int x) const
{
return GpuMat(*this, Range::all(), Range(x, x+1));
}
inline GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
return GpuMat(*this, Range(startrow, endrow), Range::all());
}
inline GpuMat GpuMat::rowRange(const Range& r) const
{
return GpuMat(*this, r, Range::all());
}
inline GpuMat GpuMat::colRange(int startcol, int endcol) const
{
return GpuMat(*this, Range::all(), Range(startcol, endcol));
}
inline GpuMat GpuMat::colRange(const Range& r) const
{
return GpuMat(*this, Range::all(), r);
}
inline void GpuMat::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline GpuMat GpuMat::operator()(Range rowRange, Range colRange) const
{
return GpuMat(*this, rowRange, colRange);
}
inline GpuMat GpuMat::operator()(const Rect& roi) const
{
return GpuMat(*this, roi);
}
inline bool GpuMat::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline size_t GpuMat::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline size_t GpuMat::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline int GpuMat::type() const
{
return CV_MAT_TYPE(flags);
}
inline int GpuMat::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline int GpuMat::channels() const
{
return CV_MAT_CN(flags);
}
inline Size GpuMat::size() const
{
return Size(cols, rows);
}
inline unsigned char* GpuMat::ptr(int y)
{
CV_DbgAssert((unsigned)y < (unsigned)rows);
return data + step * y;
}
inline const unsigned char* GpuMat::ptr(int y) const
{
CV_DbgAssert((unsigned)y < (unsigned)rows);
return data + step * y;
}
inline GpuMat& GpuMat::operator = (const Scalar& s)
{
setTo(s);
return *this;
}
inline GpuMat createContinuous(int rows, int cols, int type)
{
GpuMat m;
createContinuous(rows, cols, type, m);
return m;
}
inline void createContinuous(Size size, int type, GpuMat& m)
{
createContinuous(size.height, size.width, type, m);
}
inline GpuMat createContinuous(Size size, int type)
{
GpuMat m;
createContinuous(size, type, m);
return m;
}
inline void ensureSizeIsEnough(Size size, int type, GpuMat& m)
{
ensureSizeIsEnough(size.height, size.width, type, m);
}
inline void createContinuous(int rows, int cols, int type, GpuMat& m)
{
int area = rows * cols;
if (!m.isContinuous() || m.type() != type || m.size().area() != area)
m.create(1, area, type);
m = m.reshape(0, rows);
}
inline void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
{
if (m.type() == type && m.rows >= rows && m.cols >= cols)
m = m(Rect(0, 0, cols, rows));
else
m.create(rows, cols, type);
}
}}
#endif // __OPENCV_GPUMAT_HPP__

File diff suppressed because it is too large Load Diff

View File

@ -387,7 +387,7 @@ void createLaplacePyrGpu(const Mat &img, int num_levels, vector<Mat> &pyr)
pyr.resize(num_levels + 1);
vector<gpu::GpuMat> gpu_pyr(num_levels + 1);
gpu_pyr[0] = img;
gpu_pyr[0].upload(img);
for (int i = 0; i < num_levels; ++i)
gpu::pyrDown(gpu_pyr[i], gpu_pyr[i + 1]);
@ -396,10 +396,10 @@ void createLaplacePyrGpu(const Mat &img, int num_levels, vector<Mat> &pyr)
{
gpu::pyrUp(gpu_pyr[i + 1], tmp);
gpu::subtract(gpu_pyr[i], tmp, gpu_pyr[i]);
pyr[i] = gpu_pyr[i];
gpu_pyr[i].download(pyr[i]);
}
pyr[num_levels] = gpu_pyr[num_levels];
gpu_pyr[num_levels].download(pyr[num_levels]);
#endif
}
@ -425,7 +425,7 @@ void restoreImageFromLaplacePyrGpu(vector<Mat> &pyr)
vector<gpu::GpuMat> gpu_pyr(pyr.size());
for (size_t i = 0; i < pyr.size(); ++i)
gpu_pyr[i] = pyr[i];
gpu_pyr[i].upload(pyr[i]);
gpu::GpuMat tmp;
for (size_t i = pyr.size() - 1; i > 0; --i)
@ -434,7 +434,7 @@ void restoreImageFromLaplacePyrGpu(vector<Mat> &pyr)
gpu::add(tmp, gpu_pyr[i - 1], gpu_pyr[i - 1]);
}
pyr[0] = gpu_pyr[0];
gpu_pyr[0].download(pyr[0]);
#endif
}

View File

@ -37,7 +37,7 @@ TEST(matchTemplate)
matchTemplate(src, templ, dst, CV_TM_CCORR);
CPU_OFF;
d_templ = templ;
d_templ.upload(templ);
d_dst.create(d_src.rows - d_templ.rows + 1, d_src.cols - d_templ.cols + 1, CV_32F);
GPU_ON;
@ -65,7 +65,7 @@ TEST(minMaxLoc)
minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
CPU_OFF;
d_src = src;
d_src.upload(src);
GPU_ON;
gpu::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
@ -107,9 +107,9 @@ TEST(remap)
remap(src, dst, xmap, ymap, interpolation, borderMode);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_src.upload(src);
d_xmap.upload(xmap);
d_ymap.upload(ymap);
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
@ -142,9 +142,9 @@ TEST(remap)
remap(src, dst, xmap, ymap, interpolation, borderMode);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_src.upload(src);
d_xmap.upload(xmap);
d_ymap.upload(ymap);
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
@ -177,9 +177,9 @@ TEST(remap)
remap(src, dst, xmap, ymap, interpolation, borderMode);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_src.upload(src);
d_xmap.upload(xmap);
d_ymap.upload(ymap);
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
@ -212,9 +212,9 @@ TEST(remap)
remap(src, dst, xmap, ymap, interpolation, borderMode);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_src.upload(src);
d_xmap.upload(xmap);
d_ymap.upload(ymap);
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
@ -240,7 +240,7 @@ TEST(dft)
dft(src, dst);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(d_src.size(), d_src.type());
GPU_ON;
@ -266,7 +266,7 @@ TEST(cornerHarris)
cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(src.size(), src.type());
GPU_ON;
@ -286,7 +286,7 @@ TEST(integral)
gen(src, size, size, CV_8U, 0, 256);
sum.create(size + 1, size + 1, CV_32S);
d_src = src;
d_src.upload(src);
d_sum.create(size + 1, size + 1, CV_32S);
for (int i = 0; i < 5; ++i)
@ -320,7 +320,7 @@ TEST(norm)
norm(src, NORM_INF);
CPU_OFF;
d_src = src;
d_src.upload(src);
GPU_ON;
for (int i = 0; i < 5; ++i)
@ -350,7 +350,7 @@ TEST(meanShift)
gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
d_src = src;
d_src.upload(src);
d_dst.create(d_src.size(), d_src.type());
GPU_ON;
@ -483,8 +483,8 @@ TEST(magnitude)
magnitude(x, y, mag);
CPU_OFF;
d_x = x;
d_y = y;
d_x.upload(x);
d_y.upload(y);
d_mag.create(size, size, CV_32F);
GPU_ON;
@ -511,8 +511,8 @@ TEST(add)
add(src1, src2, dst);
CPU_OFF;
d_src1 = src1;
d_src2 = src2;
d_src1.upload(src1);
d_src2.upload(src2);
d_dst.create(size, size, CV_32F);
GPU_ON;
@ -538,7 +538,7 @@ TEST(log)
log(src, dst);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size, size, CV_32F);
GPU_ON;
@ -564,7 +564,7 @@ TEST(exp)
exp(src, dst);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size, size, CV_32F);
GPU_ON;
@ -591,8 +591,8 @@ TEST(mulSpectrums)
mulSpectrums(src1, src2, dst, 0, true);
CPU_OFF;
d_src1 = src1;
d_src2 = src2;
d_src1.upload(src1);
d_src2.upload(src2);
d_dst.create(size, size, CV_32FC2);
GPU_ON;
@ -618,7 +618,7 @@ TEST(resize)
resize(src, dst, dst.size());
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size * 2, size * 2, CV_8U);
GPU_ON;
@ -636,7 +636,7 @@ TEST(resize)
resize(src, dst, dst.size());
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size / 2, size / 2, CV_8U);
GPU_ON;
@ -654,7 +654,7 @@ TEST(resize)
resize(src, dst, dst.size());
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size * 2, size * 2, CV_8U);
GPU_ON;
@ -672,7 +672,7 @@ TEST(resize)
resize(src, dst, dst.size());
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size / 2, size / 2, CV_8U);
GPU_ON;
@ -690,7 +690,7 @@ TEST(resize)
resize(src, dst, dst.size());
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size * 2, size * 2, CV_8U);
GPU_ON;
@ -708,7 +708,7 @@ TEST(resize)
resize(src, dst, dst.size());
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size / 2, size / 2, CV_8U);
GPU_ON;
@ -726,7 +726,7 @@ TEST(resize)
resize(src, dst, dst.size());
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size * 2, size * 2, CV_8U);
GPU_ON;
@ -744,7 +744,7 @@ TEST(resize)
resize(src, dst, dst.size());
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size / 2, size / 2, CV_8U);
GPU_ON;
@ -900,7 +900,7 @@ TEST(erode)
erode(src, dst, ker);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(d_src.size(), d_src.type());
GPU_ON;
@ -925,7 +925,7 @@ TEST(threshold)
threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size, size, CV_8U);
GPU_ON;
@ -944,7 +944,7 @@ TEST(threshold)
threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size, size, CV_32F);
GPU_ON;
@ -969,7 +969,7 @@ TEST(pow)
pow(src, -2.0, dst);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(size, size, CV_32F);
GPU_ON;
@ -1004,7 +1004,7 @@ TEST(projectPoints)
projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst);
CPU_OFF;
d_src = src;
d_src.upload(src);
d_dst.create(1, size, CV_32FC2);
GPU_ON;
@ -1491,9 +1491,9 @@ TEST(gemm)
gemm(src1, src2, 1.0, src3, 1.0, dst);
CPU_OFF;
d_src1 = src1;
d_src2 = src2;
d_src3 = src3;
d_src1.upload(src1);
d_src2.upload(src2);
d_src3.upload(src3);
d_dst.create(d_src1.size(), d_src1.type());
GPU_ON;