implemented faster transpose with SSE auto vectorization
This commit is contained in:
parent
baa946c390
commit
d126263983
@ -3,7 +3,7 @@ macro(listify OUT_LIST IN_STRING)
|
||||
endmacro()
|
||||
|
||||
listify(MEX_INCLUDE_DIRS_LIST ${MEX_INCLUDE_DIRS})
|
||||
set(MEX_CXXFLAGS "CXXFLAGS=\$CXXFLAGS -pedantic -Wall -Wextra -Weffc++ -Wno-unused -Wold-style-cast -Wshadow -Wmissing-declarations -Wmissing-include-dirs -Wnon-virtual-dtor -Wno-newline-eof")
|
||||
set(MEX_CXXFLAGS "CXXFLAGS=\$CXXFLAGS -msse -msse2 -msse3 -msse4.1 -msse4.2 -pedantic -Wall -Wextra -Weffc++ -Wno-unused-parameter -Wold-style-cast -Wshadow -Wmissing-declarations -Wmissing-include-dirs -Wnon-virtual-dtor -Wno-newline-eof")
|
||||
file(GLOB SOURCE_FILES "${CMAKE_CURRENT_BINARY_DIR}/src/*.cpp")
|
||||
foreach(SOURCE_FILE ${SOURCE_FILES})
|
||||
# strip out the filename
|
||||
|
@ -122,8 +122,8 @@ public:
|
||||
// --------------------------------------------------------------------------
|
||||
// MATLAB TYPES
|
||||
// --------------------------------------------------------------------------
|
||||
Bridge& operator=(const mxArray*) { return *this; }
|
||||
Bridge(const mxArray* obj) : ptr_(obj) {}
|
||||
Bridge& operator=(const mxArray* obj) { ptr_ = obj; return *this; }
|
||||
Bridge(const mxArray* obj) : ptr_(obj) {}
|
||||
MxArray toMxArray() { return ptr_; }
|
||||
|
||||
|
||||
|
@ -21,8 +21,6 @@
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void mkl_somatcopy(char, char, size_t, size_t, const float, const float*, size_t, float*, size_t);
|
||||
void mkl_domatcopy(char, char, size_t, size_t, const double, const double*, size_t, double*, size_t);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@ -224,6 +222,12 @@ public:
|
||||
* just encapsulate it
|
||||
*/
|
||||
MxArray(const mxArray* ptr) : ptr_(const_cast<mxArray *>(ptr)), owns_(false) {}
|
||||
/*!
 * @brief rebind this wrapper to an externally provided mxArray
 *
 * dealloc() is invoked on the current state first (presumably releasing
 * whatever array is held at the moment -- its definition is not visible
 * here, TODO confirm). Afterwards the wrapper points at ptr and is flagged
 * as non-owning, mirroring the encapsulating constructor.
 *
 * @param ptr the mxArray to encapsulate; constness is cast away for storage
 * @return a reference to this object, to allow chained assignment
 */
MxArray& operator=(const mxArray* ptr) {
  // must run before the members below are overwritten
  dealloc();
  owns_ = false;
  ptr_  = const_cast<mxArray *>(ptr);
  return *this;
}
|
||||
|
||||
/*!
|
||||
* @brief explicit typed constructor
|
||||
@ -372,7 +376,7 @@ public:
|
||||
|
||||
template <typename Scalar>
|
||||
cv::Mat toMat() const {
|
||||
cv::Mat mat(cols(), rows(), CV_MAKETYPE(cv::DataType<Scalar>::type, channels()));
|
||||
cv::Mat mat(rows(), cols(), CV_MAKETYPE(cv::DataType<Scalar>::type, channels()));
|
||||
switch (ID()) {
|
||||
case mxINT8_CLASS: deepCopyAndTranspose<int8_t, Scalar>(*this, mat); break;
|
||||
case mxUINT8_CLASS: deepCopyAndTranspose<uint8_t, Scalar>(*this, mat); break;
|
||||
@ -397,7 +401,7 @@ public:
|
||||
Scalar* real() { return static_cast<Scalar *>(mxGetData(ptr_)); }
|
||||
|
||||
template <typename Scalar>
|
||||
Scalar* imag() { return static_cast<Scalar *>(mxGetData(ptr_)); }
|
||||
Scalar* imag() { return static_cast<Scalar *>(mxGetImagData(ptr_)); }
|
||||
|
||||
template <typename Scalar>
|
||||
const Scalar* real() const { return static_cast<const Scalar *>(mxGetData(ptr_)); }
|
||||
@ -413,6 +417,7 @@ public:
|
||||
std::string str;
|
||||
str.reserve(size()+1);
|
||||
mxGetString(ptr_, const_cast<char *>(str.data()), str.size());
|
||||
mexPrintf(str.c_str());
|
||||
return str;
|
||||
}
|
||||
|
||||
@ -491,21 +496,48 @@ cv::Mat MxArray::toMat<Matlab::InheritType>() const {
|
||||
// MATRIX TRANSPOSE
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
template <typename InputScalar, typename OutputScalar>
|
||||
void gemt(const char major, const size_t M, const size_t N, const InputScalar* a, size_t lda, OutputScalar* b, size_t ldb) {
|
||||
switch (major) {
|
||||
case 'R':
|
||||
for (size_t m = 0; m < M; ++m) {
|
||||
InputScalar const * arow = a + m*lda;
|
||||
InputScalar const * const aend = arow + N;
|
||||
OutputScalar * bcol = b + m;
|
||||
while (arow < aend) {
|
||||
*bcol = *arow;
|
||||
arow++;
|
||||
bcol+=ldb;
|
||||
}
|
||||
}
|
||||
return;
|
||||
case 'C':
|
||||
for (size_t n = 0; n < N; ++n) {
|
||||
InputScalar const * acol = a + n*lda;
|
||||
InputScalar const * const aend = acol + M;
|
||||
OutputScalar * brow = b + n;
|
||||
while (acol < aend) {
|
||||
*brow = *acol;
|
||||
acol++;
|
||||
brow+=ldb;
|
||||
}
|
||||
}
|
||||
return;
|
||||
default:
|
||||
error(std::string("Unknown ordering given: ").append(std::string(1,major)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <typename InputScalar, typename OutputScalar>
|
||||
void deepCopyAndTranspose(const cv::Mat& in, MxArray& out) {
|
||||
conditionalError(static_cast<size_t>(in.rows) == out.rows(), "Matrices must have the same number of rows");
|
||||
conditionalError(static_cast<size_t>(in.cols) == out.cols(), "Matrices must have the same number of cols");
|
||||
conditionalError(static_cast<size_t>(in.channels()) == out.channels(), "Matrices must have the same number of channels");
|
||||
const InputScalar* inp = in.ptr<InputScalar>(0);
|
||||
OutputScalar* outp = out.real<OutputScalar>();
|
||||
const size_t M = out.rows();
|
||||
const size_t N = out.cols();
|
||||
for (size_t m = 0; m < M; ++m) {
|
||||
const InputScalar* inp = in.ptr<InputScalar>(m);
|
||||
for (size_t n = 0; n < N; ++n) {
|
||||
// copy and transpose
|
||||
outp[m + n*M] = inp[n];
|
||||
}
|
||||
}
|
||||
gemt('R', out.rows(), out.cols(), inp, in.step1(), outp, out.rows());
|
||||
}
|
||||
|
||||
template <typename InputScalar, typename OutputScalar>
|
||||
@ -514,34 +546,10 @@ void deepCopyAndTranspose(const MxArray& in, cv::Mat& out) {
|
||||
conditionalError(in.cols() == static_cast<size_t>(out.cols), "Matrices must have the same number of cols");
|
||||
conditionalError(in.channels() == static_cast<size_t>(out.channels()), "Matrices must have the same number of channels");
|
||||
const InputScalar* inp = in.real<InputScalar>();
|
||||
const size_t M = in.rows();
|
||||
const size_t N = in.cols();
|
||||
for (size_t m = 0; m < M; ++m) {
|
||||
OutputScalar* outp = out.ptr<OutputScalar>(m);
|
||||
for (size_t n = 0; n < N; ++n) {
|
||||
// copy and transpose
|
||||
outp[n] = inp[m + n*M];
|
||||
}
|
||||
}
|
||||
OutputScalar* outp = out.ptr<OutputScalar>(0);
|
||||
gemt('C', in.rows(), in.cols(), inp, in.rows(), outp, out.step1());
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
void deepCopyAndTranspose<float, float>(const cv::Mat&, MxArray&) {
|
||||
}
|
||||
|
||||
template <>
|
||||
void deepCopyAndTranspose<double, double>(const cv::Mat&, MxArray&) {
|
||||
}
|
||||
|
||||
template <>
|
||||
void deepCopyAndTranspose<float, float>(const MxArray&, cv::Mat&) {
|
||||
// use mkl
|
||||
}
|
||||
|
||||
template <>
|
||||
void deepCopyAndTranspose<double, double>(const MxArray&, cv::Mat& ) {
|
||||
// use mkl
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user