Add optimizations for BRIEF and Android using NEON SIMD intrinsics
This commit is contained in:
parent
91d8b2aaac
commit
64f9f7f23c
@ -3,7 +3,11 @@ LOCAL_PATH := ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_MODULE := ${android_module_name}
|
||||
|
||||
|
||||
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
LOCAL_ARM_NEON := true
|
||||
endif
|
||||
|
||||
LOCAL_SRC_FILES := ${android_srcs}
|
||||
|
||||
LOCAL_CFLAGS := ${android_defs}
|
||||
|
@ -16,7 +16,16 @@ LOCAL_C_INCLUDES += $(OPENCV_INCLUDES)
|
||||
LOCAL_MODULE := android-opencv
|
||||
|
||||
LOCAL_SRC_FILES := gen/android_cv_wrap.cpp image_pool.cpp \
|
||||
yuv420sp2rgb.c gl_code.cpp Calibration.cpp
|
||||
gl_code.cpp Calibration.cpp
|
||||
|
||||
|
||||
#ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
# LOCAL_CFLAGS := -DHAVE_NEON=1
|
||||
# LOCAL_SRC_FILES += yuv2rgb_neon.c.neon
|
||||
#else
|
||||
LOCAL_SRC_FILES += yuv420sp2rgb.c
|
||||
#endif
|
||||
|
||||
|
||||
include $(BUILD_SHARED_LIBRARY)
|
||||
|
||||
|
@ -18,17 +18,17 @@ JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved)
|
||||
return JNI_VERSION_1_4;
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_com_opencv_jni_opencvJNI_addYUVtoPool(JNIEnv * env,
|
||||
jclass thiz, jlong ppool, jobject _jpool, jbyteArray jbuffer,
|
||||
jint jidx, jint jwidth, jint jheight, jboolean jgrey)
|
||||
JNIEXPORT void JNICALL Java_com_opencv_jni_opencvJNI_addYUVtoPool(JNIEnv * env, jclass thiz, jlong ppool,
|
||||
jobject _jpool, jbyteArray jbuffer, jint jidx,
|
||||
jint jwidth, jint jheight, jboolean jgrey)
|
||||
{
|
||||
int buff_height = jheight + (jheight/2);
|
||||
Size buff_size(jwidth,buff_height);
|
||||
image_pool *pool = (image_pool *) ppool;
|
||||
int buff_height = jheight + (jheight / 2);
|
||||
Size buff_size(jwidth, buff_height);
|
||||
image_pool *pool = (image_pool *)ppool;
|
||||
|
||||
Mat mat = pool->getYUV(jidx);
|
||||
|
||||
if (mat.empty() || mat.size() != buff_size )
|
||||
if (mat.empty() || mat.size() != buff_size)
|
||||
{
|
||||
mat.create(buff_size, CV_8UC1);
|
||||
}
|
||||
@ -36,7 +36,7 @@ JNIEXPORT void JNICALL Java_com_opencv_jni_opencvJNI_addYUVtoPool(JNIEnv * env,
|
||||
jsize sz = env->GetArrayLength(jbuffer);
|
||||
uchar* buff = mat.ptr<uchar> (0);
|
||||
|
||||
env->GetByteArrayRegion(jbuffer, 0, sz, (jbyte*) buff);
|
||||
env->GetByteArrayRegion(jbuffer, 0, sz, (jbyte*)buff);
|
||||
|
||||
pool->addYUVMat(jidx, mat);
|
||||
|
||||
@ -51,8 +51,7 @@ JNIEXPORT void JNICALL Java_com_opencv_jni_opencvJNI_addYUVtoPool(JNIEnv * env,
|
||||
}
|
||||
//doesn't work unfortunately..
|
||||
//TODO cvtColor(mat,color, CV_YCrCb2RGB);
|
||||
color_convert_common(buff, buff + jwidth * jheight, jwidth, jheight,
|
||||
color.ptr<uchar> (0), false);
|
||||
color_convert_common(buff, buff + jwidth * jheight, jwidth, jheight, color.ptr<uchar> (0), false);
|
||||
}
|
||||
|
||||
if (jgrey)
|
||||
@ -84,7 +83,7 @@ Mat image_pool::getGrey(int i)
|
||||
Mat tm = yuvImagesMap[i];
|
||||
if (tm.empty())
|
||||
return tm;
|
||||
return tm(Range(0, tm.rows * (2.0f/3)), Range::all());
|
||||
return tm(Range(0, tm.rows * (2.0f / 3)), Range::all());
|
||||
}
|
||||
Mat image_pool::getYUV(int i)
|
||||
{
|
||||
@ -99,3 +98,19 @@ void image_pool::addImage(int i, Mat mat)
|
||||
imagesmap[i] = mat;
|
||||
}
|
||||
|
||||
void image_pool::convertYUVtoColor(int i, cv::Mat& out)
|
||||
{
|
||||
|
||||
Mat yuv = getYUV(i);
|
||||
|
||||
if (yuv.empty())
|
||||
return;
|
||||
int width = yuv.cols;
|
||||
int height = yuv.rows * (2.0f / 3);
|
||||
out.create(height, width, CV_8UC3);
|
||||
const unsigned char* buff = yuv.ptr<unsigned char> (0);
|
||||
unsigned char* out_buff = out.ptr<unsigned char> (0);
|
||||
//doesn't work unfortunately..
|
||||
//TODO cvtColor(mat,color, CV_YCrCb2RGB);
|
||||
color_convert_common(buff, buff + width * height, width, height, out_buff, false);
|
||||
}
|
||||
|
@ -53,6 +53,8 @@ public:
|
||||
*/
|
||||
void addYUVMat(int i, cv::Mat mat);
|
||||
|
||||
void convertYUVtoColor(int i, cv::Mat& out);
|
||||
|
||||
// int addYUV(uchar* buffer, int size, int width, int height, bool grey,int idx);
|
||||
//
|
||||
// void getBitmap(int * outintarray, int size, int idx);
|
||||
|
@ -25,7 +25,7 @@
|
||||
#endif
|
||||
|
||||
const int bytes_per_pixel = 2;
|
||||
void color_convert_common(unsigned char *pY, unsigned char *pUV, int width, int height, unsigned char *buffer, int grey)
|
||||
void color_convert_common(const unsigned char *pY, const unsigned char *pUV, int width, int height, unsigned char *buffer, int grey)
|
||||
{
|
||||
|
||||
int i, j;
|
||||
|
@ -7,7 +7,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
void color_convert_common(
|
||||
unsigned char *pY, unsigned char *pUV,
|
||||
const unsigned char *pY, const unsigned char *pUV,
|
||||
int width, int height, unsigned char *buffer,
|
||||
int grey);
|
||||
|
||||
|
@ -24,7 +24,7 @@ public class CameraConfig extends Activity {
|
||||
// Restore preferences
|
||||
SharedPreferences settings = ctx.getSharedPreferences(CAMERA_SETTINGS,
|
||||
0);
|
||||
int mode = settings.getInt(CAMERA_MODE, CAMERA_MODE_COLOR);
|
||||
int mode = settings.getInt(CAMERA_MODE, CAMERA_MODE_BW);
|
||||
return mode;
|
||||
}
|
||||
|
||||
|
@ -44,6 +44,11 @@
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#if ANDROID && HAVE_NEON
|
||||
#include <cpu-features.h>
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
using namespace cv;
|
||||
|
||||
inline int smoothedSum(const Mat& sum, const KeyPoint& pt, int y, int x)
|
||||
@ -106,16 +111,39 @@ HammingLUT::ResultType HammingLUT::operator()( const unsigned char* a, const uns
|
||||
/**
 * Counts the bits that differ between two byte buffers (Hamming distance).
 *
 * With GCC-compatible compilers the buffers are XOR-ed and the set bits are
 * counted in chunks:
 *   - on Android builds with HAVE_NEON, and only when the CPU reports NEON
 *     support at run time, 16 bytes at a time with NEON intrinsics;
 *   - then one `unsigned long` at a time with __builtin_popcountl;
 *   - then byte by byte for whatever is left, so no read goes past `size`.
 * Other compilers fall back to HammingLUT.
 *
 * @param a     first buffer, at least `size` bytes
 * @param b     second buffer, at least `size` bytes
 * @param size  number of bytes to compare
 * @return number of bit positions in which a and b differ
 */
Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned char* b, int size) const
{
#if __GNUC__
  ResultType result = 0;
  int i = 0;
#if ANDROID && HAVE_NEON
  // Query the CPU features once; the value is reused by later calls.
  static uint64_t features = android_getCpuFeatures();
  if ((features & ANDROID_CPU_ARM_FEATURE_NEON))
  {
    for (; i + 16 <= size; i += 16)
    {
      uint8x16_t A_vec = vld1q_u8 (a + i);
      uint8x16_t B_vec = vld1q_u8 (b + i);
      //uint8x16_t veorq_u8 (uint8x16_t, uint8x16_t)
      uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);

      // Per-byte population count of the differing bits.
      uint8x16_t bitsSet = vcntq_u8 (AxorB);
      // Pairwise widening adds: 16 x u8 -> 8 x u16 -> 4 x u32 -> 2 x u64.
      uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
      uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);

      uint64x2_t bitSet2 = vpaddlq_u32 (bitSet4);
      result += vgetq_lane_u64 (bitSet2,0);
      result += vgetq_lane_u64 (bitSet2,1);
    }
  }
#endif
  // Word-sized chunks: all of the input when NEON is unavailable, otherwise
  // only what the 16-byte loop left over.
  // NOTE(review): like the original code, this loads through a reinterpreted
  // pointer and so relies on the target tolerating unaligned word loads.
  const int word_bytes = static_cast<int> (sizeof(unsigned long));
  for (; i + word_bytes <= size; i += word_bytes)
  {
    unsigned long a2 = *reinterpret_cast<const unsigned long*> (a + i);
    unsigned long b2 = *reinterpret_cast<const unsigned long*> (b + i);
    result += __builtin_popcountl(a2 ^ b2);
  }
  // Remaining bytes when size is not a multiple of the word size.
  for (; i < size; ++i)
  {
    result += __builtin_popcount(static_cast<unsigned int> (a[i] ^ b[i]));
  }
  return result;
#else
  return HammingLUT()(a,b,size);
#endif
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user