HoG and Hellinger-metric preprocess for digit recognition

line breaks in fitline.py description
2012-07-02 13:49:36 +00:00
parent efe139667b
commit 1543b46383
4 changed files with 69 additions and 24 deletions
--- a/samples/python2/digits.py
+++ b/samples/python2/digits.py
@@ -3,8 +3,19 @@ SVN and KNearest digit recognition.

 Sample loads a dataset of handwritten digits from 'digits.png'.
 Then it trains a SVN and KNearest classifiers on it and evaluates
-their accuracy. Moment-based image deskew is used to improve 
-the recognition accuracy.
+their accuracy. 
+
+The following preprocessing is applied to the dataset:
+ - Moment-based image deskew (see deskew())
+ - Digit images are split into four 10x10 cells and a 16-bin
+   histogram of oriented gradients is computed for each
+   cell
+ - Histograms are transformed to a space with the Hellinger metric (RootSIFT, see [1])
+
+
+[1] R. Arandjelovic, A. Zisserman
+    "Three things everyone should know to improve object retrieval"
+    http://www.robots.ox.ac.uk/~vgg/publications/2012/Arandjelovic12/arandjelovic12.pdf

 Usage:
   digits.py
@@ -14,17 +25,25 @@ import numpy as np
 import cv2
 from multiprocessing.pool import ThreadPool
 from common import clock, mosaic
+from numpy.linalg import norm

 SZ = 20 # size of each digit is SZ x SZ
 CLASS_N = 10
 DIGITS_FN = 'digits.png'

+def split2d(img, cell_size, flatten=True):  # Cut an image into a grid of equally sized cells.
+    h, w = img.shape[:2]  # rows, columns of the input
+    sx, sy = cell_size  # cell_size is (cell width, cell height)
+    cells = [np.hsplit(row, w//sx) for row in np.vsplit(img, h//sy)]  # h//sy horizontal bands, each cut into w//sx cells; NumPy raises if the division is uneven
+    cells = np.array(cells)  # grid indexed as [band, column, ...]
+    if flatten:
+        cells = cells.reshape(-1, sy, sx)  # collapse the grid into one row-major sequence of sy x sx cells; assumes a single-channel image -- TODO confirm
+    return cells  # flat (N, sy, sx) array when flatten is true, otherwise the unflattened grid
+
 def load_digits(fn):
    print 'loading "%s" ...' % fn
    digits_img = cv2.imread(fn, 0)
-    h, w = digits_img.shape
-    digits = [np.hsplit(row, w/SZ) for row in np.vsplit(digits_img, h/SZ)]
-    digits = np.array(digits).reshape(-1, SZ, SZ)
+    digits = split2d(digits_img, (SZ, SZ))
    labels = np.repeat(np.arange(CLASS_N), len(digits)/CLASS_N)
    return digits, labels

@@ -92,6 +111,31 @@ def evaluate_model(model, digits, samples, labels):
        vis.append(img)
    return mosaic(25, vis)

+def preprocess_simple(digits):  # Baseline features: the raw pixels of each digit flattened into one row.
+    return np.float32(digits).reshape(-1, SZ*SZ) / 255.0  # SZ*SZ values per row; /255 presumably maps 8-bit pixels into [0, 1] -- verify input dtype
+
+def preprocess_hog(digits):  # Build one gradient-orientation-histogram descriptor row per digit image.
+    samples = []
+    for img in digits:
+        gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)  # first derivative along x (dx=1, dy=0), float32 output
+        gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)  # first derivative along y (dx=0, dy=1)
+        mag, ang = cv2.cartToPolar(gx, gy)  # per-pixel gradient magnitude and angle; angle is treated as radians below
+        bin_n = 16  # number of orientation bins per cell
+        bin = np.int32(bin_n*ang/(2*np.pi))  # quantize angle to a bin index; assumes ang < 2*pi so the index stays below bin_n (note: shadows builtin bin)
+        bin_cells = bin[:10,:10], bin[10:,:10], bin[:10,10:], bin[10:,10:]  # four quadrants split at row/column 10 (hard-coded, not derived from SZ)
+        mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]  # same quadrants in the same order as bin_cells
+        hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]  # magnitude-weighted histogram per quadrant, at least bin_n entries each
+        hist = np.hstack(hists)  # concatenated descriptor: 4*bin_n values as long as every index is below bin_n
+
+        # transform to Hellinger kernel
+        eps = 1e-7  # keeps the two divisions below away from a zero denominator
+        hist /= hist.sum() + eps  # scale so the entries sum to (about) 1
+        hist = np.sqrt(hist)  # element-wise square root
+        hist /= norm(hist) + eps  # scale to (about) unit Euclidean length
+
+        samples.append(hist)
+    return np.float32(samples)  # one float32 row per input digit
+

 if __name__ == '__main__':
    print __doc__
@@ -100,13 +144,13 @@ if __name__ == '__main__':
    
    print 'preprocessing...'
    # shuffle digits
-    rand = np.random.RandomState(12345)
+    rand = np.random.RandomState(321)
    shuffle = rand.permutation(len(digits))
    digits, labels = digits[shuffle], labels[shuffle]
    
    digits2 = map(deskew, digits)
-    samples = np.float32(digits2).reshape(-1, SZ*SZ) / 255.0
-
+    samples = preprocess_hog(digits2)
+    
    train_n = int(0.9*len(samples))
    cv2.imshow('test set', mosaic(25, digits[train_n:]))
    digits_train, digits_test = np.split(digits2, [train_n])
@@ -115,13 +159,13 @@ if __name__ == '__main__':

    
    print 'training KNearest...'
-    model = KNearest(k=1)
+    model = KNearest(k=4)
    model.train(samples_train, labels_train)
    vis = evaluate_model(model, digits_test, samples_test, labels_test)
    cv2.imshow('KNearest test', vis)

    print 'training SVM...'
-    model = SVM(C=4.66, gamma=0.08)
+    model = SVM(C=2.67, gamma=5.383)
    model.train(samples_train, labels_train)
    vis = evaluate_model(model, digits_test, samples_test, labels_test)
    cv2.imshow('SVM test', vis)