HoG and Hellinger-metric preprocess for digit recognition

line breaks in fitline.py description
2012-07-02 13:49:36 +00:00 · 2012-07-02 13:49:36 +00:00 · 1543b46383
commit 1543b46383
parent efe139667b
4 changed files with 69 additions and 24 deletions
--- a/samples/python2/digits.py
+++ b/samples/python2/digits.py
@ -3,8 +3,19 @@ SVN and KNearest digit recognition.

 Sample loads a dataset of handwritten digits from 'digits.png'.
 Then it trains a SVN and KNearest classifiers on it and evaluates
-their accuracy. Moment-based image deskew is used to improve 
-the recognition accuracy.
+their accuracy. 
+
+Following preprocessing is applied to the dataset:
+ - Moment-based image deskew (see deskew())
+ - Digit images are split into 4 10x10 cells and 16-bin
+   histogram of oriented gradients is computed for each
+   cell
+ - Transform histograms to space with Hellinger metric (see [1] (RootSIFT))
+
+
+[1] R. Arandjelovic, A. Zisserman
+    "Three things everyone should know to improve object retrieval"
+    http://www.robots.ox.ac.uk/~vgg/publications/2012/Arandjelovic12/arandjelovic12.pdf

 Usage:
   digits.py
@ -14,17 +25,25 @@ import numpy as np
 import cv2
 from multiprocessing.pool import ThreadPool
 from common import clock, mosaic
+from numpy.linalg import norm

 SZ = 20 # size of each digit is SZ x SZ
 CLASS_N = 10
 DIGITS_FN = 'digits.png'

+def split2d(img, cell_size, flatten=True):
+    h, w = img.shape[:2]
+    sx, sy = cell_size
+    cells = [np.hsplit(row, w//sx) for row in np.vsplit(img, h//sy)]
+    cells = np.array(cells)
+    if flatten:
+        cells = cells.reshape(-1, sy, sx)
+    return cells
+
 def load_digits(fn):
    print 'loading "%s" ...' % fn
    digits_img = cv2.imread(fn, 0)
-    h, w = digits_img.shape
-    digits = [np.hsplit(row, w/SZ) for row in np.vsplit(digits_img, h/SZ)]
-    digits = np.array(digits).reshape(-1, SZ, SZ)
+    digits = split2d(digits_img, (SZ, SZ))
    labels = np.repeat(np.arange(CLASS_N), len(digits)/CLASS_N)
    return digits, labels

@ -92,6 +111,31 @@ def evaluate_model(model, digits, samples, labels):
        vis.append(img)
    return mosaic(25, vis)

+def preprocess_simple(digits):
+    return np.float32(digits).reshape(-1, SZ*SZ) / 255.0
+
+def preprocess_hog(digits):
+    samples = []
+    for img in digits:
+        gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
+        gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
+        mag, ang = cv2.cartToPolar(gx, gy)
+        bin_n = 16
+        bin = np.int32(bin_n*ang/(2*np.pi))
+        bin_cells = bin[:10,:10], bin[10:,:10], bin[:10,10:], bin[10:,10:]
+        mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
+        hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
+        hist = np.hstack(hists)
+
+        # transform to Hellinger kernel
+        eps = 1e-7 
+        hist /= hist.sum() + eps
+        hist = np.sqrt(hist)
+        hist /= norm(hist) + eps
+
+        samples.append(hist)
+    return np.float32(samples)
+

 if __name__ == '__main__':
    print __doc__
@ -100,13 +144,13 @@ if __name__ == '__main__':
    
    print 'preprocessing...'
    # shuffle digits
-    rand = np.random.RandomState(12345)
+    rand = np.random.RandomState(321)
    shuffle = rand.permutation(len(digits))
    digits, labels = digits[shuffle], labels[shuffle]
    
    digits2 = map(deskew, digits)
-    samples = np.float32(digits2).reshape(-1, SZ*SZ) / 255.0
-
+    samples = preprocess_hog(digits2)
+    
    train_n = int(0.9*len(samples))
    cv2.imshow('test set', mosaic(25, digits[train_n:]))
    digits_train, digits_test = np.split(digits2, [train_n])
@ -115,13 +159,13 @@ if __name__ == '__main__':

    
    print 'training KNearest...'
-    model = KNearest(k=1)
+    model = KNearest(k=4)
    model.train(samples_train, labels_train)
    vis = evaluate_model(model, digits_test, samples_test, labels_test)
    cv2.imshow('KNearest test', vis)

    print 'training SVM...'
-    model = SVM(C=4.66, gamma=0.08)
+    model = SVM(C=2.67, gamma=5.383)
    model.train(samples_train, labels_train)
    vis = evaluate_model(model, digits_test, samples_test, labels_test)
    cv2.imshow('SVM test', vis)
--- a/samples/python2/digits_adjust.py
+++ b/samples/python2/digits_adjust.py
@ -76,7 +76,7 @@ class App(object):
        shuffle = np.random.permutation(len(digits))
        digits, labels = digits[shuffle], labels[shuffle]
        digits2 = map(deskew, digits)
-        samples = np.float32(digits2).reshape(-1, SZ*SZ) / 255.0
+        samples = preprocess_hog(digits2)
        return samples, labels

    def get_dataset(self):
@ -95,8 +95,8 @@ class App(object):
        return ires
            
    def adjust_SVM(self):
-        Cs = np.logspace(0, 5, 10, base=2)
-        gammas = np.logspace(-7, -2, 10, base=2)
+        Cs = np.logspace(0, 10, 15, base=2)
+        gammas = np.logspace(-7, 4, 15, base=2)
        scores = np.zeros((len(Cs), len(gammas)))
        scores[:] = np.nan

@ -114,6 +114,9 @@ class App(object):
            print '%d / %d (best error: %.2f %%, last: %.2f %%)' % (count+1, scores.size, np.nanmin(scores)*100, score*100)
        print scores

+        print 'writing score table to "svm_scores.npz"'
+        np.savez('svm_scores.npz', scores=scores, Cs=Cs, gammas=gammas)
+
        i, j = np.unravel_index(scores.argmin(), scores.shape)
        best_params = dict(C = Cs[i], gamma=gammas[j])
        print 'best params:', best_params
@ -142,7 +145,6 @@ if __name__ == '__main__':
    
    print __doc__

-
    args, _ = getopt.getopt(sys.argv[1:], '', ['model=', 'cloud', 'env='])
    args = dict(args)
    args.setdefault('--model', 'svm')
--- a/samples/python2/digits_video.py
+++ b/samples/python2/digits_video.py
@ -1,10 +1,10 @@
 import numpy as np
 import cv2
-import digits
 import os
 import video
 from common import mosaic

+from digits import *


 def main():
@ -15,11 +15,9 @@ def main():
        print '"%s" not found, run digits.py first' % classifier_fn
        return 
    
-    model = digits.SVM()
+    model = SVM()
    model.load('digits_svm.dat')

-    SZ = 20
-
    while True:
        ret, frame = cap.read()
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
@ -55,13 +53,12 @@ def main():
            A[:,:2] = np.eye(2)*s
            A[:,2] = t
            sub1 = cv2.warpAffine(sub, A, (SZ, SZ), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
-            sub1 = digits.deskew(sub1)
+            sub1 = deskew(sub1)
            if x+w+SZ < frame.shape[1] and y+SZ < frame.shape[0]:
                frame[y:,x+w:][:SZ, :SZ] = sub1[...,np.newaxis]
                
-            sample = np.float32(sub1).reshape(1,SZ*SZ) / 255.0
+            sample = preprocess_hog([sub1])
            digit = model.predict(sample)[0]
-
            cv2.putText(frame, '%d'%digit, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (200, 0, 0), thickness = 1)


--- a/samples/python2/fitline.py
+++ b/samples/python2/fitline.py
@ -2,14 +2,16 @@
 Robust line fitting.
 ==================

-Example of using cv2.fitLine function for fitting line to points in presence of outliers.
+Example of using cv2.fitLine function for fitting line 
+to points in presence of outliers.

 Usage
 -----
 fitline.py

-Switch through different M-estimator functions and see, how well the robust functions
-fit the line even in case of ~50% of outliers.
+Switch through different M-estimator functions and see, 
+how well the robust functions fit the line even 
+in case of ~50% of outliers.

 Keys
 ----