comments for digits.py sample

This commit is contained in:
Alexander Mordvintsev
2012-06-06 05:52:28 +00:00
parent 03a14bb525
commit 455349a0eb
2 changed files with 60 additions and 51 deletions

View File

@@ -2,6 +2,7 @@ import numpy as np
import cv2 import cv2
import os import os
from contextlib import contextmanager from contextlib import contextmanager
import itertools as it
image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm'] image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm']
@@ -170,3 +171,22 @@ class RectSelector:
return return
x0, y0, x1, y1 = self.drag_rect x0, y0, x1, y1 = self.drag_rect
cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2) cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2)
def grouper(n, iterable, fillvalue=None):
    '''Collect items from an iterable into tuples of length n.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx

    n         -- number of items per group
    iterable  -- source of items (any iterable, consumed lazily)
    fillvalue -- value used to pad the last group when items run out

    Returns an iterator of n-tuples.
    '''
    # The function is named izip_longest on Python 2 and zip_longest on Python 3.
    zip_longest = getattr(it, 'izip_longest', None) or it.zip_longest
    # n references to the *same* iterator: every output tuple pulls
    # n consecutive items from it.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
def mosaic(w, imgs):
    '''Make a grid from images.

    w    -- number of grid columns (must be >= 1)
    imgs -- iterable of images (must have same size and format)

    Images are laid out row by row, w per row. If the last row is
    incomplete it is padded with zero-filled images shaped like the first one.

    Returns the grid as a single array.
    Raises ValueError if w < 1 or imgs is empty.
    '''
    if w < 1:
        raise ValueError('mosaic() needs at least one grid column')
    # Materialize so that any iterable (including a generator) is accepted.
    tiles = list(imgs)
    if not tiles:
        raise ValueError('mosaic() needs at least one image')
    pad = np.zeros_like(tiles[0])
    # -len % w == number of tiles missing to complete the last row.
    tiles.extend([pad] * (-len(tiles) % w))
    rows = [np.hstack(tiles[i:i + w]) for i in range(0, len(tiles), w)]
    return np.vstack(rows)

View File

@@ -1,89 +1,78 @@
'''
Neural network digit recognition sample.
Usage:
digits.py
Sample loads a dataset of handwritten digits from 'digits.png'.
Then it trains a neural network classifier on it and evaluates
its classification accuracy.
'''
import numpy as np import numpy as np
import cv2 import cv2
import itertools as it from common import mosaic
'''
from scipy.io import loadmat
m = loadmat('ex4data1.mat')
X = m['X'].reshape(-1, 20, 20)
X = np.transpose(X, (0, 2, 1))
img = np.vstack(map(np.hstack, X.reshape(-1, 100, 20, 20)))
img = np.uint8(np.clip(img, 0, 1)*255)
cv2.imwrite('digits.png', img)
'''
def unroll_responses(responses, class_n): def unroll_responses(responses, class_n):
'''[1, 0, 2, ...] -> [[0, 1, 0], [1, 0, 0], [0, 0, 1], ...]'''
sample_n = len(responses) sample_n = len(responses)
new_responses = np.zeros((sample_n, class_n), np.float32) new_responses = np.zeros((sample_n, class_n), np.float32)
new_responses[np.arange(sample_n), responses] = 1 new_responses[np.arange(sample_n), responses] = 1
return new_responses return new_responses
SZ = 20 SZ = 20 # size of each digit is SZ x SZ
CLASS_N = 10
digits_img = cv2.imread('digits.png', 0) digits_img = cv2.imread('digits.png', 0)
# prepare dataset
h, w = digits_img.shape h, w = digits_img.shape
digits = [np.hsplit(row, w/SZ) for row in np.vsplit(digits_img, h/SZ)] digits = [np.hsplit(row, w/SZ) for row in np.vsplit(digits_img, h/SZ)]
digits = np.float32(digits).reshape(-1, SZ*SZ) digits = np.float32(digits).reshape(-1, SZ*SZ)
N = len(digits) N = len(digits)
labels = np.repeat(np.arange(10), N/10) labels = np.repeat(np.arange(CLASS_N), N/CLASS_N)
# split it into train and test subsets
shuffle = np.random.permutation(N) shuffle = np.random.permutation(N)
train_n = int(0.9*N) train_n = int(0.9*N)
digits_train, digits_test = np.split(digits[shuffle], [train_n]) digits_train, digits_test = np.split(digits[shuffle], [train_n])
labels_train, labels_test = np.split(labels[shuffle], [train_n]) labels_train, labels_test = np.split(labels[shuffle], [train_n])
labels_train_unrolled = unroll_responses(labels_train, 10) # train model
model = cv2.ANN_MLP() model = cv2.ANN_MLP()
layer_sizes = np.int32([SZ*SZ, 25, 10]) layer_sizes = np.int32([SZ*SZ, 25, CLASS_N])
model.create(layer_sizes) model.create(layer_sizes)
params = dict( term_crit = (cv2.TERM_CRITERIA_COUNT, 100, 0.01),
# CvANN_MLP_TrainParams::BACKPROP,0.001
params = dict( term_crit = (cv2.TERM_CRITERIA_COUNT, 300, 0.01),
train_method = cv2.ANN_MLP_TRAIN_PARAMS_BACKPROP, train_method = cv2.ANN_MLP_TRAIN_PARAMS_BACKPROP,
bp_dw_scale = 0.001, bp_dw_scale = 0.001,
bp_moment_scale = 0.0 ) bp_moment_scale = 0.0 )
print 'training...' print 'training...'
labels_train_unrolled = unroll_responses(labels_train, CLASS_N)
model.train(digits_train, labels_train_unrolled, None, params=params) model.train(digits_train, labels_train_unrolled, None, params=params)
model.save('dig_nn.dat') model.save('dig_nn.dat')
model.load('dig_nn.dat') model.load('dig_nn.dat')
ret, resp = model.predict(digits_test) def evaluate(model, samples, labels):
resp = resp.argmax(-1) '''Evaluates classifier performance on a given labeled samples set.'''
error_mask = (resp == labels_test) ret, resp = model.predict(samples)
print error_mask.mean() resp = resp.argmax(-1)
error_mask = (resp == labels)
accuracy = error_mask.mean()
return accuracy, error_mask
def grouper(n, iterable, fillvalue=None): # evaluate model
"grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx" train_accuracy, _ = evaluate(model, digits_train, labels_train)
args = [iter(iterable)] * n print 'train accuracy: ', train_accuracy
return it.izip_longest(fillvalue=fillvalue, *args) test_accuracy, test_error_mask = evaluate(model, digits_test, labels_test)
print 'test accuracy: ', test_accuracy
def mosaic(w, imgs): # visualize test results
imgs = iter(imgs) vis = []
img0 = imgs.next() for img, flag in zip(digits_test, test_error_mask):
pad = np.zeros_like(img0) img = np.uint8(img).reshape(SZ, SZ)
imgs = it.chain([img0], imgs)
rows = grouper(w, imgs, pad)
return np.vstack(map(np.hstack, rows))
test_img = np.uint8(digits_test).reshape(-1, SZ, SZ)
def vis_resp(img, flag):
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
if not flag: if not flag:
img[...,:2] = 0 img[...,:2] = 0
return img vis.append(img)
vis = mosaic(25, vis)
test_img = mosaic(25, it.starmap(vis_resp, it.izip(test_img, error_mask))) cv2.imshow('test', vis)
cv2.imshow('test', test_img)
cv2.waitKey() cv2.waitKey()