 57d4c86b2b
			
		
	
	57d4c86b2b
	
	
	
		
			
			Also, removed the one from modules/python/src2/cv.py and cleared its executable bit, since it's not a script.
		
			
				
	
	
		
			183 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			183 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python
 | |
| 
 | |
| '''
 | |
| The sample demonstrates how to train Random Trees classifier
 | |
| (or Boosting classifier, or MLP, or Knearest, or Support Vector Machines) using the provided dataset.
 | |
| 
 | |
| We use the sample database letter-recognition.data
 | |
| from UCI Repository, here is the link:
 | |
| 
 | |
| Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
 | |
| UCI Repository of machine learning databases
 | |
| [http://www.ics.uci.edu/~mlearn/MLRepository.html].
 | |
| Irvine, CA: University of California, Department of Information and Computer Science.
 | |
| 
 | |
| The dataset consists of 20000 feature vectors along with the
 | |
| responses - capital latin letters A..Z.
 | |
| The first 10000 samples are used for training
 | |
| and the remaining 10000 - to test the classifier.
 | |
| ======================================================
 | |
| USAGE:
 | |
|   letter_recog.py [--model <model>]
 | |
|                   [--data <data fn>]
 | |
|                   [--load <model fn>] [--save <model fn>]
 | |
| 
 | |
|   Models: RTrees, KNearest, Boost, SVM, MLP
 | |
| '''
 | |
| 
 | |
| import numpy as np
 | |
| import cv2
 | |
| 
 | |
| def load_base(fn):
 | |
|     a = np.loadtxt(fn, np.float32, delimiter=',', converters={ 0 : lambda ch : ord(ch)-ord('A') })
 | |
|     samples, responses = a[:,1:], a[:,0]
 | |
|     return samples, responses
 | |
| 
 | |
| class LetterStatModel(object):
 | |
|     class_n = 26
 | |
|     train_ratio = 0.5
 | |
| 
 | |
|     def load(self, fn):
 | |
|         self.model.load(fn)
 | |
|     def save(self, fn):
 | |
|         self.model.save(fn)
 | |
| 
 | |
|     def unroll_samples(self, samples):
 | |
|         sample_n, var_n = samples.shape
 | |
|         new_samples = np.zeros((sample_n * self.class_n, var_n+1), np.float32)
 | |
|         new_samples[:,:-1] = np.repeat(samples, self.class_n, axis=0)
 | |
|         new_samples[:,-1] = np.tile(np.arange(self.class_n), sample_n)
 | |
|         return new_samples
 | |
| 
 | |
|     def unroll_responses(self, responses):
 | |
|         sample_n = len(responses)
 | |
|         new_responses = np.zeros(sample_n*self.class_n, np.int32)
 | |
|         resp_idx = np.int32( responses + np.arange(sample_n)*self.class_n )
 | |
|         new_responses[resp_idx] = 1
 | |
|         return new_responses
 | |
| 
 | |
| class RTrees(LetterStatModel):
 | |
|     def __init__(self):
 | |
|         self.model = cv2.RTrees()
 | |
| 
 | |
|     def train(self, samples, responses):
 | |
|         sample_n, var_n = samples.shape
 | |
|         var_types = np.array([cv2.CV_VAR_NUMERICAL] * var_n + [cv2.CV_VAR_CATEGORICAL], np.uint8)
 | |
|         #CvRTParams(10,10,0,false,15,0,true,4,100,0.01f,CV_TERMCRIT_ITER));
 | |
|         params = dict(max_depth=10 )
 | |
|         self.model.train(samples, cv2.CV_ROW_SAMPLE, responses, varType = var_types, params = params)
 | |
| 
 | |
|     def predict(self, samples):
 | |
|         return np.float32( [self.model.predict(s) for s in samples] )
 | |
| 
 | |
| 
 | |
| class KNearest(LetterStatModel):
 | |
|     def __init__(self):
 | |
|         self.model = cv2.KNearest()
 | |
| 
 | |
|     def train(self, samples, responses):
 | |
|         self.model.train(samples, responses)
 | |
| 
 | |
|     def predict(self, samples):
 | |
|         retval, results, neigh_resp, dists = self.model.find_nearest(samples, k = 10)
 | |
|         return results.ravel()
 | |
| 
 | |
| 
 | |
| class Boost(LetterStatModel):
 | |
|     def __init__(self):
 | |
|         self.model = cv2.Boost()
 | |
| 
 | |
|     def train(self, samples, responses):
 | |
|         sample_n, var_n = samples.shape
 | |
|         new_samples = self.unroll_samples(samples)
 | |
|         new_responses = self.unroll_responses(responses)
 | |
|         var_types = np.array([cv2.CV_VAR_NUMERICAL] * var_n + [cv2.CV_VAR_CATEGORICAL, cv2.CV_VAR_CATEGORICAL], np.uint8)
 | |
|         #CvBoostParams(CvBoost::REAL, 100, 0.95, 5, false, 0 )
 | |
|         params = dict(max_depth=5) #, use_surrogates=False)
 | |
|         self.model.train(new_samples, cv2.CV_ROW_SAMPLE, new_responses, varType = var_types, params=params)
 | |
| 
 | |
|     def predict(self, samples):
 | |
|         new_samples = self.unroll_samples(samples)
 | |
|         pred = np.array( [self.model.predict(s, returnSum = True) for s in new_samples] )
 | |
|         pred = pred.reshape(-1, self.class_n).argmax(1)
 | |
|         return pred
 | |
| 
 | |
| 
 | |
| class SVM(LetterStatModel):
 | |
|     def __init__(self):
 | |
|         self.model = cv2.SVM()
 | |
| 
 | |
|     def train(self, samples, responses):
 | |
|         params = dict( kernel_type = cv2.SVM_LINEAR,
 | |
|                        svm_type = cv2.SVM_C_SVC,
 | |
|                        C = 1 )
 | |
|         self.model.train(samples, responses, params = params)
 | |
| 
 | |
|     def predict(self, samples):
 | |
|         return self.model.predict_all(samples).ravel()
 | |
| 
 | |
| 
 | |
| class MLP(LetterStatModel):
 | |
|     def __init__(self):
 | |
|         self.model = cv2.ANN_MLP()
 | |
| 
 | |
|     def train(self, samples, responses):
 | |
|         sample_n, var_n = samples.shape
 | |
|         new_responses = self.unroll_responses(responses).reshape(-1, self.class_n)
 | |
| 
 | |
|         layer_sizes = np.int32([var_n, 100, 100, self.class_n])
 | |
|         self.model.create(layer_sizes)
 | |
| 
 | |
|         # CvANN_MLP_TrainParams::BACKPROP,0.001
 | |
|         params = dict( term_crit = (cv2.TERM_CRITERIA_COUNT, 300, 0.01),
 | |
|                        train_method = cv2.ANN_MLP_TRAIN_PARAMS_BACKPROP,
 | |
|                        bp_dw_scale = 0.001,
 | |
|                        bp_moment_scale = 0.0 )
 | |
|         self.model.train(samples, np.float32(new_responses), None, params = params)
 | |
| 
 | |
|     def predict(self, samples):
 | |
|         ret, resp = self.model.predict(samples)
 | |
|         return resp.argmax(-1)
 | |
| 
 | |
| 
 | |
| if __name__ == '__main__':
 | |
|     import getopt
 | |
|     import sys
 | |
| 
 | |
|     print  __doc__
 | |
| 
 | |
|     models = [RTrees, KNearest, Boost, SVM, MLP] # NBayes
 | |
|     models = dict( [(cls.__name__.lower(), cls) for cls in models] )
 | |
| 
 | |
| 
 | |
|     args, dummy = getopt.getopt(sys.argv[1:], '', ['model=', 'data=', 'load=', 'save='])
 | |
|     args = dict(args)
 | |
|     args.setdefault('--model', 'rtrees')
 | |
|     args.setdefault('--data', '../cpp/letter-recognition.data')
 | |
| 
 | |
|     print 'loading data %s ...' % args['--data']
 | |
|     samples, responses = load_base(args['--data'])
 | |
|     Model = models[args['--model']]
 | |
|     model = Model()
 | |
| 
 | |
|     train_n = int(len(samples)*model.train_ratio)
 | |
|     if '--load' in args:
 | |
|         fn = args['--load']
 | |
|         print 'loading model from %s ...' % fn
 | |
|         model.load(fn)
 | |
|     else:
 | |
|         print 'training %s ...' % Model.__name__
 | |
|         model.train(samples[:train_n], responses[:train_n])
 | |
| 
 | |
|     print 'testing...'
 | |
|     train_rate = np.mean(model.predict(samples[:train_n]) == responses[:train_n])
 | |
|     test_rate  = np.mean(model.predict(samples[train_n:]) == responses[train_n:])
 | |
| 
 | |
|     print 'train rate: %f  test rate: %f' % (train_rate*100, test_rate*100)
 | |
| 
 | |
|     if '--save' in args:
 | |
|         fn = args['--save']
 | |
|         print 'saving model to %s ...' % fn
 | |
|         model.save(fn)
 | |
|     cv2.destroyAllWindows()
 |