diff --git a/samples/python2/digits.py b/samples/python2/digits.py index 1bfbf4dd6347d507d75bdc2f0f28c8777c913a8c..f39591e6f3b62dba689c5bf29bd35fae72b70a50 100644 --- a/samples/python2/digits.py +++ b/samples/python2/digits.py @@ -3,8 +3,19 @@ SVN and KNearest digit recognition. Sample loads a dataset of handwritten digits from 'digits.png'. Then it trains a SVN and KNearest classifiers on it and evaluates -their accuracy. Moment-based image deskew is used to improve -the recognition accuracy. +their accuracy. + +Following preprocessing is applied to the dataset: + - Moment-based image deskew (see deskew()) + - Digit images are split into 4 10x10 cells and 16-bin + histogram of oriented gradients is computed for each + cell + - Transform histograms to space with Hellinger metric (see [1] (RootSIFT)) + + +[1] R. Arandjelovic, A. Zisserman + "Three things everyone should know to improve object retrieval" + http://www.robots.ox.ac.uk/~vgg/publications/2012/Arandjelovic12/arandjelovic12.pdf Usage: digits.py @@ -14,17 +25,25 @@ import numpy as np import cv2 from multiprocessing.pool import ThreadPool from common import clock, mosaic +from numpy.linalg import norm SZ = 20 # size of each digit is SZ x SZ CLASS_N = 10 DIGITS_FN = 'digits.png' +def split2d(img, cell_size, flatten=True): + h, w = img.shape[:2] + sx, sy = cell_size + cells = [np.hsplit(row, w//sx) for row in np.vsplit(img, h//sy)] + cells = np.array(cells) + if flatten: + cells = cells.reshape(-1, sy, sx) + return cells + def load_digits(fn): print 'loading "%s" ...' % fn digits_img = cv2.imread(fn, 0) - h, w = digits_img.shape - digits = [np.hsplit(row, w/SZ) for row in np.vsplit(digits_img, h/SZ)] - digits = np.array(digits).reshape(-1, SZ, SZ) + digits = split2d(digits_img, (SZ, SZ)) labels = np.repeat(np.arange(CLASS_N), len(digits)/CLASS_N) return digits, labels @@ -92,6 +111,31 @@ def evaluate_model(model, digits, samples, labels): vis.append(img) return mosaic(25, vis) +def preprocess_simple(digits): + return np.float32(digits).reshape(-1, SZ*SZ) / 255.0 + +def preprocess_hog(digits): + samples = [] + for img in digits: + gx = cv2.Sobel(img, cv2.CV_32F, 1, 0) + gy = cv2.Sobel(img, cv2.CV_32F, 0, 1) + mag, ang = cv2.cartToPolar(gx, gy) + bin_n = 16 + bin = np.int32(bin_n*ang/(2*np.pi)) + bin_cells = bin[:10,:10], bin[10:,:10], bin[:10,10:], bin[10:,10:] + mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:] + hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)] + hist = np.hstack(hists) + + # transform to Hellinger kernel + eps = 1e-7 + hist /= hist.sum() + eps + hist = np.sqrt(hist) + hist /= norm(hist) + eps + + samples.append(hist) + return np.float32(samples) + if __name__ == '__main__': print __doc__ @@ -100,13 +144,13 @@ if __name__ == '__main__': print 'preprocessing...' # shuffle digits - rand = np.random.RandomState(12345) + rand = np.random.RandomState(321) shuffle = rand.permutation(len(digits)) digits, labels = digits[shuffle], labels[shuffle] digits2 = map(deskew, digits) - samples = np.float32(digits2).reshape(-1, SZ*SZ) / 255.0 - + samples = preprocess_hog(digits2) + train_n = int(0.9*len(samples)) cv2.imshow('test set', mosaic(25, digits[train_n:])) digits_train, digits_test = np.split(digits2, [train_n]) @@ -115,13 +159,13 @@ if __name__ == '__main__': print 'training KNearest...' - model = KNearest(k=1) + model = KNearest(k=4) model.train(samples_train, labels_train) vis = evaluate_model(model, digits_test, samples_test, labels_test) cv2.imshow('KNearest test', vis) print 'training SVM...' - model = SVM(C=4.66, gamma=0.08) + model = SVM(C=2.67, gamma=5.383) model.train(samples_train, labels_train) vis = evaluate_model(model, digits_test, samples_test, labels_test) cv2.imshow('SVM test', vis) diff --git a/samples/python2/digits_adjust.py b/samples/python2/digits_adjust.py index e5a751a6be2546caca632553250c7344b9318c14..c2a238dafae7784a5f802ab3e5f4ea55f5549bf3 100644 --- a/samples/python2/digits_adjust.py +++ b/samples/python2/digits_adjust.py @@ -76,7 +76,7 @@ class App(object): shuffle = np.random.permutation(len(digits)) digits, labels = digits[shuffle], labels[shuffle] digits2 = map(deskew, digits) - samples = np.float32(digits2).reshape(-1, SZ*SZ) / 255.0 + samples = preprocess_hog(digits2) return samples, labels def get_dataset(self): @@ -95,8 +95,8 @@ class App(object): return ires def adjust_SVM(self): - Cs = np.logspace(0, 5, 10, base=2) - gammas = np.logspace(-7, -2, 10, base=2) + Cs = np.logspace(0, 10, 15, base=2) + gammas = np.logspace(-7, 4, 15, base=2) scores = np.zeros((len(Cs), len(gammas))) scores[:] = np.nan @@ -114,6 +114,9 @@ class App(object): print '%d / %d (best error: %.2f %%, last: %.2f %%)' % (count+1, scores.size, np.nanmin(scores)*100, score*100) print scores + print 'writing score table to "svm_scores.npz"' + np.savez('svm_scores.npz', scores=scores, Cs=Cs, gammas=gammas) + i, j = np.unravel_index(scores.argmin(), scores.shape) best_params = dict(C = Cs[i], gamma=gammas[j]) print 'best params:', best_params @@ -142,7 +145,6 @@ if __name__ == '__main__': print __doc__ - args, _ = getopt.getopt(sys.argv[1:], '', ['model=', 'cloud', 'env=']) args = dict(args) args.setdefault('--model', 'svm') diff --git a/samples/python2/digits_video.py b/samples/python2/digits_video.py index 0eddadf8546c696ff3f77afb9cffb3c43b12f29f..2a46daad32236ea750f30ad590ea07db245c2033 100644 --- a/samples/python2/digits_video.py +++ b/samples/python2/digits_video.py @@ -1,10 +1,10 @@ import numpy as np import cv2 -import digits import os import video from common import mosaic +from digits import * def main(): @@ -15,11 +15,9 @@ def main(): print '"%s" not found, run digits.py first' % classifier_fn return - model = digits.SVM() + model = SVM() model.load('digits_svm.dat') - SZ = 20 - while True: ret, frame = cap.read() gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -55,13 +53,12 @@ def main(): A[:,:2] = np.eye(2)*s A[:,2] = t sub1 = cv2.warpAffine(sub, A, (SZ, SZ), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR) - sub1 = digits.deskew(sub1) + sub1 = deskew(sub1) if x+w+SZ < frame.shape[1] and y+SZ < frame.shape[0]: frame[y:,x+w:][:SZ, :SZ] = sub1[...,np.newaxis] - sample = np.float32(sub1).reshape(1,SZ*SZ) / 255.0 + sample = preprocess_hog([sub1]) digit = model.predict(sample)[0] - cv2.putText(frame, '%d'%digit, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (200, 0, 0), thickness = 1) diff --git a/samples/python2/fitline.py b/samples/python2/fitline.py index 71e2cb0550798ecdfe0947d4dee9db3447d99f70..9ba883937332784bef96e81849c5c7d829d41cc4 100644 --- a/samples/python2/fitline.py +++ b/samples/python2/fitline.py @@ -2,14 +2,16 @@ Robust line fitting. ================== -Example of using cv2.fitLine function for fitting line to points in presence of outliers. +Example of using cv2.fitLine function for fitting line +to points in presence of outliers. Usage ----- fitline.py -Switch through different M-estimator functions and see, how well the robust functions -fit the line even in case of ~50% of outliers. +Switch through different M-estimator functions and see, +how well the robust functions fit the line even +in case of ~50% of outliers. Keys ----