diff --git a/fluid/adversarial/README.md b/fluid/adversarial/README.md index 39953ef491efb92e59e2ba9208fa92ad9df5b384..91661f7e1675d59c7d38c4c09bc67d5b9339573d 100644 --- a/fluid/adversarial/README.md +++ b/fluid/adversarial/README.md @@ -54,6 +54,7 @@ The structure of Advbox module are as follows: | ├── mnist_tutorial_fgsm.py | ├── mnist_tutorial_bim.py | ├── mnist_tutorial_ilcm.py + | ├── mnist_tutorial_mifgsm.py | ├── mnist_tutorial_jsma.py | └── mnist_tutorial_deepfool.py └── README.md @@ -77,6 +78,7 @@ The `./tutorials/` folder provides some tutorials to generate adversarial exampl * [FGSM](https://arxiv.org/abs/1412.6572) * [BIM](https://arxiv.org/abs/1607.02533) * [ILCM](https://arxiv.org/abs/1607.02533) +* [MI-FGSM](https://arxiv.org/pdf/1710.06081.pdf) * [JSMA](https://arxiv.org/pdf/1511.07528) * [DeepFool](https://arxiv.org/abs/1511.04599) @@ -91,6 +93,7 @@ Benchmarks on a vanilla CNN model. |FGSM| 57.8% | 26.55% | 0.3 | One shot| *** | |BIM| 97.4% | --- | 0.1 | 100 | **** | |ILCM| --- | 100.0% | 0.1 | 100 | **** | +|MI-FGSM| 94.4% | 100.0% | 0.1 | 100 | **** | |JSMA| 96.8% | 90.4%| 0.1 | 2000 | *** | |DeepFool| 97.7% | 51.3% | --- | 100 | **** | @@ -101,8 +104,9 @@ Benchmarks on a vanilla CNN model. * [Intriguing properties of neural networks](https://arxiv.org/abs/1312.6199), C. Szegedy et al., arxiv 2014 * [Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572), I. Goodfellow et al., ICLR 2015 * [Adversarial Examples In The Physical World](https://arxiv.org/pdf/1607.02533v3.pdf), A. Kurakin et al., ICLR workshop 2017 +* [Boosting Adversarial Attacks with Momentum](https://arxiv.org/abs/1710.06081), Yinpeng Dong et al., arxiv 2018 * [The Limitations of Deep Learning in Adversarial Settings](https://arxiv.org/abs/1511.07528), N. Papernot et al., ESSP 2016 * [DeepFool: a simple and accurate method to fool deep neural networks](https://arxiv.org/abs/1511.04599), S. 
Moosavi-Dezfooli et al., CVPR 2016 -* [Foolbox: A Python toolbox to benchmark the robustness of machine learning models] (https://arxiv.org/abs/1707.04131), Jonas Rauber et al., arxiv 2018 +* [Foolbox: A Python toolbox to benchmark the robustness of machine learning models](https://arxiv.org/abs/1707.04131), Jonas Rauber et al., arxiv 2018 * [CleverHans: An adversarial example library for constructing attacks, building defenses, and benchmarking both](https://github.com/tensorflow/cleverhans#setting-up-cleverhans) * [Threat of Adversarial Attacks on Deep Learning in Computer Vision: A Survey](https://arxiv.org/abs/1801.00553), Naveed Akhtar, Ajmal Mian, arxiv 2018 diff --git a/fluid/adversarial/advbox/attacks/gradient_method.py b/fluid/adversarial/advbox/attacks/gradient_method.py index f4e1280972b4df887b02b54e272dde6484daf8a8..146b650c21464279f5527eb4a8bf44593e9dce29 100644 --- a/fluid/adversarial/advbox/attacks/gradient_method.py +++ b/fluid/adversarial/advbox/attacks/gradient_method.py @@ -14,7 +14,8 @@ __all__ = [ 'GradientMethodAttack', 'FastGradientSignMethodAttack', 'FGSM', 'FastGradientSignMethodTargetedAttack', 'FGSMT', 'BasicIterativeMethodAttack', 'BIM', - 'IterativeLeastLikelyClassMethodAttack', 'ILCM' + 'IterativeLeastLikelyClassMethodAttack', 'ILCM', 'MomentumIteratorAttack', + 'MIFGSM' ] @@ -76,9 +77,9 @@ class GradientMethodAttack(Attack): for epsilon in epsilons[:]: step = 1 adv_img = adversary.original + if epsilon == 0.0: + continue for i in range(steps): - if epsilon == 0.0: - continue if adversary.is_targeted_attack: gradient = -self.model.gradient(adv_img, adversary.target_label) @@ -175,7 +176,103 @@ class BasicIterativeMethodAttack(IterativeLeastLikelyClassMethodAttack): super(BasicIterativeMethodAttack, self).__init__(model, False) +class MomentumIteratorAttack(GradientMethodAttack): + """ + The Momentum Iterative Fast Gradient Sign Method (Dong et al. 2017). + This method won the first places in NIPS 2017 Non-targeted Adversarial + Attacks and Targeted Adversarial Attacks. The original paper used + hard labels for this attack; no label smoothing. inf norm. + Paper link: https://arxiv.org/pdf/1710.06081.pdf + """ + + def __init__(self, model, support_targeted=True): + """ + :param model(model): The model to be attacked. + :param support_targeted(bool): Does this attack method support targeted. + """ + super(MomentumIteratorAttack, self).__init__(model) + self.support_targeted = support_targeted + + def _apply(self, + adversary, + norm_ord=np.inf, + epsilons=0.1, + steps=100, + epsilon_steps=100, + decay_factor=1): + """ + Apply the momentum iterative gradient attack method. + :param adversary(Adversary): + The Adversary object. + :param norm_ord(int): + Order of the norm, such as np.inf, 1, 2, etc. It can't be 0. + :param epsilons(list|tuple|float): + Attack step size (input variation). + Largest step size if epsilons is not iterable. + :param epsilon_steps: + The number of Epsilons' iteration for each attack iteration. + :param steps: + The number of attack iteration. + :param decay_factor: + The decay factor for the momentum term. + :return: + adversary(Adversary): The Adversary object. 
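+
+        The update rule implemented below (non-targeted, inf-norm case) is
+        roughly:
+            momentum = decay_factor * momentum + gradient / ||gradient||_1
+            adv_img = clip(adv_img + epsilon * sign(momentum), min_, max_)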
+ """ + if norm_ord == 0: + raise ValueError("L0 norm is not supported!") + + if not self.support_targeted: + if adversary.is_targeted_attack: + raise ValueError( + "This attack method doesn't support targeted attack!") + + assert self.model.channel_axis() == adversary.original.ndim + assert (self.model.channel_axis() == 1 or + self.model.channel_axis() == adversary.original.shape[0] or + self.model.channel_axis() == adversary.original.shape[-1]) + + if not isinstance(epsilons, Iterable): + epsilons = np.linspace(0, epsilons, num=epsilon_steps) + + min_, max_ = self.model.bounds() + pre_label = adversary.original_label + + for epsilon in epsilons[:]: + if epsilon == 0.0: + continue + step = 1 + adv_img = adversary.original + momentum = 0 + for i in range(steps): + if adversary.is_targeted_attack: + gradient = -self.model.gradient(adv_img, + adversary.target_label) + else: + gradient = self.model.gradient(adv_img, pre_label) + + # normalize gradient + velocity = gradient / self._norm(gradient, ord=1) + momentum = decay_factor * momentum + velocity + if norm_ord == np.inf: + normalized_grad = np.sign(momentum) + else: + normalized_grad = self._norm(momentum, ord=norm_ord) + perturbation = epsilon * normalized_grad + adv_img = adv_img + perturbation + adv_img = np.clip(adv_img, min_, max_) + adv_label = np.argmax(self.model.predict(adv_img)) + logging.info( + 'step={}, epsilon = {:.5f}, pre_label = {}, adv_label={}' + .format(step, epsilon, pre_label, adv_label)) + if adversary.try_accept_the_example(adv_img, adv_label): + return adversary + step += 1 + + return adversary + + FGSM = FastGradientSignMethodAttack FGSMT = FastGradientSignMethodTargetedAttack BIM = BasicIterativeMethodAttack ILCM = IterativeLeastLikelyClassMethodAttack +MIFGSM = MomentumIteratorAttack diff --git a/fluid/adversarial/tutorials/mnist_tutorial_mifgsm.py b/fluid/adversarial/tutorials/mnist_tutorial_mifgsm.py new file mode 100644 index 0000000000000000000000000000000000000000..ded7ef4b19cd4d99d2c3143f703e3d594058f705 --- /dev/null +++ b/fluid/adversarial/tutorials/mnist_tutorial_mifgsm.py @@ -0,0 +1,143 @@ +""" +MIFGSM tutorial on mnist using advbox tool. +MIFGSM is a broad class of momentum iterative gradient-based methods based on FSGM. +It supports non-targeted attack and targeted attack. +""" +import sys +sys.path.append("..") + +import matplotlib.pyplot as plt +import numpy as np +import paddle.fluid as fluid +import paddle.v2 as paddle + +from advbox.adversary import Adversary +from advbox.attacks.gradient_method import MIFGSM +from advbox.models.paddle import PaddleModel +from tutorials.mnist_model import mnist_cnn_model + + +def main(): + """ + Advbox demo which demonstrate how to use advbox. 
+ """ + TOTAL_NUM = 500 + IMG_NAME = 'img' + LABEL_NAME = 'label' + + img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32') + # gradient should flow + img.stop_gradient = False + label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') + logits = mnist_cnn_model(img) + cost = fluid.layers.cross_entropy(input=logits, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # use CPU + place = fluid.CPUPlace() + # use GPU + # place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + + BATCH_SIZE = 1 + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.test(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + fluid.io.load_params( + exe, "./mnist/", main_program=fluid.default_main_program()) + + # advbox demo + m = PaddleModel( + fluid.default_main_program(), + IMG_NAME, + LABEL_NAME, + logits.name, + avg_cost.name, (-1, 1), + channel_axis=1) + attack = MIFGSM(m) + attack_config = { + "norm_ord": np.inf, + "epsilons": 0.1, + "steps": 100, + "decay_factor": 1 + } + + # use train data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in train_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # MIFGSM non-targeted attack + adversary = attack(adversary, **attack_config) + + # MIFGSM targeted attack + # tlabel = 0 + # adversary.set_target(is_targeted_attack=True, target_label=tlabel) + # adversary = attack(adversary, **attack_config) + + if adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / total_count)) + break + + # use test data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in test_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # MIFGSM non-targeted attack + adversary = attack(adversary, **attack_config) + + # MIFGSM targeted attack + # tlabel = 0 + # adversary.set_target(is_targeted_attack=True, target_label=tlabel) + # adversary = attack(adversary, **attack_config) + + if adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / total_count)) + break + print("mifgsm attack done") + + +if __name__ == '__main__': + main() diff --git a/fluid/image_classification/caffe2fluid/README.md b/fluid/image_classification/caffe2fluid/README.md index 6aba34b9cafbd87b3474575fcbcee65819769c2f..64f6b9cf901337fced0ac4e7eb625012dfcf6d2c 100644 --- a/fluid/image_classification/caffe2fluid/README.md +++ 
b/fluid/image_classification/caffe2fluid/README.md @@ -2,20 +2,31 @@ This tool is used to convert a Caffe model to Fluid model ### Howto -1, Prepare caffepb.py in ./proto if your python has no 'pycaffe' module, two options provided here: +1. Prepare caffepb.py in ./proto if your python has no 'pycaffe' module, two options provided here: +- Generate pycaffe from caffe.proto +
bash ./proto/compile.sh
- 1) generate it from caffe.proto using protoc - bash ./proto/compile.sh +- download one from github directly +
cd proto/ && wget https://github.com/ethereon/caffe-tensorflow/blob/master/kaffe/caffe/caffepb.py
+
- 2) download one from github directly - cd proto/ && wget https://github.com/ethereon/caffe-tensorflow/blob/master/kaffe/caffe/caffepb.py +2. Convert the Caffe model to Fluid model +- generate fluid code and weight file +
python convert.py alexnet.prototxt \
+        --caffemodel alexnet.caffemodel \
+        --data-output-path alexnet.npy \
+        --code-output-path alexnet.py
+
-2, Convert the caffe model using 'convert.py' which will generate a python script and a weight(in .npy) file +- save weights as fluid model file +
python alexnet.py alexnet.npy ./fluid_model
+
-3, Use the converted model to predict - - see more detail info in 'examples/xxx' +3. Use the converted model to infer +- see more details in '*examples/imagenet/run.sh*' +4. compare the inference results with caffe +- see more details in '*examples/imagenet/diff.sh*' ### Tested models - Lenet @@ -33,4 +44,4 @@ This tool is used to convert a Caffe model to Fluid model [model addr](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet) ### Notes -Some of this code come from here: https://github.com/ethereon/caffe-tensorflow +Some of this code come from here: [caffe-tensorflow](https://github.com/ethereon/caffe-tensorflow) diff --git a/fluid/image_classification/caffe2fluid/examples/imagenet/README.md b/fluid/image_classification/caffe2fluid/examples/imagenet/README.md index b82050859239be8804ddec8e2054edc38c4ac052..b9cf1941d29428c84c34df2a9ec00d7ae8e79014 100644 --- a/fluid/image_classification/caffe2fluid/examples/imagenet/README.md +++ b/fluid/image_classification/caffe2fluid/examples/imagenet/README.md @@ -1,10 +1,37 @@ -a demo to show converting caffe models on 'imagenet' using caffe2fluid +A demo to show converting caffe models on 'imagenet' using caffe2fluid --- # How to use -1. prepare python environment -2. download caffe model to "models.caffe/xxx" which contains "xxx.caffemodel" and "xxx.prototxt" -3. run the tool - eg: bash ./run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50 +1. Prepare python environment + +2. Download caffe model to "models.caffe/xxx" which contains "xxx.caffemodel" and "xxx.prototxt" + +3. Convert the Caffe model to Fluid model + - generate fluid code and weight file +
python convert.py alexnet.prototxt \
+        --caffemodel alexnet.caffemodel \
+        --data-output-path alexnet.npy \
+        --code-output-path alexnet.py
+    
+
+ - save weights as fluid model file
+
python alexnet.py alexnet.npy ./fluid_model
+    
+
+4. Do inference
+
python infer.py infer ./fluid_model data/65.jpeg
+
+
+5. Convert model and do inference together
+
bash ./run.sh alexnet ./models.caffe/alexnet ./models/alexnet
+
+ The Caffe model is stored in './models.caffe/alexnet/alexnet.prototxt|caffemodel'
+ and the Fluid model will be saved in './models/alexnet/alexnet.py|npy'
+
+6. Test the difference with caffe's results (needs pycaffe installed)
+
bash ./diff.sh resnet
+
+Make sure your caffemodel stored in './models.caffe/resnet'. +The results will be stored in './results/resnet.paddle|caffe' diff --git a/fluid/image_classification/caffe2fluid/examples/imagenet/diff.sh b/fluid/image_classification/caffe2fluid/examples/imagenet/diff.sh old mode 100644 new mode 100755 diff --git a/fluid/image_classification/caffe2fluid/examples/imagenet/infer.py b/fluid/image_classification/caffe2fluid/examples/imagenet/infer.py index bb75caa9e7364465042c5c88f471e8f6f5137237..099c0abb2e7773ca7ba5ccd8b90726d11613cc60 100644 --- a/fluid/image_classification/caffe2fluid/examples/imagenet/infer.py +++ b/fluid/image_classification/caffe2fluid/examples/imagenet/infer.py @@ -59,12 +59,12 @@ def build_model(net_file, net_name): inputs_dict = MyNet.input_shapes() input_name = inputs_dict.keys()[0] input_shape = inputs_dict[input_name] - images = fluid.layers.data(name='image', shape=input_shape, dtype='float32') + images = fluid.layers.data( + name=input_name, shape=input_shape, dtype='float32') #label = fluid.layers.data(name='label', shape=[1], dtype='int64') net = MyNet({input_name: images}) - input_shape = MyNet.input_shapes()[input_name] - return net, input_shape + return net, inputs_dict def dump_results(results, names, root): @@ -78,26 +78,27 @@ def dump_results(results, names, root): np.save(filename + '.npy', res) -def infer(net_file, net_name, model_file, imgfile, debug=True): - """ do inference using a model which consist 'xxx.py' and 'xxx.npy' +def load_model(exe, place, net_file, net_name, net_weight, debug): + """ load model using xxxnet.py and xxxnet.npy """ - fluid = import_fluid() #1, build model - net, input_shape = build_model(net_file, net_name) + net, input_map = build_model(net_file, net_name) + feed_names = input_map.keys() + feed_shapes = [v for k, v in input_map.items()] + prediction = net.get_output() #2, load weights for this model - place = fluid.CPUPlace() - exe = fluid.Executor(place) startup_program = fluid.default_startup_program() exe.run(startup_program) - if model_file.find('.npy') > 0: - net.load(data_path=model_file, exe=exe, place=place) + #place = fluid.CPUPlace() + if net_weight.find('.npy') > 0: + net.load(data_path=net_weight, exe=exe, place=place) else: - net.load(data_path=model_file, exe=exe) + raise ValueError('not found weight file') #3, test this model test_program = fluid.default_main_program().clone() @@ -111,10 +112,75 @@ def infer(net_file, net_name, model_file, imgfile, debug=True): fetch_list_var.append(v) fetch_list_name.append(k) + return { + 'program': test_program, + 'feed_names': feed_names, + 'fetch_vars': fetch_list_var, + 'fetch_names': fetch_list_name, + 'feed_shapes': feed_shapes + } + + +def get_shape(fluid, program, name): + for var in program.list_vars(): + if var.name == 'data': + return list(var.shape[1:]) + + raise ValueError('not found shape for input layer[%s], ' + 'you can specify by yourself' % (name)) + + +def load_inference_model(dirname, exe): + """ load fluid's inference model + """ + fluid = import_fluid() + model_fn = 'model' + params_fn = 'params' + if os.path.exists(os.path.join(dirname, model_fn)) \ + and os.path.exists(os.path.join(dirname, params_fn)): + program, feed_names, fetch_targets = fluid.io.load_inference_model(\ + dirname, exe, model_fn, params_fn) + else: + raise ValueError('not found model files in direcotry[%s]' % (dirname)) + + #print fluid.global_scope().find_var(feed_names[0]) + input_shape = get_shape(fluid, program, feed_names[0]) + feed_shapes = [input_shape] + + return program, 
feed_names, fetch_targets, feed_shapes + + +def infer(model_path, imgfile, net_file=None, net_name=None, debug=True): + """ do inference using a model which consist 'xxx.py' and 'xxx.npy' + """ + + fluid = import_fluid() + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + try: + ret = load_inference_model(model_path, exe) + program, feed_names, fetch_targets, feed_shapes = ret + debug = False + print('found a inference model for fluid') + except ValueError as e: + pass + print('try to load model using net file and weight file') + net_weight = model_path + ret = load_model(exe, place, net_file, net_name, net_weight, debug) + program = ret['program'] + feed_names = ret['feed_names'] + fetch_targets = ret['fetch_vars'] + fetch_list_name = ret['fetch_names'] + feed_shapes = ret['feed_shapes'] + + input_name = feed_names[0] + input_shape = feed_shapes[0] + np_images = load_data(imgfile, input_shape) - results = exe.run(program=test_program, - feed={'image': np_images}, - fetch_list=fetch_list_var) + results = exe.run(program=program, + feed={input_name: np_images}, + fetch_list=fetch_targets) if debug is True: dump_path = 'results.paddle' @@ -122,7 +188,7 @@ def infer(net_file, net_name, model_file, imgfile, debug=True): print('all result of layers dumped to [%s]' % (dump_path)) else: result = results[0] - print('predicted class:', np.argmax(result)) + print('succeed infer with results[class:%d]' % (np.argmax(result))) return 0 @@ -167,9 +233,12 @@ if __name__ == "__main__": weight_file = 'models/resnet50/resnet50.npy' datafile = 'data/65.jpeg' net_name = 'ResNet50' + model_file = 'models/resnet50/fluid' - argc = len(sys.argv) - if sys.argv[1] == 'caffe': + ret = None + if len(sys.argv) <= 2: + pass + elif sys.argv[1] == 'caffe': if len(sys.argv) != 5: print('usage:') print('\tpython %s caffe [prototxt] [caffemodel] [datafile]' % @@ -178,18 +247,34 @@ if __name__ == "__main__": prototxt = sys.argv[2] caffemodel = sys.argv[3] datafile = sys.argv[4] - sys.exit(caffe_infer(prototxt, caffemodel, datafile)) - elif argc == 5: - net_file = sys.argv[1] - weight_file = sys.argv[2] + ret = caffe_infer(prototxt, caffemodel, datafile) + elif sys.argv[1] == 'infer': + if len(sys.argv) != 4: + print('usage:') + print('\tpython %s infer [fluid_model] [datafile]' % (sys.argv[0])) + sys.exit(1) + model_path = sys.argv[2] datafile = sys.argv[3] - net_name = sys.argv[4] - elif argc > 1: + ret = infer(model_path, datafile) + elif sys.argv[1] == 'dump': + if len(sys.argv) != 6: + print('usage:') + print('\tpython %s dump [net_file] [weight_file] [datafile] [net_name]' \ + % (sys.argv[0])) + print('\teg:python dump %s %s %s %s %s' % (sys.argv[0],\ + net_file, weight_file, datafile, net_name)) + sys.exit(1) + + net_file = sys.argv[2] + weight_file = sys.argv[3] + datafile = sys.argv[4] + net_name = sys.argv[5] + ret = infer(weight_file, datafile, net_file, net_name) + + if ret is None: print('usage:') - print('\tpython %s [net_file] [weight_file] [datafile] [net_name]' % - (sys.argv[0])) - print('\teg:python %s %s %s %s %s' % (sys.argv[0], net_file, - weight_file, datafile, net_name)) + print(' python %s [infer] [fluid_model] [imgfile]' % (sys.argv[0])) + print(' eg:python %s infer %s %s' % (sys.argv[0], model_file, datafile)) sys.exit(1) - infer(net_file, net_name, weight_file, datafile) + sys.exit(ret) diff --git a/fluid/image_classification/caffe2fluid/examples/imagenet/run.sh b/fluid/image_classification/caffe2fluid/examples/imagenet/run.sh old mode 100644 new mode 100755 index 
ff3cc4ac44a8ccaeb0b33f1bcdbc46886fb7d7e9..2f0a0ba01d6161c031cbc6b452b826cbbb6002b3 --- a/fluid/image_classification/caffe2fluid/examples/imagenet/run.sh +++ b/fluid/image_classification/caffe2fluid/examples/imagenet/run.sh @@ -71,7 +71,7 @@ if [[ -z $only_convert ]];then if [[ -z $net_name ]];then net_name="MyNet" fi - $PYTHON ./infer.py $net_file $weight_file $imgfile $net_name + $PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name ret=$? fi exit $ret diff --git a/fluid/image_classification/caffe2fluid/examples/mnist/run.sh b/fluid/image_classification/caffe2fluid/examples/mnist/run.sh old mode 100644 new mode 100755 diff --git a/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py b/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py index 3697529971fa6ca01d1703375243d16f0a0c1edd..20155e992f1bb804b823880a20600d31675f029a 100644 --- a/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py +++ b/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py @@ -216,7 +216,10 @@ class TensorFlowEmitter(object): def emit_convert_def(self, input_nodes): codes = [] inputs = {} + #codes.append('shapes = cls.input_shapes()') codes.append('shapes = cls.input_shapes()') + codes.append('input_name = shapes.keys()[0]') + codes.append('input_shape = shapes[input_name]') for n in input_nodes: name = n.name layer_var = name + '_layer' @@ -235,8 +238,14 @@ class TensorFlowEmitter(object): codes.append("exe = fluid.Executor(place)") codes.append("exe.run(fluid.default_startup_program())") codes.append("net.load(data_path=npy_model, exe=exe, place=place)") + codes.append("output_vars = [net.get_output()]") + codes.append("fluid.io.save_inference_model(" \ + "fluid_path, [input_name],output_vars," \ + "exe, main_program=None, model_filename='model'," \ + "params_filename='params')") codes.append( - "fluid.io.save_persistables(executor=exe, dirname=fluid_path)") + "print('save fluid model as [model] and [params] in directory [%s]' % (fluid_path))" + ) self.outdent() func_def = self.statement('@classmethod') @@ -254,8 +263,17 @@ class TensorFlowEmitter(object): self.prefix = '' main_def = self.statement('if __name__ == "__main__":') self.indent() - main_def += self.statement("#usage: python xxxnet.py xxx.npy ./model\n") + main_def += self.statement( + "#usage: save as an inference model for online service\n") main_def += self.statement("import sys") + main_def += self.statement("if len(sys.argv) != 3:") + self.indent() + main_def += self.statement("print('usage:')") + main_def += self.statement( + "print('\tpython %s [xxxnet.npy] [save_dir]' % (sys.argv[0]))") + main_def += self.statement("exit(1)") + + self.outdent() main_def += self.statement("npy_weight = sys.argv[1]") main_def += self.statement("fluid_model = sys.argv[2]") main_def += self.statement("%s.convert(npy_weight, fluid_model)" % diff --git a/fluid/image_classification/caffe2fluid/proto/compile.sh b/fluid/image_classification/caffe2fluid/proto/compile.sh old mode 100644 new mode 100755 diff --git a/fluid/object_detection/README.md b/fluid/object_detection/README.md index 4aa2c32865932bb949e20e32b63fc5cec2669dd0..67eccaed7303ad9bd6d5386729ae676eee52663f 100644 --- a/fluid/object_detection/README.md +++ b/fluid/object_detection/README.md @@ -2,7 +2,99 @@ The minimum PaddlePaddle version needed for the code sample in this directory is --- -# MobileNet-SSD +## SSD Object Detection -This model built with paddle fluid is still under active development and is not -the final version. 
We welcome feedbacks. +### Introduction + +[Single Shot MultiBox Detector (SSD)](https://arxiv.org/abs/1512.02325) framework for object detection is based on a feed-forward convolutional network. The early network is a standard convolutional architecture for image classification, such as VGG, ResNet, or MobileNet, which is als called base network. In this tutorial we used [MobileNet](https://arxiv.org/abs/1704.04861). + +### Data Preparation + +You can use [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) or [MS-COCO dataset](http://cocodataset.org/#download). + +#### PASCAL VOC Dataset + +If you want to train model on PASCAL VOC dataset, please download datset at first, skip this step if you already have one. + +```bash +cd data/pascalvoc +./download.sh +``` + +The command `download.sh` also will create training and testing file lists. + +#### MS-COCO Dataset + +If you want to train model on MS-COCO dataset, please download datset at first, skip this step if you already have one. + +``` +cd data/coco +./download.sh +``` + +### Train + +#### Download the Pre-trained Model. + +We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other dataset, like PASCAL VOC. Then other pre-trained model is MobileNet v1 trained on ImageNet 2012 dataset, but removed the last weights and bias in Fully-Connected layer. + +Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md). The MobileNet v1 model is converted [Caffe](https://github.com/shicai/MobileNet-Caffe). + + - Download MobileNet-v1 SSD: + ``` + ./pretrained/download_coco.sh + ``` + - Download MobileNet-v1: + ``` + ./pretrained/download_imagenet.sh + ``` + +#### Train on PASCAL VOC + - Train on one device (/GPU). + ```python + env CUDA_VISIABLE_DEVICES=0 python -u train.py --parallel=False --data='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/' + ``` + - Train on multi devices (/GPUs). + + ```python + env CUDA_VISIABLE_DEVICES=0,1 python -u train.py --batch_size=64 --data='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/' + ``` + +#### Train on MS-COCO + - Train on one device (/GPU). + ```python + env CUDA_VISIABLE_DEVICES=0 python -u train.py --parallel=False --data='coco' --pretrained_model='pretrained/mobilenet_imagenet/' + ``` + - Train on multi devices (/GPUs). 
+ ```python + env CUDA_VISIABLE_DEVICES=0,1 python -u train.py --batch_size=64 --data='coco' --pretrained_model='pretrained/mobilenet_imagenet/' + ``` + +TBD + +### Evaluate + +```python +env CUDA_VISIABLE_DEVICES=0 python eval.py --model='model/90' --test_list='' +``` + +TBD + +### Infer and Visualize + +```python +env CUDA_VISIABLE_DEVICES=0 python infer.py --batch_size=2 --model='model/90' --test_list='' +``` + +TBD + +### Released Model + + +| Model | Pre-trained Model | Training data | Test data | mAP | +|:------------------------:|:------------------:|:----------------:|:------------:|:----:| +|MobileNet-v1-SSD 300x300 | COCO MobileNet SSD | VOC07+12 trainval| VOC07 test | xx% | +|MobileNet-v1-SSD 300x300 | ImageNet MobileNet | VOC07+12 trainval| VOC07 test | xx% | +|MobileNet-v1-SSD 300x300 | ImageNet MobileNet | MS-COCO trainval | MS-COCO test | xx% | + +TBD diff --git a/fluid/object_detection/data/pascalvoc/download.sh b/fluid/object_detection/data/pascalvoc/download.sh old mode 100644 new mode 100755 diff --git a/fluid/object_detection/pretrained/download_coco.sh b/fluid/object_detection/pretrained/download_coco.sh old mode 100644 new mode 100755 diff --git a/fluid/object_detection/pretrained/download_imagenet.sh b/fluid/object_detection/pretrained/download_imagenet.sh old mode 100644 new mode 100755 diff --git a/fluid/ocr_recognition/README.md b/fluid/ocr_recognition/README.md index e71386a8e9a5c94633d31ce9bf40e26dd483fa87..7d35846fb1b67ce4fec7f364a22dce9cb853bb24 100644 --- a/fluid/ocr_recognition/README.md +++ b/fluid/ocr_recognition/README.md @@ -1,4 +1,179 @@ -# OCR Model + +[toc] -This model built with paddle fluid is still under active development and is not -the final version. We welcome feedbacks. +运行本目录下的程序示例需要使用PaddlePaddle develop最新版本。如果您的PaddlePaddle安装版本低于此要求,请按照安装文档中的说明更新PaddlePaddle安装版本。 + +# Optical Character Recognition + +这里将介绍如何在PaddlePaddle fluid下使用CRNN-CTC 和 CRNN-Attention模型对图片中的文字内容进行识别。 + +## 1. CRNN-CTC + +本章的任务是识别含有单行汉语字符图片,首先采用卷积将图片转为`features map`, 然后使用`im2sequence op`将`features map`转为`sequence`,经过`双向GRU RNN`得到每个step的汉语字符的概率分布。训练过程选用的损失函数为CTC loss,最终的评估指标为`instance error rate`。 + +本路径下各个文件的作用如下: + +- **ctc_reader.py :** 下载、读取、处理数据。提供方法`train()` 和 `test()` 分别产生训练集和测试集的数据迭代器。 +- **crnn_ctc_model.py :** 在该脚本中定义了训练网络、预测网络和evaluate网络。 +- **ctc_train.py :** 用于模型的训练,可通过命令`python train.py --help` 获得使用方法。 +- **inference.py :** 加载训练好的模型文件,对新数据进行预测。可通过命令`python inference.py --help` 获得使用方法。 +- **eval.py :** 评估模型在指定数据集上的效果。可通过命令`python inference.py --help` 获得使用方法。 +- **utility.py :** 实现的一些通用方法,包括参数配置、tensor的构造等。 + + +### 1.1 数据 + +数据的下载和简单预处理都在`ctc_reader.py`中实现。 + +#### 1.1.1 数据格式 + +我们使用的训练和测试数据如`图1`所示,每张图片包含单行不定长的中文字符串,这些图片都是经过检测算法进行预框选处理的。 + +

+
+图 1 +

+ +在训练集中,每张图片对应的label是由若干数字组成的sequence。 Sequence中的每个数字表示一个字符在字典中的index。 `图1` 对应的label如下所示: +``` +3835,8371,7191,2369,6876,4162,1938,168,1517,4590,3793 +``` +在上边这个label中,`3835` 表示字符‘两’的index,`4590` 表示中文字符逗号的index。 + + +#### 1.1.2 数据准备 + +**A. 训练集** + +我们需要把所有参与训练的图片放入同一个文件夹,暂且记为`train_images`。然后用一个list文件存放每张图片的信息,包括图片大小、图片名称和对应的label,这里暂记该list文件为`train_list`,其格式如下所示: + +``` +185 48 00508_0215.jpg 7740,5332,2369,3201,4162 +48 48 00197_1893.jpg 6569 +338 48 00007_0219.jpg 4590,4788,3015,1994,3402,999,4553 +150 48 00107_4517.jpg 5936,3382,1437,3382 +... +157 48 00387_0622.jpg 2397,1707,5919,1278 +``` + +
文件train_list
+ +上述文件中的每一行表示一张图片,每行被空格分为四列,前两列分别表示图片的宽和高,第三列表示图片的名称,第四列表示该图片对应的sequence label。 +最终我们应有以下类似文件结构: + +``` +|-train_data + |- train_list + |- train_imags + |- 00508_0215.jpg + |- 00197_1893.jpg + |- 00007_0219.jpg + | ... +``` + +在训练时,我们通过选项`--train_images` 和 `--train_list` 分别设置准备好的`train_images` 和`train_list`。 + + +>**注:** 如果`--train_images` 和 `--train_list`都未设置或设置为None, ctc_reader.py会自动下载使用[示例数据](http://cloud.dlnel.org/filepub/?uuid=df937251-3c0b-480d-9a7b-0080dfeee65c),并将其缓存到`$HOME/.cache/paddle/dataset/ctc_data/data/` 路径下。 + + +**B. 测试集和评估集** + +测试集、评估集的准备方式与训练集相同。 +在训练阶段,测试集的路径通过train.py的选项`--test_images` 和 `--test_list` 来设置。 +在评估时,评估集的路径通过eval.py的选项`--input_images_dir` 和`--input_images_list` 来设置。 + +**C. 待预测数据集** + +预测支持三种形式的输入: + +第一种:设置`--input_images_dir`和`--input_images_list`, 与训练集类似, 只不过list文件中的最后一列可以放任意占位字符或字符串,如下所示: + +``` +185 48 00508_0215.jpg s +48 48 00197_1893.jpg s +338 48 00007_0219.jpg s +... +``` + +第二种:仅设置`--input_images_list`, 其中list文件中只需放图片的完整路径,如下所示: + +``` +data/test_images/00000.jpg +data/test_images/00001.jpg +data/test_images/00003.jpg +``` + +第三种:从stdin读入一张图片的path,然后进行一次inference. + +#### 1.2 训练 + +使用默认数据在GPU单卡上训练: + +``` +env CUDA_VISIABLE_DEVICES=0 python ctc_train.py +``` + +使用默认数据在GPU多卡上训练: + +``` +env CUDA_VISIABLE_DEVICES=0,1,2,3 python ctc_train.py --parallel=True +``` + +执行`python ctc_train.py --help`可查看更多使用方式和参数详细说明。 + +图2为使用默认参数和默认数据集训练的收敛曲线,其中横坐标轴为训练pass数,纵轴为在测试集上的sequence_error. + +

+
+图 2 +
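+
+训练时如果需要从某个已保存的模型热启动，可以通过`--init_model`选项指定初始模型（以下为示意用法，假设模型文件保存在默认的`./models`目录下）：
+
+```
+env CUDA_VISIABLE_DEVICES=0 python ctc_train.py --init_model="./models/model_00000_15000"
+```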

+ + + +### 1.3 评估 + +通过以下命令调用评估脚本用指定数据集对模型进行评估: + +``` +env CUDA_VISIBLE_DEVICE=0 python eval.py \ + --model_path="./models/model_0" \ + --input_images_dir="./eval_data/images/" \ + --input_images_list="./eval_data/eval_list\" \ +``` + +执行`python ctc_train.py --help`可查看参数详细说明。 + + +### 1.4 预测 + +从标准输入读取一张图片的路径,并对齐进行预测: + +``` +env CUDA_VISIBLE_DEVICE=0 python inference.py \ + --model_path="models/model_00044_15000" +``` + +执行上述命令进行预测的效果如下: + +``` +----------- Configuration Arguments ----------- +use_gpu: True +input_images_dir: None +input_images_list: None +model_path: /home/work/models/fluid/ocr_recognition/models/model_00052_15000 +------------------------------------------------ +Init model from: /home/work/models/fluid/ocr_recognition/models/model_00052_15000. +Please input the path of image: /home/work/models/fluid/ocr_recognition/data/test_images/00001_0060.jpg +result: [3298 2371 4233 6514 2378 3298 2363] +Please input the path of image: /home/work/models/fluid/ocr_recognition/data/test_images/00001_0429.jpg +result: [2067 2067 8187 8477 5027 7191 2431 1462] +``` + +从文件中批量读取图片路径,并对其进行预测: + +``` +env CUDA_VISIBLE_DEVICE=0 python inference.py \ + --model_path="models/model_00044_15000" \ + --input_images_list="data/test.list" +``` diff --git a/fluid/ocr_recognition/crnn_ctc_model.py b/fluid/ocr_recognition/crnn_ctc_model.py index dd5aaa3f94c1e2668ec75d30735640d14ee8ef0e..8db4fab55dc3492d43d8d9aaa4e30d0619108c28 100644 --- a/fluid/ocr_recognition/crnn_ctc_model.py +++ b/fluid/ocr_recognition/crnn_ctc_model.py @@ -143,7 +143,7 @@ def ctc_train_net(images, label, args, num_classes): gradient_clip = None if args.parallel: places = fluid.layers.get_places() - pd = fluid.layers.ParallelDo(places) + pd = fluid.layers.ParallelDo(places, use_nccl=True) with pd.do(): images_ = pd.read_input(images) label_ = pd.read_input(label) diff --git a/fluid/ocr_recognition/ctc_reader.py b/fluid/ocr_recognition/ctc_reader.py index c9f75a0d523a7390b3814706cdad831d5900dbdb..245177cb6f21849c4a5f42d65543732aa32cb6bd 100644 --- a/fluid/ocr_recognition/ctc_reader.py +++ b/fluid/ocr_recognition/ctc_reader.py @@ -30,10 +30,10 @@ class DataGenerator(object): Reader interface for training. :param img_root_dir: The root path of the image for training. - :type file_list: str + :type img_root_dir: str :param img_label_list: The path of the file for training. - :type file_list: str + :type img_label_list: str ''' @@ -91,10 +91,10 @@ class DataGenerator(object): Reader interface for inference. :param img_root_dir: The root path of the images for training. - :type file_list: str + :type img_root_dir: str :param img_label_list: The path of the file for testing. - :type file_list: list + :type img_label_list: str ''' def reader(): @@ -111,6 +111,42 @@ class DataGenerator(object): return reader + def infer_reader(self, img_root_dir=None, img_label_list=None): + '''A reader interface for inference. + + :param img_root_dir: The root path of the images for training. + :type img_root_dir: str + + :param img_label_list: The path of the file for + inference. It should be the path of file if img_root_dir + was None. If img_label_list was set to None, it will read image path + from stdin. 
+ :type img_root_dir: str + ''' + + def reader(): + if img_label_list is not None: + for line in open(img_label_list): + if img_root_dir is not None: + # h, w, img_name, labels + img_name = line.split(' ')[2] + img_path = os.path.join(img_root_dir, img_name) + else: + img_path = line.strip("\t\n\r") + img = Image.open(img_path).convert('L') + img = np.array(img) - 127.5 + img = img[np.newaxis, ...] + yield img, label + else: + while True: + img_path = raw_input("Please input the path of image: ") + img = Image.open(img_path).convert('L') + img = np.array(img) - 127.5 + img = img[np.newaxis, ...] + yield img, [[0]] + + return reader + def num_classes(): '''Get classes number of this dataset. @@ -124,21 +160,31 @@ def data_shape(): return DATA_SHAPE -def train(batch_size): +def train(batch_size, train_images_dir=None, train_list_file=None): generator = DataGenerator() - data_dir = download_data() - return generator.train_reader( - path.join(data_dir, TRAIN_DATA_DIR_NAME), - path.join(data_dir, TRAIN_LIST_FILE_NAME), batch_size) + if train_images_dir is None: + data_dir = download_data() + train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME) + if train_list_file is None: + train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME) + return generator.train_reader(train_images_dir, train_list_file, batch_size) + + +def test(batch_size=1, test_images_dir=None, test_list_file=None): + generator = DataGenerator() + if test_images_dir is None: + data_dir = download_data() + test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME) + if test_list_file is None: + test_list_file = path.join(data_dir, TEST_LIST_FILE_NAME) + return paddle.batch( + generator.test_reader(test_images_dir, test_list_file), batch_size) -def test(batch_size=1): +def inference(infer_images_dir=None, infer_list_file=None): generator = DataGenerator() - data_dir = download_data() return paddle.batch( - generator.test_reader( - path.join(data_dir, TRAIN_DATA_DIR_NAME), - path.join(data_dir, TRAIN_LIST_FILE_NAME)), batch_size) + generator.infer_reader(infer_images_dir, infer_list_file), 1) def download_data(): diff --git a/fluid/ocr_recognition/ctc_train.py b/fluid/ocr_recognition/ctc_train.py index 2ac23f609779c5e653919fece6dbf661c79e859f..35db803506179d162226ae553fa25bfd4323d567 100644 --- a/fluid/ocr_recognition/ctc_train.py +++ b/fluid/ocr_recognition/ctc_train.py @@ -1,61 +1,82 @@ """Trainer for OCR CTC model.""" import paddle.fluid as fluid -import dummy_reader +from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data +from crnn_ctc_model import ctc_train_net import ctc_reader import argparse -from load_model import load_param import functools import sys -from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data -from crnn_ctc_model import ctc_train_net import time +import os parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable -add_arg('batch_size', int, 32, "Minibatch size.") -add_arg('pass_num', int, 100, "# of training epochs.") -add_arg('log_period', int, 1000, "Log period.") -add_arg('learning_rate', float, 1.0e-3, "Learning rate.") -add_arg('l2', float, 0.0004, "L2 regularizer.") -add_arg('max_clip', float, 10.0, "Max clip threshold.") -add_arg('min_clip', float, -10.0, "Min clip threshold.") -add_arg('momentum', float, 0.9, "Momentum.") -add_arg('rnn_hidden_size',int, 200, "Hidden size of rnn layers.") -add_arg('device', int, 0, "Device id.'-1' means running on CPU" - "while '0' means 
GPU-0.") -add_arg('min_average_window', int, 10000, "Min average window.") -add_arg('max_average_window', int, 15625, "Max average window.") -add_arg('average_window', float, 0.15, "Average window.") -add_arg('parallel', bool, False, "Whether use parallel training.") -# yapf: disable - -def load_parameter(place): - params = load_param('./name.map', './data/model/results_without_avg_window/pass-00000/') - for name in params: - t = fluid.global_scope().find_var(name).get_tensor() - t.set(params[name], place) +add_arg('batch_size', int, 32, "Minibatch size.") +add_arg('pass_num', int, 100, "Number of training epochs.") +add_arg('log_period', int, 1000, "Log period.") +add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.") +add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.") +add_arg('save_model_dir', str, "./models", "The directory the model to be saved to.") +add_arg('init_model', str, None, "The init model file of directory.") +add_arg('learning_rate', float, 1.0e-3, "Learning rate.") +add_arg('l2', float, 0.0004, "L2 regularizer.") +add_arg('momentum', float, 0.9, "Momentum.") +add_arg('rnn_hidden_size', int, 200, "Hidden size of rnn layers.") +add_arg('use_gpu', bool, True, "Whether use GPU to train.") +add_arg('min_average_window',int, 10000, "Min average window.") +add_arg('max_average_window',int, 15625, "Max average window. It is proposed to be set as the number of minibatch in a pass.") +add_arg('average_window', float, 0.15, "Average window.") +add_arg('parallel', bool, False, "Whether use parallel training.") +add_arg('train_images', str, None, "The directory of training images." + "None means using the default training images of reader.") +add_arg('train_list', str, None, "The list file of training images." + "None means using the default train_list file of reader.") +add_arg('test_images', str, None, "The directory of training images." + "None means using the default test images of reader.") +add_arg('test_list', str, None, "The list file of training images." + "None means using the default test_list file of reader.") +add_arg('num_classes', int, None, "The number of classes." 
+ "None means using the default num_classes from reader.") +# yapf: enable -def train(args, data_reader=dummy_reader): +def train(args, data_reader=ctc_reader): """OCR CTC training""" - num_classes = data_reader.num_classes() + num_classes = data_reader.num_classes( + ) if args.num_classes is None else args.num_classes data_shape = data_reader.data_shape() # define network images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int32', lod_level=1) - sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(images, label, args, num_classes) + label = fluid.layers.data( + name='label', shape=[1], dtype='int32', lod_level=1) + sum_cost, error_evaluator, inference_program, model_average = ctc_train_net( + images, label, args, num_classes) # data reader - train_reader = data_reader.train(args.batch_size) - test_reader = data_reader.test() + train_reader = data_reader.train( + args.batch_size, + train_images_dir=args.train_images, + train_list_file=args.train_list) + test_reader = data_reader.test( + test_images_dir=args.test_images, test_list_file=args.test_list) + # prepare environment place = fluid.CPUPlace() - if args.device >= 0: - place = fluid.CUDAPlace(args.device) + if args.use_gpu: + place = fluid.CUDAPlace(0) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - #load_parameter(place) + + # load init model + if args.init_model is not None: + model_dir = args.init_model + model_file_name = None + if not os.path.isdir(args.init_model): + model_dir = os.path.dirname(args.init_model) + model_file_name = os.path.basename(args.init_model) + fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) + print "Init model from: %s." % args.init_model for pass_id in range(args.pass_num): error_evaluator.reset(exe) @@ -70,29 +91,41 @@ def train(args, data_reader=dummy_reader): fetch_list=[sum_cost] + error_evaluator.metrics) total_loss += batch_loss[0] total_seq_error += batch_seq_error[0] - if batch_id % 100 == 1: - print '.', - sys.stdout.flush() - if batch_id % args.log_period == 1: + # training log + if batch_id % args.log_period == 0: print "\nTime: %s; Pass[%d]-batch[%d]; Avg Warp-CTC loss: %s; Avg seq error: %s." % ( - time.time(), - pass_id, batch_id, total_loss / (batch_id * args.batch_size), total_seq_error / (batch_id * args.batch_size)) + time.time(), pass_id, batch_id, + total_loss / (batch_id * args.batch_size), + total_seq_error / (batch_id * args.batch_size)) sys.stdout.flush() - batch_id += 1 + # evaluate + if batch_id % args.eval_period == 0: + with model_average.apply(exe): + error_evaluator.reset(exe) + for data in test_reader(): + exe.run(inference_program, + feed=get_feeder_data(data, place)) + _, test_seq_error = error_evaluator.eval(exe) - with model_average.apply(exe): - error_evaluator.reset(exe) - for data in test_reader(): - exe.run(inference_program, feed=get_feeder_data(data, place)) - _, test_seq_error = error_evaluator.eval(exe) + print "\nTime: %s; Pass[%d]-batch[%d]; Test seq error: %s.\n" % ( + time.time(), pass_id, batch_id, str(test_seq_error[0])) + # save model + if batch_id % args.save_model_period == 0: + with model_average.apply(exe): + filename = "model_%05d_%d" % (pass_id, batch_id) + fluid.io.save_params( + exe, dirname=args.save_model_dir, filename=filename) + print "Saved model to: %s/%s." 
% (args.save_model_dir, + filename) + + batch_id += 1 - print "\nEnd pass[%d]; Test seq error: %s.\n" % ( - pass_id, str(test_seq_error[0])) def main(): args = parser.parse_args() print_arguments(args) train(args, data_reader=ctc_reader) + if __name__ == "__main__": main() diff --git a/fluid/ocr_recognition/dummy_reader.py b/fluid/ocr_recognition/dummy_reader.py deleted file mode 100644 index def91b1dd95857e7df740271cac486001da5f24b..0000000000000000000000000000000000000000 --- a/fluid/ocr_recognition/dummy_reader.py +++ /dev/null @@ -1,52 +0,0 @@ -"""A dummy reader for test.""" -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import numpy as np -import paddle.v2 as paddle - -DATA_SHAPE = [1, 512, 512] -NUM_CLASSES = 20 - - -def _read_creater(num_sample=1024, min_seq_len=1, max_seq_len=10): - def reader(): - for i in range(num_sample): - sequence_len = np.random.randint(min_seq_len, max_seq_len) - x = np.random.uniform(0.1, 1, DATA_SHAPE).astype("float32") - y = np.random.randint(0, NUM_CLASSES + 1, - [sequence_len]).astype("int32") - yield x, y - - return reader - - -def train(batch_size, num_sample=128): - """Get train dataset reader.""" - return paddle.batch(_read_creater(num_sample=num_sample), batch_size) - - -def test(batch_size=1, num_sample=16): - """Get test dataset reader.""" - return paddle.batch(_read_creater(num_sample=num_sample), batch_size) - - -def data_shape(): - """Get image shape in CHW order.""" - return DATA_SHAPE - - -def num_classes(): - """Get number of total classes.""" - return NUM_CLASSES diff --git a/fluid/ocr_recognition/eval.py b/fluid/ocr_recognition/eval.py index 342d0f16cd5f321d56988273cd6f47759e31bef0..be0a04380b62b274abfa954cbeed451afb441922 100644 --- a/fluid/ocr_recognition/eval.py +++ b/fluid/ocr_recognition/eval.py @@ -1,21 +1,24 @@ import paddle.v2 as paddle import paddle.fluid as fluid -from load_model import load_param -from utility import get_feeder_data +from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data +from crnn_ctc_model import ctc_infer from crnn_ctc_model import ctc_eval import ctc_reader -import dummy_reader +import argparse +import functools +import os +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('model_path', str, None, "The model path to be used for inference.") +add_arg('input_images_dir', str, None, "The directory of images.") +add_arg('input_images_list', str, None, "The list file of images.") +add_arg('use_gpu', bool, True, "Whether use GPU to eval.") +# yapf: enable -def load_parameter(place): - params = load_param('./name.map', './data/model/results/pass-00062/') - for name in params: - print "param: %s" % name - t = fluid.global_scope().find_var(name).get_tensor() - t.set(params[name], place) - -def evaluate(eval=ctc_eval, data_reader=dummy_reader): +def evaluate(args, eval=ctc_eval, data_reader=ctc_reader): """OCR inference""" num_classes = 
data_reader.num_classes() data_shape = data_reader.data_shape() @@ -26,29 +29,41 @@ def evaluate(eval=ctc_eval, data_reader=dummy_reader): evaluator, cost = eval(images, label, num_classes) # data reader - test_reader = data_reader.test() + test_reader = data_reader.test( + test_images_dir=args.input_images_dir, + test_list_file=args.input_images_list) + # prepare environment - place = fluid.CUDAPlace(0) - #place = fluid.CPUPlace() + place = fluid.CPUPlace() + if use_gpu: + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - print fluid.default_main_program() - load_parameter(place) + + # load init model + model_dir = args.model_path + model_file_name = None + if not os.path.isdir(args.model_path): + model_dir = os.path.dirname(args.model_path) + model_file_name = os.path.basename(args.model_path) + fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) + print "Init model from: %s." % args.model_path + evaluator.reset(exe) count = 0 for data in test_reader(): count += 1 - print 'Process samples: %d\r' % (count, ), - result, avg_distance, avg_seq_error = exe.run( - fluid.default_main_program(), - feed=get_feeder_data(data, place), - fetch_list=[cost] + evaluator.metrics) + exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place)) avg_distance, avg_seq_error = evaluator.eval(exe) - print "avg_distance: %s; avg_seq_error: %s" % (avg_distance, avg_seq_error) + print "Read %d samples; avg_distance: %s; avg_seq_error: %s" % ( + count, avg_distance, avg_seq_error) def main(): - evaluate(data_reader=ctc_reader) + args = parser.parse_args() + print_arguments(args) + evaluate(args, data_reader=ctc_reader) if __name__ == "__main__": diff --git a/fluid/ocr_recognition/images/demo.jpg b/fluid/ocr_recognition/images/demo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..be5aee506f68861583903d04c526523afc299ab8 Binary files /dev/null and b/fluid/ocr_recognition/images/demo.jpg differ diff --git a/fluid/ocr_recognition/images/train.jpg b/fluid/ocr_recognition/images/train.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3d691f1cd6b44c99c1b89286573daf1abd6dcbfa Binary files /dev/null and b/fluid/ocr_recognition/images/train.jpg differ diff --git a/fluid/ocr_recognition/inference.py b/fluid/ocr_recognition/inference.py index 32bc59e9b04dd91e2060b55adbb6264e7797fbe5..04175bb15d7834b76818b330763054e0a519e508 100644 --- a/fluid/ocr_recognition/inference.py +++ b/fluid/ocr_recognition/inference.py @@ -1,47 +1,64 @@ import paddle.v2 as paddle -import paddle.v2.fluid as fluid -from load_model import load_param -from utility import get_feeder_data +import paddle.fluid as fluid +from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from crnn_ctc_model import ctc_infer +import numpy as np import ctc_reader -import dummy_reader +import argparse +import functools +import os +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('model_path', str, None, "The model path to be used for inference.") +add_arg('input_images_dir', str, None, "The directory of images.") +add_arg('input_images_list', str, None, "The list file of images.") +add_arg('use_gpu', bool, True, "Whether use GPU to infer.") +# yapf: enable -def load_parameter(place): - params = load_param('./name.map', './data/model/results/pass-00062/') - for name in params: - print "param: %s" % name - t = 
fluid.global_scope().find_var(name).get_tensor() - t.set(params[name], place) - -def inference(infer=ctc_infer, data_reader=dummy_reader): +def inference(args, infer=ctc_infer, data_reader=ctc_reader): """OCR inference""" num_classes = data_reader.num_classes() data_shape = data_reader.data_shape() # define network images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - sequence, tmp = infer(images, num_classes) - fluid.layers.Print(tmp) + sequence = infer(images, num_classes) # data reader - test_reader = data_reader.test() + infer_reader = data_reader.inference( + infer_images_dir=args.input_images_dir, + infer_list_file=args.input_images_list) # prepare environment - place = fluid.CUDAPlace(0) + place = fluid.CPUPlace() + if use_gpu: + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - load_parameter(place) + # load init model + model_dir = args.model_path + model_file_name = None + if not os.path.isdir(args.model_path): + model_dir = os.path.dirname(args.model_path) + model_file_name = os.path.basename(args.model_path) + fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) + print "Init model from: %s." % args.model_path - for data in test_reader(): + for data in infer_reader(): result = exe.run(fluid.default_main_program(), feed=get_feeder_data( data, place, need_label=False), - fetch_list=[tmp]) - print "result: %s" % (list(result[0].flatten()), ) + fetch_list=[sequence], + return_numpy=False) + print "result: %s" % (np.array(result[0]).flatten(), ) def main(): - inference(data_reader=ctc_reader) + args = parser.parse_args() + print_arguments(args) + inference(args, data_reader=ctc_reader) if __name__ == "__main__": diff --git a/fluid/ocr_recognition/load_model.py b/fluid/ocr_recognition/load_model.py deleted file mode 100644 index fea9398866f3f3c276f6e998a18c6bdd0a2a488a..0000000000000000000000000000000000000000 --- a/fluid/ocr_recognition/load_model.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys -import numpy as np -import ast - - -def load_parameter(file_name): - with open(file_name, 'rb') as f: - f.read(16) # skip header. - return np.fromfile(f, dtype=np.float32) - - -def load_param(name_map_file, old_param_dir): - result = {} - name_map = {} - shape_map = {} - with open(name_map_file, 'r') as map_file: - for param in map_file: - old_name, new_name, shape = param.strip().split('=') - name_map[new_name] = old_name - shape_map[new_name] = ast.literal_eval(shape) - - for new_name in name_map: - result[new_name] = load_parameter("/".join( - [old_param_dir, name_map[new_name]])).reshape(shape_map[new_name]) - return result - - -if __name__ == "__main__": - name_map_file = "./name.map" - old_param_dir = "./data/model/results/pass-00062/" - result = load_param(name_map_file, old_param_dir) - for p in result: - print "name: %s; param.shape: %s" % (p, result[p].shape)