Commit f1789a58 authored by Dang Qingqing

Update code and fix conflicts.

...@@ -54,6 +54,7 @@ The structure of the Advbox module is as follows:
| ├── mnist_tutorial_fgsm.py
| ├── mnist_tutorial_bim.py
| ├── mnist_tutorial_ilcm.py
| ├── mnist_tutorial_mifgsm.py
| ├── mnist_tutorial_jsma.py
| └── mnist_tutorial_deepfool.py
└── README.md
...@@ -77,6 +78,7 @@ The `./tutorials/` folder provides some tutorials to generate adversarial exampl
* [FGSM](https://arxiv.org/abs/1412.6572)
* [BIM](https://arxiv.org/abs/1607.02533)
* [ILCM](https://arxiv.org/abs/1607.02533)
* [MI-FGSM](https://arxiv.org/pdf/1710.06081.pdf)
* [JSMA](https://arxiv.org/pdf/1511.07528)
* [DeepFool](https://arxiv.org/abs/1511.04599)
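For reference, MI-FGSM stabilizes the update direction with a momentum term; one non-targeted L-infinity iteration from the paper is:

```latex
g_{t+1} = \mu \, g_t + \frac{\nabla_x J(x_t^{adv}, y)}{\lVert \nabla_x J(x_t^{adv}, y) \rVert_1},
\qquad
x_{t+1}^{adv} = \mathrm{clip}\!\left( x_t^{adv} + \alpha \cdot \mathrm{sign}(g_{t+1}) \right)
```

Here \(\mu\) corresponds to `decay_factor` and \(\alpha\) to the per-step size in the implementation below.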
...@@ -91,6 +93,7 @@ Benchmarks on a vanilla CNN model.
|FGSM| 57.8% | 26.55% | 0.3 | One shot| *** |
|BIM| 97.4% | --- | 0.1 | 100 | **** |
|ILCM| --- | 100.0% | 0.1 | 100 | **** |
|MI-FGSM| 94.4% | 100.0% | 0.1 | 100 | **** |
|JSMA| 96.8% | 90.4%| 0.1 | 2000 | *** |
|DeepFool| 97.7% | 51.3% | --- | 100 | **** |
...@@ -101,8 +104,9 @@ Benchmarks on a vanilla CNN model.
* [Intriguing properties of neural networks](https://arxiv.org/abs/1312.6199), C. Szegedy et al., arXiv 2014
* [Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572), I. Goodfellow et al., ICLR 2015
* [Adversarial Examples In The Physical World](https://arxiv.org/pdf/1607.02533v3.pdf), A. Kurakin et al., ICLR workshop 2017
* [Boosting Adversarial Attacks with Momentum](https://arxiv.org/abs/1710.06081), Yinpeng Dong et al., arXiv 2018
* [The Limitations of Deep Learning in Adversarial Settings](https://arxiv.org/abs/1511.07528), N. Papernot et al., EuroS&P 2016
* [DeepFool: a simple and accurate method to fool deep neural networks](https://arxiv.org/abs/1511.04599), S. Moosavi-Dezfooli et al., CVPR 2016
* [Foolbox: A Python toolbox to benchmark the robustness of machine learning models](https://arxiv.org/abs/1707.04131), Jonas Rauber et al., arXiv 2018
* [CleverHans: An adversarial example library for constructing attacks, building defenses, and benchmarking both](https://github.com/tensorflow/cleverhans#setting-up-cleverhans)
* [Threat of Adversarial Attacks on Deep Learning in Computer Vision: A Survey](https://arxiv.org/abs/1801.00553), Naveed Akhtar, Ajmal Mian, arXiv 2018
...@@ -14,7 +14,8 @@ __all__ = [
    'GradientMethodAttack', 'FastGradientSignMethodAttack', 'FGSM',
    'FastGradientSignMethodTargetedAttack', 'FGSMT',
    'BasicIterativeMethodAttack', 'BIM',
    'IterativeLeastLikelyClassMethodAttack', 'ILCM', 'MomentumIteratorAttack',
    'MIFGSM'
]
...@@ -76,9 +77,9 @@ class GradientMethodAttack(Attack):
        for epsilon in epsilons[:]:
            step = 1
            adv_img = adversary.original
            if epsilon == 0.0:
                continue
            for i in range(steps):
                if adversary.is_targeted_attack:
                    gradient = -self.model.gradient(adv_img,
                                                    adversary.target_label)
...@@ -175,7 +176,103 @@ class BasicIterativeMethodAttack(IterativeLeastLikelyClassMethodAttack):
        super(BasicIterativeMethodAttack, self).__init__(model, False)
class MomentumIteratorAttack(GradientMethodAttack):
    """
    The Momentum Iterative Fast Gradient Sign Method (Dong et al. 2017).
    This method won first place in the NIPS 2017 Non-targeted Adversarial
    Attacks and Targeted Adversarial Attacks competitions. The original paper
    uses hard labels (no label smoothing) and the L-infinity norm.
    Paper link: https://arxiv.org/pdf/1710.06081.pdf
    """
    def __init__(self, model, support_targeted=True):
        """
        :param model(model): The model to be attacked.
        :param support_targeted(bool): Whether this attack method supports
            targeted attacks.
        """
        super(MomentumIteratorAttack, self).__init__(model)
        self.support_targeted = support_targeted
    def _apply(self,
               adversary,
               norm_ord=np.inf,
               epsilons=0.1,
               steps=100,
               epsilon_steps=100,
               decay_factor=1):
        """
        Apply the momentum iterative gradient attack method.
        :param adversary(Adversary):
            The Adversary object.
        :param norm_ord(int):
            Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
        :param epsilons(list|tuple|float):
            Attack step size (input variation).
            Treated as the largest step size if epsilons is not iterable.
        :param epsilon_steps:
            The number of epsilon values to try when epsilons is not iterable.
        :param steps:
            The number of attack iterations.
        :param decay_factor:
            The decay factor for the momentum term.
        :return:
            adversary(Adversary): The Adversary object.
        """
if norm_ord == 0:
raise ValueError("L0 norm is not supported!")
if not self.support_targeted:
if adversary.is_targeted_attack:
raise ValueError(
"This attack method doesn't support targeted attack!")
assert self.model.channel_axis() == adversary.original.ndim
assert (self.model.channel_axis() == 1 or
self.model.channel_axis() == adversary.original.shape[0] or
self.model.channel_axis() == adversary.original.shape[-1])
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, epsilons, num=epsilon_steps)
min_, max_ = self.model.bounds()
pre_label = adversary.original_label
for epsilon in epsilons[:]:
if epsilon == 0.0:
continue
step = 1
adv_img = adversary.original
momentum = 0
for i in range(steps):
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img,
adversary.target_label)
else:
gradient = self.model.gradient(adv_img, pre_label)
# normalize gradient
velocity = gradient / self._norm(gradient, ord=1)
momentum = decay_factor * momentum + velocity
if norm_ord == np.inf:
normalized_grad = np.sign(momentum)
else:
                    # scale momentum to a unit L_p-norm direction
                    normalized_grad = momentum / self._norm(momentum, ord=norm_ord)
perturbation = epsilon * normalized_grad
adv_img = adv_img + perturbation
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info(
'step={}, epsilon = {:.5f}, pre_label = {}, adv_label={}'
.format(step, epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
return adversary
FGSM = FastGradientSignMethodAttack FGSM = FastGradientSignMethodAttack
FGSMT = FastGradientSignMethodTargetedAttack FGSMT = FastGradientSignMethodTargetedAttack
BIM = BasicIterativeMethodAttack BIM = BasicIterativeMethodAttack
ILCM = IterativeLeastLikelyClassMethodAttack ILCM = IterativeLeastLikelyClassMethodAttack
MIFGSM = MomentumIteratorAttack
"""
MIFGSM tutorial on mnist using advbox tool.
MIFGSM is a broad class of momentum iterative gradient-based methods based on FSGM.
It supports non-targeted attack and targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import MIFGSM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
    Advbox demo which demonstrates how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = MIFGSM(m)
attack_config = {
"norm_ord": np.inf,
"epsilons": 0.1,
"steps": 100,
"decay_factor": 1
}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# MIFGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# MIFGSM targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# MIFGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# MIFGSM targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("mifgsm attack done")
if __name__ == '__main__':
main()
...@@ -2,20 +2,31 @@
This tool is used to convert a Caffe model to a Fluid model

### Howto
1. Prepare caffepb.py in ./proto if your Python has no 'pycaffe' module; two options are provided here:
   - Generate caffepb.py from caffe.proto
     <pre><code>bash ./proto/compile.sh</code></pre>
   - Download one from github directly
     <pre><code>cd proto/ && wget https://github.com/ethereon/caffe-tensorflow/blob/master/kaffe/caffe/caffepb.py
     </code></pre>

2. Convert the Caffe model to a Fluid model
   - generate fluid code and weight file
     <pre><code>python convert.py alexnet.prototxt \
             --caffemodel alexnet.caffemodel \
             --data-output-path alexnet.npy \
             --code-output-path alexnet.py
     </code></pre>
   - save weights as a fluid model file
     <pre><code>python alexnet.py alexnet.npy ./fluid_model
     </code></pre>

3. Use the converted model to infer
   - see more details in '*examples/imagenet/run.sh*'

4. Compare the inference results with Caffe
   - see more details in '*examples/imagenet/diff.sh*'
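As a hedged sketch of consuming the converted model (the directory name and the AlexNet-style input shape are assumptions; 'model' and 'params' are the filenames written by the converter above):

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)
# load the inference model saved by 'python alexnet.py alexnet.npy ./fluid_model'
program, feed_names, fetch_targets = fluid.io.load_inference_model(
    './fluid_model', exe, model_filename='model', params_filename='params')
img = np.random.random((1, 3, 227, 227)).astype('float32')  # stand-in input
results = exe.run(program, feed={feed_names[0]: img}, fetch_list=fetch_targets)
print('predicted class:', np.argmax(results[0]))
```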
### Tested models
- Lenet
...@@ -33,4 +44,4 @@ This tool is used to convert a Caffe model to Fluid model
[model addr](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet)

### Notes
Some of this code comes from here: [caffe-tensorflow](https://github.com/ethereon/caffe-tensorflow)
A demo to show converting caffe models on 'imagenet' using caffe2fluid

---

# How to use

1. Prepare the Python environment

2. Download a Caffe model to "models.caffe/xxx", which contains "xxx.caffemodel" and "xxx.prototxt"

3. Convert the Caffe model to a Fluid model
- generate fluid code and weight file
<pre><code>python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
</code></pre>
- save weights as fluid model file
<pre><code>python alexnet.py alexnet.npy ./fluid_model
</code></pre>
4. Do inference
<pre><code>python infer.py infer ./fluid_model data/65.jpeg
</code></pre>
5. Convert the model and do inference together
<pre><code>bash ./run.sh alexnet ./models.caffe/alexnet ./models/alexnet
</code></pre>
The Caffe model is stored in './models.caffe/alexnet/alexnet.prototxt|caffemodel'
and the Fluid model will be saved in './models/alexnet/alexnet.py|npy'
6. Test the difference with Caffe's results (requires pycaffe)
<pre><code>bash ./diff.sh resnet
</code></pre>
Make sure your Caffe model is stored in './models.caffe/resnet'.
The results will be stored in './results/resnet.paddle|caffe'.
...@@ -59,12 +59,12 @@ def build_model(net_file, net_name):
    inputs_dict = MyNet.input_shapes()
    input_name = inputs_dict.keys()[0]
    input_shape = inputs_dict[input_name]
    images = fluid.layers.data(
        name=input_name, shape=input_shape, dtype='float32')
    #label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    net = MyNet({input_name: images})
    return net, inputs_dict
def dump_results(results, names, root):
...@@ -78,26 +78,27 @@ def dump_results(results, names, root):
    np.save(filename + '.npy', res)


def load_model(exe, place, net_file, net_name, net_weight, debug):
    """ load model using xxxnet.py and xxxnet.npy
    """
    fluid = import_fluid()

    #1, build model
    net, input_map = build_model(net_file, net_name)
    feed_names = input_map.keys()
    feed_shapes = [v for k, v in input_map.items()]

    prediction = net.get_output()

    #2, load weights for this model
    startup_program = fluid.default_startup_program()
    exe.run(startup_program)

    #place = fluid.CPUPlace()
    if net_weight.find('.npy') > 0:
        net.load(data_path=net_weight, exe=exe, place=place)
    else:
        raise ValueError('weight file not found')

    #3, test this model
    test_program = fluid.default_main_program().clone()
...@@ -111,10 +112,75 @@ def infer(net_file, net_name, model_file, imgfile, debug=True):
        fetch_list_var.append(v)
        fetch_list_name.append(k)
return {
'program': test_program,
'feed_names': feed_names,
'fetch_vars': fetch_list_var,
'fetch_names': fetch_list_name,
'feed_shapes': feed_shapes
}
def get_shape(fluid, program, name):
    for var in program.list_vars():
        # match the feed variable by its name (was hard-coded to 'data')
        if var.name == name:
            return list(var.shape[1:])

    raise ValueError('shape not found for input layer [%s], '
                     'you can specify it yourself' % (name))
def load_inference_model(dirname, exe):
""" load fluid's inference model
"""
fluid = import_fluid()
model_fn = 'model'
params_fn = 'params'
if os.path.exists(os.path.join(dirname, model_fn)) \
and os.path.exists(os.path.join(dirname, params_fn)):
program, feed_names, fetch_targets = fluid.io.load_inference_model(\
dirname, exe, model_fn, params_fn)
    else:
        raise ValueError('model files not found in directory [%s]' % (dirname))
#print fluid.global_scope().find_var(feed_names[0])
input_shape = get_shape(fluid, program, feed_names[0])
feed_shapes = [input_shape]
return program, feed_names, fetch_targets, feed_shapes
def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
    """ do inference using a model which consists of 'xxx.py' and 'xxx.npy'
    """
    fluid = import_fluid()

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    try:
        ret = load_inference_model(model_path, exe)
        program, feed_names, fetch_targets, feed_shapes = ret
        debug = False
        print('found an inference model for fluid')
    except ValueError as e:
        print('try to load model using net file and weight file')
        net_weight = model_path
        ret = load_model(exe, place, net_file, net_name, net_weight, debug)
        program = ret['program']
        feed_names = ret['feed_names']
        fetch_targets = ret['fetch_vars']
        fetch_list_name = ret['fetch_names']
        feed_shapes = ret['feed_shapes']
    input_name = feed_names[0]
    input_shape = feed_shapes[0]

    np_images = load_data(imgfile, input_shape)
    results = exe.run(program=program,
                      feed={input_name: np_images},
                      fetch_list=fetch_targets)
    if debug is True:
        dump_path = 'results.paddle'
...@@ -122,7 +188,7 @@ def infer(net_file, net_name, model_file, imgfile, debug=True):
        print('all results of layers dumped to [%s]' % (dump_path))
    else:
        result = results[0]
        print('inference succeeded, predicted class: %d' % (np.argmax(result)))

    return 0
...@@ -167,9 +233,12 @@ if __name__ == "__main__":
    weight_file = 'models/resnet50/resnet50.npy'
    datafile = 'data/65.jpeg'
    net_name = 'ResNet50'
    model_file = 'models/resnet50/fluid'

    ret = None
    if len(sys.argv) <= 2:
        pass
    elif sys.argv[1] == 'caffe':
        if len(sys.argv) != 5:
            print('usage:')
            print('\tpython %s caffe [prototxt] [caffemodel] [datafile]' %
...@@ -178,18 +247,34 @@ if __name__ == "__main__":
        prototxt = sys.argv[2]
        caffemodel = sys.argv[3]
        datafile = sys.argv[4]
        ret = caffe_infer(prototxt, caffemodel, datafile)
    elif sys.argv[1] == 'infer':
        if len(sys.argv) != 4:
            print('usage:')
            print('\tpython %s infer [fluid_model] [datafile]' % (sys.argv[0]))
            sys.exit(1)
        model_path = sys.argv[2]
        datafile = sys.argv[3]
        ret = infer(model_path, datafile)
    elif sys.argv[1] == 'dump':
        if len(sys.argv) != 6:
            print('usage:')
            print('\tpython %s dump [net_file] [weight_file] [datafile] [net_name]' \
                % (sys.argv[0]))
            print('\teg:python %s dump %s %s %s %s' % (sys.argv[0],
                net_file, weight_file, datafile, net_name))
            sys.exit(1)
        net_file = sys.argv[2]
        weight_file = sys.argv[3]
        datafile = sys.argv[4]
        net_name = sys.argv[5]
        ret = infer(weight_file, datafile, net_file, net_name)

    if ret is None:
        print('usage:')
        print(' python %s [infer] [fluid_model] [imgfile]' % (sys.argv[0]))
        print(' eg:python %s infer %s %s' % (sys.argv[0], model_file, datafile))
        sys.exit(1)

    sys.exit(ret)
...@@ -71,7 +71,7 @@ if [[ -z $only_convert ]];then
    if [[ -z $net_name ]];then
        net_name="MyNet"
    fi
    $PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name
    ret=$?
fi
exit $ret
File mode changed from 100644 to 100755
...@@ -216,7 +216,10 @@ class TensorFlowEmitter(object):
    def emit_convert_def(self, input_nodes):
        codes = []
        inputs = {}
        #codes.append('shapes = cls.input_shapes()')
        codes.append('shapes = cls.input_shapes()')
        codes.append('input_name = shapes.keys()[0]')
        codes.append('input_shape = shapes[input_name]')
        for n in input_nodes:
            name = n.name
            layer_var = name + '_layer'
...@@ -235,8 +238,14 @@ class TensorFlowEmitter(object):
        codes.append("exe = fluid.Executor(place)")
        codes.append("exe.run(fluid.default_startup_program())")
        codes.append("net.load(data_path=npy_model, exe=exe, place=place)")
        codes.append("output_vars = [net.get_output()]")
        codes.append("fluid.io.save_inference_model(" \
            "fluid_path, [input_name], output_vars," \
            "exe, main_program=None, model_filename='model'," \
            "params_filename='params')")
        codes.append(
            "print('save fluid model as [model] and [params] in directory [%s]' % (fluid_path))"
        )
        self.outdent()

        func_def = self.statement('@classmethod')
...@@ -254,8 +263,17 @@ class TensorFlowEmitter(object):
        self.prefix = ''
        main_def = self.statement('if __name__ == "__main__":')
        self.indent()
        main_def += self.statement(
            "#usage: save as an inference model for online service\n")
        main_def += self.statement("import sys")
        main_def += self.statement("if len(sys.argv) != 3:")
        self.indent()
        main_def += self.statement("print('usage:')")
        main_def += self.statement(
            "print('\tpython %s [xxxnet.npy] [save_dir]' % (sys.argv[0]))")
        main_def += self.statement("exit(1)")
        self.outdent()
        main_def += self.statement("npy_weight = sys.argv[1]")
        main_def += self.statement("fluid_model = sys.argv[2]")
        main_def += self.statement("%s.convert(npy_weight, fluid_model)" %
...
File mode changed from 100644 to 100755
...@@ -2,7 +2,99 @@ The minimum PaddlePaddle version needed for the code sample in this directory is
---

## SSD Object Detection

### Introduction

[Single Shot MultiBox Detector (SSD)](https://arxiv.org/abs/1512.02325) is an object detection framework based on a feed-forward convolutional network. Its early layers follow a standard convolutional architecture for image classification, such as VGG, ResNet, or MobileNet, which is also called the base network. In this tutorial we use [MobileNet](https://arxiv.org/abs/1704.04861).
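For reference, the training objective from the SSD paper is a weighted sum of a confidence loss and a localization loss over the N default boxes matched to ground truth:

```latex
L(x, c, l, g) = \frac{1}{N} \left( L_{conf}(x, c) + \alpha \, L_{loc}(x, l, g) \right)
```

where x are the match indicators, c the class confidences, l the predicted boxes, and g the ground-truth boxes.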
### Data Preparation

You can use the [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) or the [MS-COCO dataset](http://cocodataset.org/#download).

#### PASCAL VOC Dataset

If you want to train the model on the PASCAL VOC dataset, please download the dataset first; skip this step if you already have it.

```bash
cd data/pascalvoc
./download.sh
```

The `download.sh` script will also create the training and testing file lists.

#### MS-COCO Dataset

If you want to train the model on the MS-COCO dataset, please download the dataset first; skip this step if you already have it.

```bash
cd data/coco
./download.sh
```
### Train

#### Download the Pre-trained Model

We provide two pre-trained models. One is MobileNet-v1 SSD trained on the COCO dataset, with the convolutional predictors for COCO removed; it can be used to initialize models when training on other datasets, such as PASCAL VOC. The other is MobileNet-v1 trained on the ImageNet 2012 dataset, with the weights and bias of the last fully-connected layer removed.

Note: the MobileNet-v1 SSD model is converted from the [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md). The MobileNet-v1 model is converted from [Caffe](https://github.com/shicai/MobileNet-Caffe).
- Download MobileNet-v1 SSD:
```bash
./pretrained/download_coco.sh
```
- Download MobileNet-v1:
```bash
./pretrained/download_imagenet.sh
```
#### Train on PASCAL VOC

- Train on a single GPU:
```bash
env CUDA_VISIBLE_DEVICES=0 python -u train.py --parallel=False --data='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/'
```
- Train on multiple GPUs:
```bash
env CUDA_VISIBLE_DEVICES=0,1 python -u train.py --batch_size=64 --data='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/'
```

#### Train on MS-COCO

- Train on a single GPU:
```bash
env CUDA_VISIBLE_DEVICES=0 python -u train.py --parallel=False --data='coco' --pretrained_model='pretrained/mobilenet_imagenet/'
```
- Train on multiple GPUs:
```bash
env CUDA_VISIBLE_DEVICES=0,1 python -u train.py --batch_size=64 --data='coco' --pretrained_model='pretrained/mobilenet_imagenet/'
```
TBD
### Evaluate

```bash
env CUDA_VISIBLE_DEVICES=0 python eval.py --model='model/90' --test_list=''
```

TBD

### Infer and Visualize

```bash
env CUDA_VISIBLE_DEVICES=0 python infer.py --batch_size=2 --model='model/90' --test_list=''
```
### Released Model
| Model | Pre-trained Model | Training data | Test data | mAP |
|:------------------------:|:------------------:|:----------------:|:------------:|:----:|
|MobileNet-v1-SSD 300x300 | COCO MobileNet SSD | VOC07+12 trainval| VOC07 test | xx% |
|MobileNet-v1-SSD 300x300 | ImageNet MobileNet | VOC07+12 trainval| VOC07 test | xx% |
|MobileNet-v1-SSD 300x300 | ImageNet MobileNet | MS-COCO trainval | MS-COCO test | xx% |
TBD
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
[toc]

The examples in this directory require the latest PaddlePaddle develop version. If your installed PaddlePaddle is older than this, please update it following the installation documentation.

# Optical Character Recognition
This tutorial shows how to recognize text in images with the CRNN-CTC and CRNN-Attention models under PaddlePaddle Fluid.
## 1. CRNN-CTC
The task in this chapter is to recognize images that contain a single line of Chinese characters. Convolution first turns the image into a `features map`; the `im2sequence op` converts the `features map` into a `sequence`; and a bidirectional GRU RNN yields a per-step probability distribution over Chinese characters. Training uses the CTC loss, and the final evaluation metric is the `instance error rate`. A rough sketch of this pipeline follows.
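The sketch below uses placeholder layer sizes chosen for illustration, not the ones in `crnn_ctc_model.py`:

```python
import paddle.fluid as fluid

num_classes = 20  # placeholder; the real dictionary is far larger
images = fluid.layers.data(name='pixel', shape=[1, 48, 512], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32', lod_level=1)

# convolution turns the image into a feature map
conv = fluid.layers.conv2d(input=images, num_filters=16, filter_size=3, act='relu')
# im2sequence slices the feature map column by column into a sequence
# (filter height 46 == feature-map height after a 3x3 conv without padding)
seq = fluid.layers.im2sequence(input=conv, filter_size=[46, 1], stride=[1, 1])
# bidirectional GRU over the column sequence
fc = fluid.layers.fc(input=seq, size=3 * 200)  # dynamic_gru needs 3 * hidden size
gru_fw = fluid.layers.dynamic_gru(input=fc, size=200)
gru_bw = fluid.layers.dynamic_gru(input=fc, size=200, is_reverse=True)
# per-step logits over the dictionary plus one CTC blank class
logits = fluid.layers.fc(input=[gru_fw, gru_bw], size=num_classes + 1)
cost = fluid.layers.warpctc(input=logits, label=label, blank=num_classes)
```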
The files in this directory serve the following purposes:

- **ctc_reader.py:** downloads, reads, and preprocesses the data; provides `train()` and `test()`, which produce data iterators for the training and test sets, respectively.
- **crnn_ctc_model.py:** defines the training, inference, and evaluation networks.
- **ctc_train.py:** trains the model; run `python ctc_train.py --help` for usage.
- **inference.py:** loads a trained model and predicts on new data; run `python inference.py --help` for usage.
- **eval.py:** evaluates the model on a given dataset; run `python eval.py --help` for usage.
- **utility.py:** common utilities, including argument parsing and tensor construction.
### 1.1 Data

Downloading and light preprocessing of the data are implemented in `ctc_reader.py`.

#### 1.1.1 Data format

The training and test data look like Figure 1: each image contains a single line of Chinese characters of variable length, pre-cropped by a detection algorithm.
<p align="center">
<img src="images/demo.jpg" width="620" hspace='10'/> <br/>
<strong>图 1</strong>
</p>
In the training set, each image's label is a sequence of numbers, where each number is the index of a character in the dictionary. The label for Figure 1 is:
```
3835,8371,7191,2369,6876,4162,1938,168,1517,4590,3793
```
In the label above, `3835` is the index of the character '两' and `4590` is the index of the Chinese comma, as the toy sketch below illustrates.
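Only the two mappings documented above are filled in; a real table is built from the dataset dictionary:

```python
# toy index-to-character table; a real one covers every class index
index_to_char = {3835: u'两', 4590: u','}
label = [3835, 4590]
print(u''.join(index_to_char.get(i, u'?') for i in label))  # -> 两,
```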
#### 1.1.2 Data preparation

**A. Training set**

Put all training images into one folder, referred to as `train_images` below. Then record each image in a list file, here called `train_list`, holding the image size, image name, and label in the following format:
```
185 48 00508_0215.jpg 7740,5332,2369,3201,4162
48 48 00197_1893.jpg 6569
338 48 00007_0219.jpg 4590,4788,3015,1994,3402,999,4553
150 48 00107_4517.jpg 5936,3382,1437,3382
...
157 48 00387_0622.jpg 2397,1707,5919,1278
```
<center>File: train_list</center>

Each line in the file above describes one image; the line is split by spaces into four columns: the image width, the image height, the image name, and the image's sequence label, as in the sketch below.
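A minimal parsing sketch, using the first example line from the listing above:

```python
line = '185 48 00508_0215.jpg 7740,5332,2369,3201,4162'
width, height, img_name, label_str = line.strip().split(' ')
label = [int(i) for i in label_str.split(',')]
print(int(width), int(height), img_name, label)
```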
The final file layout should look like this:
```
|-train_data
|- train_list
|- train_images
|- 00508_0215.jpg
|- 00197_1893.jpg
|- 00007_0219.jpg
| ...
```
At training time, use the options `--train_images` and `--train_list` to point at the prepared `train_images` folder and `train_list` file.

>**Note:** if `--train_images` and `--train_list` are unset or set to None, ctc_reader.py automatically downloads the [sample data](http://cloud.dlnel.org/filepub/?uuid=df937251-3c0b-480d-9a7b-0080dfeee65c) and caches it under `$HOME/.cache/paddle/dataset/ctc_data/data/`.
**B. Test and evaluation sets**

The test and evaluation sets are prepared the same way as the training set.

During training, the test set paths are set with the ctc_train.py options `--test_images` and `--test_list`.

For evaluation, the evaluation set paths are set with the eval.py options `--input_images_dir` and `--input_images_list`.

**C. Data for prediction**

Prediction supports three forms of input:
First: set `--input_images_dir` and `--input_images_list`, just like the training set, except that the last column of the list file may hold an arbitrary placeholder character or string, as shown below:
```
185 48 00508_0215.jpg s
48 48 00197_1893.jpg s
338 48 00007_0219.jpg s
...
```
Second: set only `--input_images_list`, whose file simply lists full image paths:
```
data/test_images/00000.jpg
data/test_images/00001.jpg
data/test_images/00003.jpg
```
Third: read one image path from stdin and run a single inference.

### 1.2 Training

Train on a single GPU with the default data:
```
env CUDA_VISIBLE_DEVICES=0 python ctc_train.py
```

Train on multiple GPUs with the default data:

```
env CUDA_VISIBLE_DEVICES=0,1,2,3 python ctc_train.py --parallel=True
```
Run `python ctc_train.py --help` for more usage information and detailed argument descriptions.

Figure 2 shows the convergence curve when training with the default arguments and dataset; the horizontal axis is the training pass count and the vertical axis is the sequence_error on the test set.
<p align="center">
<img src="images/train.jpg" width="620" hspace='10'/> <br/>
<strong>图 2</strong>
</p>
### 1.3 Evaluation

Evaluate the model on a given dataset with:
```
env CUDA_VISIBLE_DEVICES=0 python eval.py \
    --model_path="./models/model_0" \
    --input_images_dir="./eval_data/images/" \
    --input_images_list="./eval_data/eval_list"
```

Run `python eval.py --help` for detailed argument descriptions.
### 1.4 Inference

Read an image path from standard input and predict on it:
```
env CUDA_VISIBLE_DEVICES=0 python inference.py \
    --model_path="models/model_00044_15000"
```
The command above produces output like the following:
```
----------- Configuration Arguments -----------
use_gpu: True
input_images_dir: None
input_images_list: None
model_path: /home/work/models/fluid/ocr_recognition/models/model_00052_15000
------------------------------------------------
Init model from: /home/work/models/fluid/ocr_recognition/models/model_00052_15000.
Please input the path of image: /home/work/models/fluid/ocr_recognition/data/test_images/00001_0060.jpg
result: [3298 2371 4233 6514 2378 3298 2363]
Please input the path of image: /home/work/models/fluid/ocr_recognition/data/test_images/00001_0429.jpg
result: [2067 2067 8187 8477 5027 7191 2431 1462]
```
Batch-read image paths from a file and predict on them:
```
env CUDA_VISIBLE_DEVICES=0 python inference.py \
    --model_path="models/model_00044_15000" \
    --input_images_list="data/test.list"
```
...@@ -143,7 +143,7 @@ def ctc_train_net(images, label, args, num_classes):
        gradient_clip = None
    if args.parallel:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=True)
        with pd.do():
            images_ = pd.read_input(images)
            label_ = pd.read_input(label)
...
...@@ -30,10 +30,10 @@ class DataGenerator(object):
        Reader interface for training.

        :param img_root_dir: The root path of the image for training.
        :type img_root_dir: str
        :param img_label_list: The path of the <image_name, label> file for training.
        :type img_label_list: str
        '''
...@@ -91,10 +91,10 @@ class DataGenerator(object):
        Reader interface for inference.

        :param img_root_dir: The root path of the images for testing.
        :type img_root_dir: str
        :param img_label_list: The path of the <image_name, label> file for testing.
        :type img_label_list: str
        '''

        def reader():
...@@ -111,6 +111,42 @@ class DataGenerator(object):
        return reader
    def infer_reader(self, img_root_dir=None, img_label_list=None):
        '''A reader interface for inference.

        :param img_root_dir: The root path of the images for inference.
        :type img_root_dir: str
        :param img_label_list: The path of the <image_name, label> file for
            inference. It should be the path of an <image_path> file if
            img_root_dir is None. If img_label_list is None, image paths are
            read from stdin.
        :type img_label_list: str
        '''
def reader():
if img_label_list is not None:
for line in open(img_label_list):
if img_root_dir is not None:
# h, w, img_name, labels
img_name = line.split(' ')[2]
img_path = os.path.join(img_root_dir, img_name)
else:
img_path = line.strip("\t\n\r")
img = Image.open(img_path).convert('L')
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
                    # the label column is just a placeholder at inference time
                    yield img, [[0]]
else:
while True:
img_path = raw_input("Please input the path of image: ")
img = Image.open(img_path).convert('L')
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
yield img, [[0]]
return reader
def num_classes():
    '''Get classes number of this dataset.
...@@ -124,21 +160,31 @@ def data_shape():
    return DATA_SHAPE


def train(batch_size, train_images_dir=None, train_list_file=None):
    generator = DataGenerator()
    if train_images_dir is None:
        data_dir = download_data()
        train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
    if train_list_file is None:
        train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME)
    return generator.train_reader(train_images_dir, train_list_file, batch_size)


def test(batch_size=1, test_images_dir=None, test_list_file=None):
    generator = DataGenerator()
    if test_images_dir is None:
        data_dir = download_data()
        test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME)
    if test_list_file is None:
        test_list_file = path.join(data_dir, TEST_LIST_FILE_NAME)
    return paddle.batch(
        generator.test_reader(test_images_dir, test_list_file), batch_size)


def inference(infer_images_dir=None, infer_list_file=None):
    generator = DataGenerator()
    return paddle.batch(
        generator.infer_reader(infer_images_dir, infer_list_file), 1)


def download_data():
...
"""Trainer for OCR CTC model.""" """Trainer for OCR CTC model."""
import paddle.fluid as fluid import paddle.fluid as fluid
import dummy_reader from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net
import ctc_reader import ctc_reader
import argparse import argparse
from load_model import load_param
import functools import functools
import sys import sys
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net
import time import time
import os
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size',        int,   32,     "Minibatch size.")
add_arg('pass_num',          int,   100,    "Number of training epochs.")
add_arg('log_period',        int,   1000,   "Log period.")
add_arg('save_model_period', int,   15000,  "Save model period. '-1' means never saving the model.")
add_arg('eval_period',       int,   15000,  "Evaluate period. '-1' means never evaluating the model.")
add_arg('save_model_dir',    str,   "./models", "The directory the model is saved to.")
add_arg('init_model',        str,   None,   "The init model file or directory.")
add_arg('learning_rate',     float, 1.0e-3, "Learning rate.")
add_arg('l2',                float, 0.0004, "L2 regularizer.")
add_arg('momentum',          float, 0.9,    "Momentum.")
add_arg('rnn_hidden_size',   int,   200,    "Hidden size of rnn layers.")
add_arg('use_gpu',           bool,  True,   "Whether to use GPU for training.")
add_arg('min_average_window',int,   10000,  "Min average window.")
add_arg('max_average_window',int,   15625,  "Max average window. It is suggested to set it to the number of minibatches in a pass.")
add_arg('average_window',    float, 0.15,   "Average window.")
add_arg('parallel',          bool,  False,  "Whether to use parallel training.")
add_arg('train_images',      str,   None,   "The directory of training images. None means using the default training images of the reader.")
add_arg('train_list',        str,   None,   "The list file of training images. None means using the default train_list file of the reader.")
add_arg('test_images',       str,   None,   "The directory of testing images. None means using the default test images of the reader.")
add_arg('test_list',         str,   None,   "The list file of testing images. None means using the default test_list file of the reader.")
add_arg('num_classes',       int,   None,   "The number of classes. None means using the default num_classes from the reader.")
# yapf: enable
def train(args, data_reader=ctc_reader):
    """OCR CTC training"""
    num_classes = data_reader.num_classes(
    ) if args.num_classes is None else args.num_classes
    data_shape = data_reader.data_shape()
    # define network
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(
        name='label', shape=[1], dtype='int32', lod_level=1)
    sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(
        images, label, args, num_classes)

    # data reader
    train_reader = data_reader.train(
        args.batch_size,
        train_images_dir=args.train_images,
        train_list_file=args.train_list)
    test_reader = data_reader.test(
        test_images_dir=args.test_images, test_list_file=args.test_list)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load init model
    if args.init_model is not None:
        model_dir = args.init_model
        model_file_name = None
        if not os.path.isdir(args.init_model):
            model_dir = os.path.dirname(args.init_model)
            model_file_name = os.path.basename(args.init_model)
        fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
        print "Init model from: %s." % args.init_model
    for pass_id in range(args.pass_num):
        error_evaluator.reset(exe)
...@@ -70,29 +91,41 @@ def train(args, data_reader=dummy_reader):
                fetch_list=[sum_cost] + error_evaluator.metrics)
            total_loss += batch_loss[0]
            total_seq_error += batch_seq_error[0]

            # training log
            if batch_id % args.log_period == 0:
                print "\nTime: %s; Pass[%d]-batch[%d]; Avg Warp-CTC loss: %s; Avg seq error: %s." % (
                    time.time(), pass_id, batch_id,
                    total_loss / (batch_id * args.batch_size),
                    total_seq_error / (batch_id * args.batch_size))
                sys.stdout.flush()

            # evaluate
            if batch_id % args.eval_period == 0:
                with model_average.apply(exe):
                    error_evaluator.reset(exe)
                    for data in test_reader():
                        exe.run(inference_program,
                                feed=get_feeder_data(data, place))
                    _, test_seq_error = error_evaluator.eval(exe)
                    print "\nTime: %s; Pass[%d]-batch[%d]; Test seq error: %s.\n" % (
                        time.time(), pass_id, batch_id, str(test_seq_error[0]))

            # save model
            if batch_id % args.save_model_period == 0:
                with model_average.apply(exe):
                    filename = "model_%05d_%d" % (pass_id, batch_id)
                    fluid.io.save_params(
                        exe, dirname=args.save_model_dir, filename=filename)
                    print "Saved model to: %s/%s." % (args.save_model_dir,
                                                      filename)

            batch_id += 1
def main():
    args = parser.parse_args()
    print_arguments(args)
    train(args, data_reader=ctc_reader)


if __name__ == "__main__":
    main()
"""A dummy reader for test."""
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import numpy as np
import paddle.v2 as paddle
DATA_SHAPE = [1, 512, 512]
NUM_CLASSES = 20
def _read_creater(num_sample=1024, min_seq_len=1, max_seq_len=10):
def reader():
for i in range(num_sample):
sequence_len = np.random.randint(min_seq_len, max_seq_len)
x = np.random.uniform(0.1, 1, DATA_SHAPE).astype("float32")
y = np.random.randint(0, NUM_CLASSES + 1,
[sequence_len]).astype("int32")
yield x, y
return reader
def train(batch_size, num_sample=128):
"""Get train dataset reader."""
return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
def test(batch_size=1, num_sample=16):
"""Get test dataset reader."""
return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
def data_shape():
"""Get image shape in CHW order."""
return DATA_SHAPE
def num_classes():
"""Get number of total classes."""
return NUM_CLASSES
import paddle.v2 as paddle
import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer
from crnn_ctc_model import ctc_eval
import ctc_reader
import argparse
import functools
import os

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model_path',         str,  None,  "The model path to be used for evaluation.")
add_arg('input_images_dir',   str,  None,  "The directory of images.")
add_arg('input_images_list',  str,  None,  "The list file of images.")
add_arg('use_gpu',            bool, True,  "Whether to use GPU for evaluation.")
# yapf: enable
def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
    """OCR evaluation"""
    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
...@@ -26,29 +29,41 @@ def evaluate(eval=ctc_eval, data_reader=dummy_reader):
    evaluator, cost = eval(images, label, num_classes)

    # data reader
    test_reader = data_reader.test(
        test_images_dir=args.input_images_dir,
        test_list_file=args.input_images_list)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # load init model
    model_dir = args.model_path
    model_file_name = None
    if not os.path.isdir(args.model_path):
        model_dir = os.path.dirname(args.model_path)
        model_file_name = os.path.basename(args.model_path)
    fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
    print "Init model from: %s." % args.model_path

    evaluator.reset(exe)
    count = 0
    for data in test_reader():
        count += 1
        result, avg_distance, avg_seq_error = exe.run(
            fluid.default_main_program(),
            feed=get_feeder_data(data, place),
            fetch_list=[cost] + evaluator.metrics)
    avg_distance, avg_seq_error = evaluator.eval(exe)
    print "Read %d samples; avg_distance: %s; avg_seq_error: %s" % (
        count, avg_distance, avg_seq_error)
def main():
    args = parser.parse_args()
    print_arguments(args)
    evaluate(args, data_reader=ctc_reader)


if __name__ == "__main__":
...
import paddle.v2 as paddle
import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer
import numpy as np
import ctc_reader
import argparse
import functools
import os

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model_path',         str,  None,  "The model path to be used for inference.")
add_arg('input_images_dir',   str,  None,  "The directory of images.")
add_arg('input_images_list',  str,  None,  "The list file of images.")
add_arg('use_gpu',            bool, True,  "Whether to use GPU for inference.")
# yapf: enable
def inference(args, infer=ctc_infer, data_reader=ctc_reader):
    """OCR inference"""
    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    sequence = infer(images, num_classes)
    # data reader
    infer_reader = data_reader.inference(
        infer_images_dir=args.input_images_dir,
        infer_list_file=args.input_images_list)
    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # load init model
    model_dir = args.model_path
    model_file_name = None
    if not os.path.isdir(args.model_path):
        model_dir = os.path.dirname(args.model_path)
        model_file_name = os.path.basename(args.model_path)
    fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
    print "Init model from: %s." % args.model_path

    for data in infer_reader():
        result = exe.run(fluid.default_main_program(),
                         feed=get_feeder_data(
                             data, place, need_label=False),
                         fetch_list=[sequence],
                         return_numpy=False)
        print "result: %s" % (np.array(result[0]).flatten(), )
def main():
    args = parser.parse_args()
    print_arguments(args)
    inference(args, data_reader=ctc_reader)


if __name__ == "__main__":
...
import sys
import numpy as np
import ast
def load_parameter(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32)
def load_param(name_map_file, old_param_dir):
result = {}
name_map = {}
shape_map = {}
with open(name_map_file, 'r') as map_file:
for param in map_file:
old_name, new_name, shape = param.strip().split('=')
name_map[new_name] = old_name
shape_map[new_name] = ast.literal_eval(shape)
for new_name in name_map:
result[new_name] = load_parameter("/".join(
[old_param_dir, name_map[new_name]])).reshape(shape_map[new_name])
return result
if __name__ == "__main__":
name_map_file = "./name.map"
old_param_dir = "./data/model/results/pass-00062/"
result = load_param(name_map_file, old_param_dir)
for p in result:
print "name: %s; param.shape: %s" % (p, result[p].shape)