Commit bff7fbe3 authored by wanghaoshuang

Restructure code.

1. Split the data reader and the training script.
2. Wrap some functions.

Parent 4e37cccb
import numpy as np

DATA_SHAPE = [1, 512, 512]


def _reader_creator(num_sample=1024, num_class=20, min_seq_len=1,
                    max_seq_len=10):
    def reader():
        for i in range(num_sample):
            sequence_len = np.random.randint(min_seq_len, max_seq_len)
            x = np.random.uniform(0.1, 1, DATA_SHAPE).astype("float32")
            # Labels must stay in [0, num_class); index num_class is
            # reserved for the CTC blank in the training script.
            y = np.random.randint(0, num_class,
                                  [sequence_len]).astype("int32")
            yield x, y

    return reader


def train(num_sample=16):
    return _reader_creator(num_sample=num_sample)


def test(num_sample=16):
    return _reader_creator(num_sample=num_sample)


def data_shape():
    return DATA_SHAPE
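A minimal usage sketch for this reader module (hedged: it only exercises the functions defined above; shapes follow DATA_SHAPE):

import dummy_reader

# Each sample is an (image, label) pair: a [1, 512, 512] float32 image
# and a variable-length int32 label vector.
for image, label in dummy_reader.train(num_sample=4)():
    print image.shape   # (1, 512, 512)
    print label.shape   # (sequence_len,), with 1 <= sequence_len <= 9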
@@ -12,22 +12,29 @@
#See the License for the specific language governing permissions and
#limitations under the License.
import sys
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from paddle.v2.fluid import core
import numpy as np
import dummy_reader
def random_reader(num_class):
    def reader():
        sequence_len = np.random.randint(5, 10)
        yield np.random.uniform(0.1, 1, [1, 512, 512]), np.random.randint(
            0, num_class + 1, [sequence_len])

    return reader


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int32")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res
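For intuition, a small worked example of the LoD layout this helper builds (values invented for illustration): two label sequences of lengths 2 and 3 are flattened into one (5, 1) tensor, and the offset table [0, 2, 5] marks where each sequence starts and ends:

import numpy as np
from paddle.v2.fluid import core

place = core.CPUPlace()
data = [np.array([3, 7], dtype="int32"),
        np.array([1, 4, 2], dtype="int32")]
tensor = to_lodtensor(data, place)
# flattened payload: [[3], [7], [1], [4], [2]]  -> shape (5, 1)
# offsets: [[0, 2, 5]]  -> sequence i spans rows lod[0][i]:lod[0][i+1]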
def ocr_conv(input, num, with_bn):
def ocr_conv(input, num, with_bn, param_attrs):
    assert (num % 4 == 0)

    def conv_block(input, filter_size, group_size, with_bn):
@@ -40,7 +47,8 @@ def ocr_conv(input, num, with_bn):
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=with_bn,
            pool_type='max')
            pool_type='max',
            param_attr=param_attrs)

    conv1 = conv_block(input, 16, (num / 4), with_bn)
    conv2 = conv_block(conv1, 32, (num / 4), with_bn)
@@ -49,62 +57,101 @@ def ocr_conv(input, num, with_bn):
    return conv4
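A rough sanity check on the encoder's output size (hedged: the pooling arguments of conv_block are elided by the hunks above, so the stride-2 max pool per block is an assumption):

# Hypothetical shape walk-through: four conv blocks, each assumed to end
# in a stride-2 max pool, take a 512 x 512 input down to 32 x 32.
h = w = 512
for block in range(4):
    h, w = h / 2, w / 2   # Python 2 integer division
print h, w                # 32 32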
num_classes = 9054
data_shape = [1, 512, 512]

images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

# encoder part
conv_features = ocr_conv(images, 8, True)
sliced_feature = fluid.layers.im2sequence(
    input=conv_features, stride=[1, 1], filter_size=[1, 3])

# TODO(wanghaoshuang): replace with GRU
gru_forward, _ = fluid.layers.dynamic_lstm(input=sliced_feature, size=3 * 128)
gru_backward, _ = fluid.layers.dynamic_lstm(
    input=sliced_feature, size=3 * 128, is_reverse=True)

fc_out = fluid.layers.fc(input=[gru_forward, gru_backward],
                         size=num_classes + 1)

cost = fluid.layers.warpctc(
    input=fc_out,
    label=label,
    size=num_classes + 1,
    blank=num_classes,
    norm_by_times=True)
avg_cost = fluid.layers.mean(x=cost)

# TODO(wanghaoshuang): set clipping
optimizer = fluid.optimizer.Momentum(
    learning_rate=((1.0e-3) / 16), momentum=0.9)
opts = optimizer.minimize(cost)

decoded_out = fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes)
error_evaluator = fluid.evaluator.EditDistance(input=decoded_out, label=label)

BATCH_SIZE = 16
PASS_NUM = 1

# TODO(wanghaoshuang): replace with a correct data reader
train_reader = paddle.batch(
    paddle.reader.shuffle(
        random_reader(num_classes), buf_size=128 * 10),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe.run(fluid.default_startup_program())

for pass_id in range(PASS_NUM):
    error_evaluator.reset(exe)
    for data in train_reader():
        loss, error = exe.run(fluid.default_main_program(),
                              feed=feeder.feed(data),
                              fetch_list=[avg_cost] + error_evaluator.metrics)
        pass_error = error_evaluator.eval(exe)
        print "loss: %s; distance error: %s; pass_dis_error: %s;" % (
            str(loss), str(error), str(pass_error))
def ocr_ctc_net(images, num_classes, param_attrs):
    conv_features = ocr_conv(images, 8, True, param_attrs)
    sliced_feature = fluid.layers.im2sequence(
        input=conv_features, stride=[1, 1], filter_size=[1, 3])
    gru_forward = fluid.layers.dynamic_gru(
        input=sliced_feature, size=128, param_attr=param_attrs)
    gru_backward = fluid.layers.dynamic_gru(
        input=sliced_feature, size=128, is_reverse=True, param_attr=param_attrs)
    fc_out = fluid.layers.fc(input=[gru_forward, gru_backward],
                             size=num_classes + 1,
                             param_attr=param_attrs)
    return fc_out
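The im2sequence op above is what turns the 2-D feature map into the time sequence that the GRUs and warpctc consume. A hedged helper for counting the resulting time steps (no padding assumed; filter_size=[1, 3] and stride=[1, 1] mirror the call above):

def im2sequence_steps(height, width, filter_size=(1, 3), stride=(1, 1)):
    # number of sliding-window positions over one feature map
    fh, fw = filter_size
    sh, sw = stride
    return ((height - fh) / sh + 1) * ((width - fw) / sw + 1)

# e.g. a hypothetical 32 x 32 feature map yields 32 * 30 = 960 time steps
print im2sequence_steps(32, 32)   # 960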
def get_feeder_data(data, place):
    pixel_tensor = core.LoDTensor()
    pixel_data = np.concatenate(
        map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32")
    pixel_tensor.set(pixel_data, place)
    label_tensor = to_lodtensor(map(lambda x: x[1], data), place)
    return {"pixel": pixel_tensor, "label": label_tensor}
def train(num_classes=20,
          l2=0.0005 * 16,
          clip_threshold=10,
          data_reader=dummy_reader,
          learning_rate=((1.0e-3) / 16),
          momentum=0.9,
          batch_size=4,
          pass_num=2):
    param_attrs = fluid.ParamAttr(
        regularizer=fluid.regularizer.L2Decay(l2),
        gradient_clip=fluid.clip.GradientClipByValue(clip_threshold))
    data_shape = data_reader.data_shape()
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(
        name='label', shape=[1], dtype='int32', lod_level=1)

    fc_out = ocr_ctc_net(images, num_classes, param_attrs)

    cost = fluid.layers.warpctc(
        input=fc_out,
        label=label,
        size=num_classes + 1,
        blank=num_classes,
        norm_by_times=True)
    avg_cost = fluid.layers.mean(x=cost)

    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate, momentum=momentum)
    opts = optimizer.minimize(cost)

    decoded_out = fluid.layers.ctc_greedy_decoder(
        input=fc_out, blank=num_classes)
    casted_label = fluid.layers.cast(x=label, dtype='int64')
    error_evaluator = fluid.evaluator.EditDistance(
        input=decoded_out, label=casted_label)

    train_reader = paddle.batch(data_reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(data_reader.test(), batch_size=batch_size)

    # place = fluid.CPUPlace()
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
    exe.run(fluid.default_startup_program())

    inference_program = fluid.io.get_inference_program(error_evaluator)

    for pass_id in range(pass_num):
        error_evaluator.reset(exe)
        batch_id = 0
        for data in train_reader():
            loss, batch_edit_distance, _, _ = exe.run(
                fluid.default_main_program(),
                feed=get_feeder_data(data, place),
                fetch_list=[avg_cost] + error_evaluator.metrics)
            print "Pass[%d], batch[%d]; loss: %s; edit distance: %s" % (
                pass_id, batch_id, loss[0], batch_edit_distance[0])
            batch_id += 1

        train_edit_distance = error_evaluator.eval(exe)
        print "End pass[%d]; train data edit_distance: %s" % (
            pass_id, str(train_edit_distance))

        # test
        error_evaluator.reset(exe)
        for data in test_reader():
            exe.run(inference_program, feed=get_feeder_data(data, place))
        test_edit_distance = error_evaluator.eval(exe)
        print "End pass[%d]; test data edit_distance: %s" % (
            pass_id, str(test_edit_distance))


if __name__ == "__main__":
    train()
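For intuition about the ctc_greedy_decoder used in train(), a toy pure-Python version of the greedy decoding rule (the fluid op operates on LoDTensors; this sketch only shows the collapse-repeats-then-drop-blank step):

def greedy_decode(path, blank):
    out, prev = [], None
    for token in path:
        if token != prev and token != blank:
            out.append(token)
        prev = token
    return out

print greedy_decode([5, 5, 20, 5, 7, 7], blank=20)   # [5, 5, 7]

Note how the blank between the two 5s keeps them from being merged into a single symbol.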