Commit f1050475 authored by hong, committed by Leo Chen

Update ocr to support remove build once (#4109)

* update ptb to support remove build once; test=develop

* update ocr; test=develop

* polish code, test=develop
Co-authored-by: zhongpu <2013000149@qq.com>
Parent c0f8360e
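The diff below tracks PaddlePaddle's move away from build-once layers: the name_scope/full_name() constructor arguments disappear, FC (which inferred its input size on the first forward pass) is replaced by Linear with an explicit input_dim, and the optimizer now receives the parameter list at construction time. A minimal sketch of the pattern, assuming a Paddle 1.7-era fluid.dygraph environment (TinyNet is illustrative, not part of this commit):

# Sketch of the before/after API change (assumes Paddle ~1.7; TinyNet is
# a hypothetical layer, not part of this commit).
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Linear

class TinyNet(fluid.dygraph.Layer):
    def __init__(self, input_dim, hidden_dim):
        # New style: no name_scope argument; Linear takes an explicit
        # input_dim instead of deferring shape inference to the first call.
        super(TinyNet, self).__init__()
        self.fc = Linear(input_dim, hidden_dim, act='relu')

    def forward(self, x):
        return self.fc(x)

with fluid.dygraph.guard():
    net = TinyNet(16, 32)
    # Parameters exist immediately after __init__ (no build-once pass),
    # so they can be handed to the optimizer up front.
    opt = fluid.optimizer.Adam(learning_rate=0.001,
                               parameter_list=net.parameters())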
@@ -20,7 +20,7 @@ import paddle.fluid.profiler as profiler
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import data_reader
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm, Embedding, GRUUnit
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, BatchNorm, Embedding, GRUUnit
 from paddle.fluid.dygraph.base import to_variable
 import argparse
 import functools
@@ -57,6 +57,8 @@ class Config(object):
     '''
     config for training
     '''
+    # encoder rnn hidden_size
+    encoder_size = 200
     # decoder size for decoder stage
     decoder_size = 128
     # size for word embedding
@@ -84,7 +86,6 @@ class Config(object):
 class ConvBNPool(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
                  group,
                  out_ch,
                  channels,
...@@ -92,7 +93,7 @@ class ConvBNPool(fluid.dygraph.Layer): ...@@ -92,7 +93,7 @@ class ConvBNPool(fluid.dygraph.Layer):
is_test=False, is_test=False,
pool=True, pool=True,
use_cudnn=True): use_cudnn=True):
super(ConvBNPool, self).__init__(name_scope) super(ConvBNPool, self).__init__()
self.group = group self.group = group
self.pool = pool self.pool = pool
@@ -106,7 +107,7 @@ class ConvBNPool(fluid.dygraph.Layer):
             initializer=fluid.initializer.Normal(0.0, conv_std_1))
         self.conv_0_layer = Conv2D(
-            self.full_name(),
+            channels[0],
             out_ch[0],
             3,
             padding=1,
@@ -115,9 +116,9 @@ class ConvBNPool(fluid.dygraph.Layer):
             act=None,
             use_cudnn=use_cudnn)
         self.bn_0_layer = BatchNorm(
-            self.full_name(), out_ch[0], act=act, is_test=is_test)
+            out_ch[0], act=act, is_test=is_test)
         self.conv_1_layer = Conv2D(
-            self.full_name(),
+            out_ch[0],
             num_filters=out_ch[1],
             filter_size=3,
             padding=1,
@@ -126,12 +127,10 @@ class ConvBNPool(fluid.dygraph.Layer):
             act=None,
             use_cudnn=use_cudnn)
         self.bn_1_layer = BatchNorm(
-            self.full_name(), out_ch[1], act=act, is_test=is_test)
-        print( "pool", self.pool)
+            out_ch[1], act=act, is_test=is_test)
         if self.pool:
             self.pool_layer = Pool2D(
-                self.full_name(),
                 pool_size=2,
                 pool_type='max',
                 pool_stride=2,
@@ -151,25 +150,21 @@ class ConvBNPool(fluid.dygraph.Layer):
 class OCRConv(fluid.dygraph.Layer):
-    def __init__(self, name_scope, is_test=False, use_cudnn=True):
-        super(OCRConv, self).__init__(name_scope)
+    def __init__(self, is_test=False, use_cudnn=True):
+        super(OCRConv, self).__init__()
         self.conv_bn_pool_1 = ConvBNPool(
-            self.full_name(),
             2, [16, 16], [1, 16],
             is_test=is_test,
             use_cudnn=use_cudnn)
         self.conv_bn_pool_2 = ConvBNPool(
-            self.full_name(),
             2, [32, 32], [16, 32],
             is_test=is_test,
             use_cudnn=use_cudnn)
         self.conv_bn_pool_3 = ConvBNPool(
-            self.full_name(),
             2, [64, 64], [32, 64],
             is_test=is_test,
             use_cudnn=use_cudnn)
         self.conv_bn_pool_4 = ConvBNPool(
-            self.full_name(),
             2, [128, 128], [64, 128],
             is_test=is_test,
             pool=False,
@@ -181,13 +176,11 @@ class OCRConv(fluid.dygraph.Layer):
         inputs_3 = self.conv_bn_pool_3(inputs_2)
         inputs_4 = self.conv_bn_pool_4(inputs_3)
-        #print( inputs_4.numpy() )
         return inputs_4


 class DynamicGRU(fluid.dygraph.Layer):
     def __init__(self,
-                 scope_name,
                  size,
                  param_attr=None,
                  bias_attr=None,
@@ -197,10 +190,9 @@ class DynamicGRU(fluid.dygraph.Layer):
                  h_0=None,
                  origin_mode=False,
                  init_size = None):
-        super(DynamicGRU, self).__init__(scope_name)
+        super(DynamicGRU, self).__init__()
         self.gru_unit = GRUUnit(
-            self.full_name(),
             size * 3,
             param_attr=param_attr,
             bias_attr=bias_attr,
@@ -239,11 +231,10 @@ class DynamicGRU(fluid.dygraph.Layer):
 class EncoderNet(fluid.dygraph.Layer):
     def __init__(self,
-                 scope_name,
-                 rnn_hidden_size=200,
+                 rnn_hidden_size=Config.encoder_size,
                  is_test=False,
                  use_cudnn=True):
-        super(EncoderNet, self).__init__(scope_name)
+        super(EncoderNet, self).__init__()
         self.rnn_hidden_size = rnn_hidden_size
         para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0,
                                                                          0.02))
@@ -259,27 +250,24 @@ class EncoderNet(fluid.dygraph.Layer):
             dtype='float32',
             value=0)
         self.ocr_convs = OCRConv(
-            self.full_name(), is_test=is_test, use_cudnn=use_cudnn)
-        self.fc_1_layer = FC(self.full_name(),
+            is_test=is_test, use_cudnn=use_cudnn)
+        self.fc_1_layer = Linear( 768,
                              rnn_hidden_size * 3,
                              param_attr=para_attr,
-                             bias_attr=False,
-                             num_flatten_dims=2)
-        self.fc_2_layer = FC(self.full_name(),
+                             bias_attr=False )
+        print( "weight", self.fc_1_layer.weight.shape )
+        self.fc_2_layer = Linear( 768,
                              rnn_hidden_size * 3,
                              param_attr=para_attr,
-                             bias_attr=False,
-                             num_flatten_dims=2)
+                             bias_attr=False )
         self.gru_forward_layer = DynamicGRU(
-            self.full_name(),
             size=rnn_hidden_size,
             h_0=h_0,
             param_attr=para_attr,
             bias_attr=bias_attr,
             candidate_activation='relu')
         self.gru_backward_layer = DynamicGRU(
-            self.full_name(),
             size=rnn_hidden_size,
             h_0=h_0,
             param_attr=para_attr,
@@ -287,10 +275,9 @@ class EncoderNet(fluid.dygraph.Layer):
             candidate_activation='relu',
             is_reverse=True)
-        self.encoded_proj_fc = FC(self.full_name(),
+        self.encoded_proj_fc = Linear( rnn_hidden_size * 2,
                                   Config.decoder_size,
-                                  bias_attr=False,
-                                  num_flatten_dims=2)
+                                  bias_attr=False )

     def forward(self, inputs):
         conv_features = self.ocr_convs(inputs)
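Since Linear has no num_flatten_dims, the encoder's first FC input width must now be stated explicitly; the hard-coded 768 is the flattened height-times-channels depth of the conv features. A hedged sanity check, assuming 48-pixel-high input images (the height the companion data_reader appears to use) and the three stride-2 poolings in OCRConv:

# Hedged arithmetic behind the hard-coded 768 (assumes 48-pixel-high
# inputs; verify against data_reader before relying on it).
input_height = 48
pooled_height = input_height // 2 // 2 // 2  # three Pool2D(pool_stride=2) stages -> 6
conv_channels = 128                          # out_ch of conv_bn_pool_4
assert conv_channels * pooled_height == 768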
@@ -316,16 +303,15 @@ class EncoderNet(fluid.dygraph.Layer):
 class SimpleAttention(fluid.dygraph.Layer):
-    def __init__(self, scope_name, decoder_size):
-        super(SimpleAttention, self).__init__(scope_name)
-        self.fc_1 = FC(self.full_name(),
+    def __init__(self, decoder_size):
+        super(SimpleAttention, self).__init__()
+        self.fc_1 = Linear( decoder_size,
                        decoder_size,
                        act=None,
                        bias_attr=False)
-        self.fc_2 = FC(self.full_name(),
+        self.fc_2 = Linear( decoder_size,
                        1,
-                       num_flatten_dims = 2,
                        act=None,
                        bias_attr=False)
@@ -354,23 +340,22 @@ class SimpleAttention(fluid.dygraph.Layer):
 class GRUDecoderWithAttention(fluid.dygraph.Layer):
-    def __init__(self, scope_name, decoder_size, num_classes):
-        super(GRUDecoderWithAttention, self).__init__(scope_name)
-        self.simple_attention = SimpleAttention(self.full_name(), decoder_size)
-        self.fc_1_layer = FC(self.full_name(),
-                             size=decoder_size * 3,
+    def __init__(self, decoder_size, num_classes):
+        super(GRUDecoderWithAttention, self).__init__()
+        self.simple_attention = SimpleAttention(decoder_size)
+        self.fc_1_layer = Linear( input_dim = Config.encoder_size * 2,
+                             output_dim=decoder_size * 3,
                              bias_attr=False)
-        self.fc_2_layer = FC(self.full_name(),
-                             size=decoder_size * 3,
+        self.fc_2_layer = Linear( input_dim = decoder_size,
+                             output_dim=decoder_size * 3,
                              bias_attr=False)
         self.gru_unit = GRUUnit(
-            self.full_name(),
             size=decoder_size * 3,
             param_attr=None,
             bias_attr=None)
-        self.out_layer = FC(self.full_name(),
-                            size=num_classes + 2,
+        self.out_layer = Linear( input_dim = decoder_size,
+                            output_dim =num_classes + 2,
                             bias_attr=None,
                             act='softmax')
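The explicit input_dim values encode shapes that FC used to infer at first call: fc_1_layer consumes the encoder output, which concatenates forward and backward GRU states (hence Config.encoder_size * 2), while the * 3 widths feed GRUUnit's three gates. A hedged shape check, assuming the bidirectional concat described above:

# Hedged shape bookkeeping for the decoder's explicit input_dims
# (assumes the encoder concatenates forward and backward GRU outputs).
encoder_size = 200                   # Config.encoder_size
encoder_out_dim = encoder_size * 2   # bidirectional concat -> 400
decoder_size = 128                   # Config.decoder_size
gru_gate_dim = decoder_size * 3      # GRUUnit expects 3x hidden width
assert encoder_out_dim == 400 and gru_gate_dim == 384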
@@ -410,18 +395,18 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer):
 class OCRAttention(fluid.dygraph.Layer):
-    def __init__(self, scope_name):
-        super(OCRAttention, self).__init__(scope_name)
-        self.encoder_net = EncoderNet(self.full_name())
-        self.fc = FC(self.full_name(),
-                     size=Config.decoder_size,
+    def __init__(self):
+        super(OCRAttention, self).__init__()
+        self.encoder_net = EncoderNet()
+        self.fc = Linear( input_dim = Config.encoder_size,
+                     output_dim =Config.decoder_size,
                      bias_attr=False,
                      act='relu')
         self.embedding = Embedding(
-            self.full_name(), [Config.num_classes + 2, Config.word_vector_dim],
+            [Config.num_classes + 2, Config.word_vector_dim],
             dtype='float32')
         self.gru_decoder_with_attention = GRUDecoderWithAttention(
-            self.full_name(), Config.decoder_size, Config.num_classes)
+            Config.decoder_size, Config.num_classes)

     def forward(self, inputs, label_in):
@@ -451,14 +436,14 @@ def train(args):
     with fluid.dygraph.guard():
         backward_strategy = fluid.dygraph.BackwardStrategy()
         backward_strategy.sort_sum_gradient = True
-        ocr_attention = OCRAttention("ocr_attention")
+        ocr_attention = OCRAttention()
         if Config.learning_rate_decay == "piecewise_decay":
             learning_rate = fluid.layers.piecewise_decay(
                 [50000], [Config.LR, Config.LR * 0.01])
         else:
             learning_rate = Config.LR
-        optimizer = fluid.optimizer.Adam(learning_rate=0.001)
+        optimizer = fluid.optimizer.Adam(learning_rate=0.001, parameter_list=ocr_attention.parameters())
         dy_param_init_value = {}
         grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(5.0 )
...
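With parameters created eagerly, one dygraph step wires the optimizer, clipping, and backward pass together roughly as below; a sketch only, assuming this file's OCRAttention and Paddle ~1.7 signatures (img, label_in, and label_out are placeholders for real data_reader batches):

# Sketch of one training step under the new API (Paddle ~1.7 assumed;
# img / label_in / label_out stand in for data_reader batches).
with fluid.dygraph.guard():
    backward_strategy = fluid.dygraph.BackwardStrategy()
    backward_strategy.sort_sum_gradient = True
    model = OCRAttention()
    optimizer = fluid.optimizer.Adam(
        learning_rate=0.001, parameter_list=model.parameters())
    grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(5.0)

    prediction = model(img, label_in)  # forward pass
    loss = fluid.layers.cross_entropy(input=prediction, label=label_out)
    avg_loss = fluid.layers.mean(loss)
    avg_loss.backward(backward_strategy)
    optimizer.minimize(avg_loss, grad_clip=grad_clip)
    model.clear_gradients()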