提交 1cda437c 编写于 作者: xuyang2233's avatar xuyang2233

modified pr

上级 bbca1e0d
......@@ -11,6 +11,7 @@ inference/
inference_results/
output/
train_data/
log/
*.DS_Store
*.vs
*.user
......
......@@ -61,7 +61,6 @@ Loss:
PostProcess:
name: SPINAttnLabelDecode
character_dict_path: ./ppocr/utils/dict/spin_dict.txt
use_space_char: False
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......@@ -19,6 +20,9 @@ from __future__ import print_function
import paddle
from paddle import nn
'''This code is refer from:
https://github.com/hikopensource/DAVAR-Lab-OCR
'''
class SPINAttentionLoss(nn.Layer):
def __init__(self, reduction='mean', ignore_index=-100, **kwargs):
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -80,98 +80,6 @@ class SPINAttentionHead(nn.Layer):
return probs
class AttentionGRUCell(nn.Layer):
def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
super(AttentionGRUCell, self).__init__()
self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
self.h2h = nn.Linear(hidden_size, hidden_size)
self.score = nn.Linear(hidden_size, 1, bias_attr=False)
self.rnn = nn.GRUCell(
input_size=input_size + num_embeddings, hidden_size=hidden_size)
self.hidden_size = hidden_size
def forward(self, prev_hidden, batch_H, char_onehots):
batch_H_proj = self.i2h(batch_H)
prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
res = paddle.add(batch_H_proj, prev_hidden_proj)
res = paddle.tanh(res)
e = self.score(res)
alpha = F.softmax(e, axis=1)
alpha = paddle.transpose(alpha, [0, 2, 1])
context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
concat_context = paddle.concat([context, char_onehots], 1)
cur_hidden = self.rnn(concat_context, prev_hidden)
return cur_hidden, alpha
class AttentionLSTM(nn.Layer):
def __init__(self, in_channels, out_channels, hidden_size, **kwargs):
super(AttentionLSTM, self).__init__()
self.input_size = in_channels
self.hidden_size = hidden_size
self.num_classes = out_channels
self.attention_cell = AttentionLSTMCell(
in_channels, hidden_size, out_channels, use_gru=False)
self.generator = nn.Linear(hidden_size, out_channels)
def _char_to_onehot(self, input_char, onehot_dim):
input_ont_hot = F.one_hot(input_char, onehot_dim)
return input_ont_hot
def forward(self, inputs, targets=None, batch_max_length=25):
batch_size = inputs.shape[0]
num_steps = batch_max_length
hidden = (paddle.zeros((batch_size, self.hidden_size)), paddle.zeros(
(batch_size, self.hidden_size)))
output_hiddens = []
if targets is not None:
for i in range(num_steps):
# one-hot vectors for a i-th char
char_onehots = self._char_to_onehot(
targets[:, i], onehot_dim=self.num_classes)
hidden, alpha = self.attention_cell(hidden, inputs,
char_onehots)
hidden = (hidden[1][0], hidden[1][1])
output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1))
output = paddle.concat(output_hiddens, axis=1)
probs = self.generator(output)
else:
targets = paddle.zeros(shape=[batch_size], dtype="int32")
probs = None
for i in range(num_steps):
char_onehots = self._char_to_onehot(
targets, onehot_dim=self.num_classes)
hidden, alpha = self.attention_cell(hidden, inputs,
char_onehots)
probs_step = self.generator(hidden[0])
hidden = (hidden[1][0], hidden[1][1])
if probs is None:
probs = paddle.unsqueeze(probs_step, axis=1)
else:
probs = paddle.concat(
[probs, paddle.unsqueeze(
probs_step, axis=1)], axis=1)
next_input = probs_step.argmax(axis=1)
targets = next_input
return probs
class AttentionLSTMCell(nn.Layer):
def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
super(AttentionLSTMCell, self).__init__()
......
......@@ -70,17 +70,6 @@ class BidirectionalLSTM(nn.Layer):
self.linear = nn.Linear(hidden_size * 2, output_size)
def forward(self, input_feature):
"""
Args:
input_feature (Torch.Tensor): visual feature [batch_size x T x input_size]
Returns:
Torch.Tensor: LSTM output contextual feature [batch_size x T x output_size]
"""
# self.rnn.flatten_parameters() # error in export_model
recurrent, _ = self.rnn(input_feature) # batch_size x T x input_size -> batch_size x T x (2*hidden_size)
if self.with_linear:
output = self.linear(recurrent) # batch_size x T x output_size
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -71,14 +71,14 @@ class SP_TransformerNetwork(nn.Layer):
"""
Args:
batch_I (torch.Tensor): batch of input images [batch_size x nc x I_height x I_width]
batch_I (Tensor): batch of input images [batch_size x nc x I_height x I_width]
weights:
offsets: the predicted offset by AIN, a scalar
lambda_color: the learnable update gate \alpha in Equa. (5) as
g(x) = (1 - \alpha) \odot x + \alpha \odot x_{offsets}
Returns:
torch.Tensor: transformed images by SPN as Equa. (4) in Ref. [1]
Tensor: transformed images by SPN as Equa. (4) in Ref. [1]
[batch_size x I_channel_num x I_r_height x I_r_width]
"""
......@@ -114,8 +114,6 @@ class GA_SPIN_Transformer(nn.Layer):
in_channels (int): channel of input features,
set it to 1 if the grayscale images and 3 if RGB input
I_r_size (tuple): size of rectified images (used in STN transformations)
inputDataType (str): the type of input data,
only support 'torch.cuda.FloatTensor' this version
offsets (bool): set it to False if use SPN w.o. AIN,
and set it to True if use SPIN (both with SPN and AIN)
norm_type (str): the normalization type of the module,
......@@ -123,6 +121,7 @@ class GA_SPIN_Transformer(nn.Layer):
default_type (int): the K chromatic space,
set it to 3/5/6 depend on the complexity of transformation intensities
loc_lr (float): learning rate of location network
stn (bool): whther to use stn.
"""
super(GA_SPIN_Transformer, self).__init__()
......@@ -233,12 +232,12 @@ class GA_SPIN_Transformer(nn.Layer):
def forward(self, x, return_weight=False):
"""
Args:
x (torch.cuda.FloatTensor): input image batch
x (Tensor): input image batch
return_weight (bool): set to False by default,
if set to True return the predicted offsets of AIN, denoted as x_{offsets}
Returns:
torch.Tensor: rectified image [batch_size x I_channel_num x I_height x I_width], the same as the input size
Tensor: rectified image [batch_size x I_channel_num x I_height x I_width], the same as the input size
"""
if self.spt:
......
......@@ -73,12 +73,6 @@ def export_single_model(model, arch_config, save_path, logger, quanter=None):
shape=[None, 3, 64, 512], dtype="float32"),
]
model = to_static(model, input_spec=other_shape)
elif arch_config["algorithm"] == "SPIN":
other_shape = [
paddle.static.InputSpec(
shape=[None, 1, 32, 100], dtype="float32"),
]
model = to_static(model, input_spec=other_shape)
else:
infer_shape = [3, -1, -1]
if arch_config["model_type"] == "rec":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册