Unverified  Commit a1319074  authored by zqw_1997 and committed by GitHub

remove paddle.fluid.contrib.layers.BasicLSTMUnit, basic_lstm, BasicGRUUnit, basic_gru (#49268)

* rm paddle.fluid.contrib.layers.BasicLSTMUnit basic_lstm BasicGRUUnit basic_gru

* rm dependency in __init__.py
Parent cb34ee0f
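These four contrib layers were legacy fluid RNN APIs. Code that still depends on them can generally move to the paddle.nn recurrent layers instead; the snippet below is a minimal migration sketch, assuming PaddlePaddle 2.x where paddle.nn.GRU and paddle.nn.LSTM cover the multi-layer, bidirectional cases the removed helpers handled (tensor names and sizes are illustrative and not part of this commit):

# Migration sketch (assumption: PaddlePaddle 2.x): the removed
# fluid.contrib.layers.basic_gru / basic_lstm map roughly onto
# paddle.nn.GRU / paddle.nn.LSTM.
import paddle

batch_size, seq_len, input_size, hidden_size = 5, 6, 10, 10
# Batch-first input, matching the layers' default time_major=False layout.
x = paddle.randn([batch_size, seq_len, input_size])

# Two-layer bidirectional GRU: returns outputs plus the last hidden state
# for each layer and direction.
gru = paddle.nn.GRU(input_size, hidden_size, num_layers=2, direction='bidirect')
gru_out, gru_last_h = gru(x)  # [batch, seq, 2*hidden], [4, batch, hidden]

# Two-layer bidirectional LSTM: the final state is a (hidden, cell) tuple.
lstm = paddle.nn.LSTM(input_size, hidden_size, num_layers=2, direction='bidirect')
lstm_out, (last_h, last_c) = lstm(x)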
@@ -15,11 +15,9 @@
 from . import nn
 from .nn import *
-from .rnn_impl import *
 from . import metric_op
 from .metric_op import *

 __all__ = []
 __all__ += nn.__all__
-__all__ += rnn_impl.__all__
 __all__ += metric_op.__all__
@@ -444,10 +444,6 @@ list(REMOVE_ITEM TEST_OPS
 list(REMOVE_ITEM TEST_OPS test_imperative_ocr_attention_model)
 list(REMOVE_ITEM TEST_OPS test_async_ssa_graph_executor_mnist)
 list(REMOVE_ITEM TEST_OPS test_install_check)
-list(REMOVE_ITEM TEST_OPS test_basic_gru_api)
-list(REMOVE_ITEM TEST_OPS test_basic_gru_unit_op)
-list(REMOVE_ITEM TEST_OPS test_basic_lstm_api)
-list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op)
 list(REMOVE_ITEM TEST_OPS test_fuse_all_reduce_pass)
 list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass)
 list(REMOVE_ITEM TEST_OPS test_fuse_bn_add_act_pass)
...
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
from paddle.fluid import framework
from paddle.fluid.contrib.layers import basic_gru
from paddle.fluid.executor import Executor
np.random.seed(123)
SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0
def sigmoid(x):
y = np.copy(x)
y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
return 1.0 / (1.0 + np.exp(-y))
def tanh(x):
y = -2.0 * x
y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
return (2.0 / (1.0 + np.exp(y))) - 1.0
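# Pure-numpy reference GRU: each step computes the reset/update gates from one
# fused matmul, then the candidate state; used to check basic_gru outputs.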
def gru_np(
input,
init_h,
hidden_size,
gate_weight,
gate_bias,
candidate_weight,
candidate_bias,
num_layers=1,
batch_first=False,
is_bidirect=False,
sequence_length=None,
):
def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b):
concat_1 = np.concatenate([step_in, pre_hidden], 1)
gate_input = np.matmul(concat_1, gate_w)
gate_input += gate_b
gate_input = sigmoid(gate_input)
r, u = np.split(gate_input, indices_or_sections=2, axis=1)
r_hidden = r * pre_hidden
candidate = np.matmul(
np.concatenate([step_in, r_hidden], 1), candidate_w
)
candidate += candidate_b
c = tanh(candidate)
new_hidden = u * pre_hidden + (1 - u) * c
return new_hidden
if batch_first:
input = np.transpose(input, [1, 0, 2])
batch_size = input.shape[1]
mask = None
if sequence_length is not None:
max_seq_len = input.shape[0]
mask = np.zeros([batch_size, max_seq_len])
for i, length in enumerate(sequence_length):
mask[i, :length] = 1.0
mask = np.transpose(mask, [1, 0])
direc_num = 1
if is_bidirect:
direc_num = 2
if init_h is not None:
init_h = np.reshape(
init_h, [num_layers, direc_num, -1, hidden_size]
)
else:
init_h = np.zeros([num_layers, direc_num, batch_size, hidden_size])
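# Run the stacked GRU over one direction; where the sequence mask is 0
# (padding), the previous hidden state is carried forward unchanged.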
def get_single_direction_output(rnn_input, mask=None, direc_index=0):
seq_len = rnn_input.shape[0]
output = []
# init pre hidden
pre_hidden_array = []
for i in range(num_layers):
pre_hidden_array.append(init_h[i, direc_index])
for i in range(seq_len):
step_input = rnn_input[i]
if mask is not None:
step_mask = mask[i]
step_mask = np.reshape(step_mask, [-1, 1])
for i in range(num_layers):
new_hidden = step(
step_input,
pre_hidden_array[i],
gate_weight[direc_index * num_layers + i],
gate_bias[direc_index * num_layers + i],
candidate_weight[direc_index * num_layers + i],
candidate_bias[direc_index * num_layers + i],
)
if mask is not None:
new_hidden = (
new_hidden * step_mask
+ (1 - step_mask) * pre_hidden_array[i]
)
pre_hidden_array[i] = new_hidden
step_input = new_hidden
output.append(step_input)
rnn_out = np.concatenate(output, 0)
rnn_out = np.reshape(rnn_out, [seq_len, -1, hidden_size])
last_hidden_out = np.concatenate(pre_hidden_array, 0)
last_hidden_out = np.reshape(
last_hidden_out, [num_layers, -1, hidden_size]
)
return rnn_out, last_hidden_out
fw_rnn_out, fw_last_hidden = get_single_direction_output(
input, mask, direc_index=0
)
if is_bidirect:
bw_input = input[::-1]
bw_mask = None
if mask is not None:
bw_mask = mask[::-1]
bw_rnn_out, bw_last_hidden = get_single_direction_output(
bw_input, bw_mask, direc_index=1
)
bw_rnn_out = bw_rnn_out[::-1]
rnn_out = np.concatenate([fw_rnn_out, bw_rnn_out], 2)
last_hidden = np.concatenate([fw_last_hidden, bw_last_hidden], 1)
last_hidden = np.reshape(
last_hidden, [num_layers * direc_num, -1, hidden_size]
)
if batch_first:
rnn_out = np.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden
else:
rnn_out = fw_rnn_out
last_hidden = fw_last_hidden
if batch_first:
rnn_out = np.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden
class TestBasicGRUApi(unittest.TestCase):
def setUp(self):
self.hidden_size = 10
self.batch_size = 5
self.seq_len = 6
self.num_layers = 2
self.is_bidirect = True
self.batch_first = False
def test_run(self):
x = layers.data(
name='x',
shape=[-1, self.batch_size, self.hidden_size],
dtype='float32',
)
sequence_length = layers.data(
name="sequence_length", shape=[-1], dtype='int64'
)
rnn_out, last_hidden = basic_gru(
x,
None,
self.hidden_size,
num_layers=self.num_layers,
batch_first=self.batch_first,
bidirectional=self.is_bidirect,
sequence_length=sequence_length,
)
last_hidden.persistable = True
rnn_out.persistable = True
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
param_list = fluid.default_main_program().block(0).all_parameters()
# process weight and bias
gate_weight = []
gate_bias = []
candidate_weight = []
candidate_bias = []
for i in range(self.num_layers):
gate_w_name = "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.w_0"
gate_b_name = "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.b_0"
candidate_w_name = (
"basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.w_1"
)
candidate_b_name = (
"basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.b_1"
)
gate_w = np.array(
fluid.global_scope().find_var(gate_w_name).get_tensor()
)
gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_w_name).get_tensor().set(
gate_w, place
)
gate_b = np.array(
fluid.global_scope().find_var(gate_b_name).get_tensor()
)
gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_b_name).get_tensor().set(
gate_b, place
)
candidate_w = np.array(
fluid.global_scope().find_var(candidate_w_name).get_tensor()
)
candidate_w = np.random.uniform(
-0.1, 0.1, size=candidate_w.shape
).astype('float32')
fluid.global_scope().find_var(candidate_w_name).get_tensor().set(
candidate_w, place
)
candidate_b = np.array(
fluid.global_scope().find_var(candidate_b_name).get_tensor()
)
candidate_b = np.random.uniform(
-0.1, 0.1, size=candidate_b.shape
).astype('float32')
fluid.global_scope().find_var(candidate_b_name).get_tensor().set(
candidate_b, place
)
gate_weight.append(gate_w)
gate_bias.append(gate_b)
candidate_weight.append(candidate_w)
candidate_bias.append(candidate_b)
if self.is_bidirect:
for i in range(self.num_layers):
gate_w_name = (
"basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.w_0"
)
gate_b_name = (
"basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.b_0"
)
candidate_w_name = (
"basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.w_1"
)
candidate_b_name = (
"basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.b_1"
)
gate_w = np.array(
fluid.global_scope().find_var(gate_w_name).get_tensor()
)
gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_w_name).get_tensor().set(
gate_w, place
)
gate_b = np.array(
fluid.global_scope().find_var(gate_b_name).get_tensor()
)
gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_b_name).get_tensor().set(
gate_b, place
)
candidate_w = np.array(
fluid.global_scope().find_var(candidate_w_name).get_tensor()
)
candidate_w = np.random.uniform(
-0.1, 0.1, size=candidate_w.shape
).astype('float32')
fluid.global_scope().find_var(
candidate_w_name
).get_tensor().set(candidate_w, place)
candidate_b = np.array(
fluid.global_scope().find_var(candidate_b_name).get_tensor()
)
candidate_b = np.random.uniform(
-0.1, 0.1, size=candidate_b.shape
).astype('float32')
fluid.global_scope().find_var(
candidate_b_name
).get_tensor().set(candidate_b, place)
gate_weight.append(gate_w)
gate_bias.append(gate_b)
candidate_weight.append(candidate_w)
candidate_bias.append(candidate_b)
step_input_np = np.random.uniform(
-0.1, 0.1, (self.seq_len, self.batch_size, self.hidden_size)
).astype('float32')
sequence_length_np = np.random.randint(
self.seq_len // 2, self.seq_len, size=(self.batch_size)
).astype('int64')
out = exe.run(
feed={'x': step_input_np, 'sequence_length': sequence_length_np},
fetch_list=[rnn_out, last_hidden],
)
api_rnn_out = out[0]
api_last_hidden = out[1]
np_out = gru_np(
step_input_np,
None,
self.hidden_size,
gate_weight,
gate_bias,
candidate_weight,
candidate_bias,
num_layers=self.num_layers,
batch_first=self.batch_first,
is_bidirect=self.is_bidirect,
sequence_length=sequence_length_np,
)
np.testing.assert_allclose(api_rnn_out, np_out[0], rtol=0.0001, atol=0)
np.testing.assert_allclose(
api_last_hidden, np_out[1], rtol=0.0001, atol=0
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
from paddle.fluid import framework
from paddle.fluid.contrib.layers import BasicGRUUnit
from paddle.fluid.executor import Executor
np.random.seed(123)
SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0
def sigmoid(x):
y = np.copy(x)
y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
return 1.0 / (1.0 + np.exp(-y))
def tanh(x):
y = -2.0 * x
y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
return (2.0 / (1.0 + np.exp(y))) - 1.0
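# Numpy reference for a single BasicGRUUnit step, using the same fused gate
# and candidate weights as the layer under test.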
def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b):
concat_1 = np.concatenate([step_in, pre_hidden], 1)
gate_input = np.matmul(concat_1, gate_w)
gate_input += gate_b
gate_input = sigmoid(gate_input)
r, u = np.split(gate_input, indices_or_sections=2, axis=1)
r_hidden = r * pre_hidden
candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), candidate_w)
candidate += candidate_b
c = tanh(candidate)
new_hidden = u * pre_hidden + (1 - u) * c
return new_hidden
class TestBasicGRUUnit(unittest.TestCase):
def setUp(self):
self.hidden_size = 5
self.batch_size = 5
def test_run(self):
x = layers.data(name='x', shape=[-1, self.hidden_size], dtype='float32')
pre_hidden = layers.data(
name="pre_hidden", shape=[-1, self.hidden_size], dtype='float32'
)
gru_unit = BasicGRUUnit("gru_unit", self.hidden_size)
new_hidden = gru_unit(x, pre_hidden)
new_hidden.persistable = True
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
param_list = fluid.default_main_program().block(0).all_parameters()
# process weight and bias
gate_w_name = "gru_unit/BasicGRUUnit_0.w_0"
gate_b_name = "gru_unit/BasicGRUUnit_0.b_0"
candidate_w_name = "gru_unit/BasicGRUUnit_0.w_1"
candidate_b_name = "gru_unit/BasicGRUUnit_0.b_1"
gate_w = np.array(
fluid.global_scope().find_var(gate_w_name).get_tensor()
)
gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_w_name).get_tensor().set(
gate_w, place
)
gate_b = np.array(
fluid.global_scope().find_var(gate_b_name).get_tensor()
)
gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_b_name).get_tensor().set(
gate_b, place
)
candidate_w = np.array(
fluid.global_scope().find_var(candidate_w_name).get_tensor()
)
candidate_w = np.random.uniform(
-0.1, 0.1, size=candidate_w.shape
).astype('float32')
fluid.global_scope().find_var(candidate_w_name).get_tensor().set(
candidate_w, place
)
candidate_b = np.array(
fluid.global_scope().find_var(candidate_b_name).get_tensor()
)
candidate_b = np.random.uniform(
-0.1, 0.1, size=candidate_b.shape
).astype('float32')
fluid.global_scope().find_var(candidate_b_name).get_tensor().set(
candidate_b, place
)
step_input_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
pre_hidden_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
out = exe.run(
feed={'x': step_input_np, 'pre_hidden': pre_hidden_np},
fetch_list=[new_hidden],
)
api_out = out[0]
np_out = step(
step_input_np,
pre_hidden_np,
gate_w,
gate_b,
candidate_w,
candidate_b,
)
np.testing.assert_allclose(api_out, np_out, rtol=0.0001, atol=0)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
from paddle.fluid import framework
from paddle.fluid.contrib.layers import basic_lstm
from paddle.fluid.executor import Executor
np.random.seed(123)
SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0
def sigmoid(x):
y = np.copy(x)
y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
return 1.0 / (1.0 + np.exp(-y))
def tanh(x):
y = -2.0 * x
y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
return (2.0 / (1.0 + np.exp(y))) - 1.0
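# Pure-numpy reference LSTM: one fused matmul produces the i, j, f, o gates
# per step (forget_bias added to f); used to check basic_lstm outputs.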
def lstm_np(
input,
init_h,
init_c,
hidden_size,
gate_weight,
gate_bias,
num_layers=1,
batch_first=False,
is_bidirect=False,
sequence_length=None,
forget_bias=1.0,
):
def step(step_in, pre_hidden, pre_cell, gate_w, gate_b):
concat_1 = np.concatenate([step_in, pre_hidden], 1)
gate_input = np.matmul(concat_1, gate_w)
gate_input += gate_b
i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1)
new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j)
new_hidden = tanh(new_cell) * sigmoid(o)
return new_hidden, new_cell
mask = None
if batch_first:
input = np.transpose(input, [1, 0, 2])
if mask is not None:
mask = np.transpose(mask, [1, 0])
batch_size = input.shape[1]
if sequence_length is not None:
max_seq_len = input.shape[0]
mask = np.zeros([batch_size, max_seq_len])
for i, length in enumerate(sequence_length):
mask[i, :length] = 1.0
mask = np.transpose(mask, [1, 0])
direc_num = 1
if is_bidirect:
direc_num = 2
if init_h is not None:
init_h = np.reshape(init_h, [num_layers, direc_num, -1, hidden_size])
init_c = np.reshape(init_c, [num_layers, direc_num, -1, hidden_size])
else:
init_h = np.zeros([num_layers, direc_num, batch_size, hidden_size])
init_c = np.zeros([num_layers, direc_num, batch_size, hidden_size])
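# Run the stacked LSTM over one direction; masked (padded) steps keep the
# previous hidden and cell state.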
def get_single_direction_output(rnn_input, mask=None, direc_index=0):
seq_len = rnn_input.shape[0]
output = []
# init pre hidden
pre_hidden_array = []
pre_cell_array = []
for i in range(num_layers):
pre_hidden_array.append(init_h[i, direc_index])
pre_cell_array.append(init_c[i, direc_index])
for i in range(seq_len):
step_input = rnn_input[i]
if mask is not None:
step_mask = mask[i]
step_mask = np.reshape(step_mask, [-1, 1])
# print("np mask", step_mask.shape )
for i in range(num_layers):
new_hidden, new_cell = step(
step_input,
pre_hidden_array[i],
pre_cell_array[i],
gate_weight[direc_index * num_layers + i],
gate_bias[direc_index * num_layers + i],
)
if mask is not None:
new_hidden = np.multiply(
new_hidden, step_mask
) - np.multiply(pre_hidden_array[i], (step_mask - 1.0))
# new_hidden = new_hidden * step_mask - pre_hidden_array[i] * ( step_mask -1 )
# new_cell = new_cell * step_mask - pre_cell_array[i] * (step_mask -1)
new_cell = np.multiply(new_cell, step_mask) - np.multiply(
pre_cell_array[i], (step_mask - 1.0)
)
pre_hidden_array[i] = new_hidden
pre_cell_array[i] = new_cell
step_input = new_hidden
output.append(step_input)
rnn_out = np.concatenate(output, 0)
rnn_out = np.reshape(rnn_out, [seq_len, -1, hidden_size])
last_hidden_out = np.concatenate(pre_hidden_array, 0)
last_hidden_out = np.reshape(
last_hidden_out, [num_layers, -1, hidden_size]
)
last_cell_out = np.concatenate(pre_cell_array, 0)
last_cell_out = np.reshape(last_cell_out, [num_layers, -1, hidden_size])
return rnn_out, last_hidden_out, last_cell_out
fw_rnn_out, fw_last_hidden, fw_last_cell = get_single_direction_output(
input, mask, direc_index=0
)
if is_bidirect:
bw_input = input[::-1]
bw_mask = None
if mask is not None:
bw_mask = mask[::-1]
bw_rnn_out, bw_last_hidden, bw_last_cell = get_single_direction_output(
bw_input, bw_mask, direc_index=1
)
bw_rnn_out = bw_rnn_out[::-1]
rnn_out = np.concatenate([fw_rnn_out, bw_rnn_out], 2)
last_hidden = np.concatenate([fw_last_hidden, bw_last_hidden], 1)
last_hidden = np.reshape(
last_hidden, [num_layers * direc_num, -1, hidden_size]
)
last_cell = np.concatenate([fw_last_cell, bw_last_cell], 1)
last_cell = np.reshape(
last_cell, [num_layers * direc_num, -1, hidden_size]
)
if batch_first:
rnn_out = np.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden, last_cell
else:
rnn_out = fw_rnn_out
last_hidden = fw_last_hidden
last_cell = fw_last_cell
if batch_first:
rnn_out = np.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden, last_cell
class TestBasicLSTMApi(unittest.TestCase):
def setUp(self):
self.hidden_size = 10
self.batch_size = 5
self.seq_len = 6
self.num_layers = 2
self.is_bidirect = True
self.batch_first = False
self.forget_bias = 1.0
def test_run(self):
x = layers.data(
name='x',
shape=[-1, self.batch_size, self.hidden_size],
dtype='float32',
)
sequence_length = layers.data(
name="sequence_length", shape=[-1], dtype='int64'
)
rnn_out, last_hidden, last_cell = basic_lstm(
x,
None,
None,
self.hidden_size,
num_layers=self.num_layers,
batch_first=self.batch_first,
bidirectional=self.is_bidirect,
sequence_length=sequence_length,
forget_bias=self.forget_bias,
)
last_hidden.persistable = True
rnn_out.persistable = True
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
param_list = fluid.default_main_program().block(0).all_parameters()
# process weight and bias
gate_weight = []
gate_bias = []
for i in range(self.num_layers):
gate_w_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.w_0"
gate_b_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.b_0"
gate_w = np.array(
fluid.global_scope().find_var(gate_w_name).get_tensor()
)
gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_w_name).get_tensor().set(
gate_w, place
)
gate_b = np.array(
fluid.global_scope().find_var(gate_b_name).get_tensor()
)
gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_b_name).get_tensor().set(
gate_b, place
)
gate_weight.append(gate_w)
gate_bias.append(gate_b)
if self.is_bidirect:
for i in range(self.num_layers):
gate_w_name = (
"basic_lstm_reverse_layers_"
+ str(i)
+ "/BasicLSTMUnit_0.w_0"
)
gate_b_name = (
"basic_lstm_reverse_layers_"
+ str(i)
+ "/BasicLSTMUnit_0.b_0"
)
gate_w = np.array(
fluid.global_scope().find_var(gate_w_name).get_tensor()
)
gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_w_name).get_tensor().set(
gate_w, place
)
gate_b = np.array(
fluid.global_scope().find_var(gate_b_name).get_tensor()
)
gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_b_name).get_tensor().set(
gate_b, place
)
gate_weight.append(gate_w)
gate_bias.append(gate_b)
step_input_np = np.random.uniform(
-0.1, 0.1, (self.seq_len, self.batch_size, self.hidden_size)
).astype('float32')
sequence_length_np = np.random.randint(
self.seq_len // 2, self.seq_len, size=(self.batch_size)
).astype('int64')
out = exe.run(
feed={'x': step_input_np, 'sequence_length': sequence_length_np},
fetch_list=[rnn_out, last_hidden, last_cell],
)
api_rnn_out = out[0]
api_last_hidden = out[1]
api_last_cell = out[2]
np_out = lstm_np(
step_input_np,
None,
None,
self.hidden_size,
gate_weight,
gate_bias,
num_layers=self.num_layers,
batch_first=self.batch_first,
is_bidirect=self.is_bidirect,
sequence_length=sequence_length_np,
)
np.testing.assert_allclose(api_rnn_out, np_out[0], rtol=0.0001, atol=0)
np.testing.assert_allclose(
api_last_hidden, np_out[1], rtol=0.0001, atol=0
)
np.testing.assert_allclose(
api_last_cell, np_out[2], rtol=0.0001, atol=0
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
from paddle.fluid import framework
from paddle.fluid.contrib.layers import BasicLSTMUnit
from paddle.fluid.executor import Executor
np.random.seed(123)
SIGMOID_THRESHOLD_MIN = -40.0
SIGMOID_THRESHOLD_MAX = 13.0
EXP_MAX_INPUT = 40.0
def sigmoid(x):
y = np.copy(x)
y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN
y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX
return 1.0 / (1.0 + np.exp(-y))
def tanh(x):
y = -2.0 * x
y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT
return (2.0 / (1.0 + np.exp(y))) - 1.0
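# Numpy reference for a single BasicLSTMUnit step with the default forget bias.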
def step(step_in, pre_hidden, pre_cell, gate_w, gate_b, forget_bias=1.0):
concat_1 = np.concatenate([step_in, pre_hidden], 1)
gate_input = np.matmul(concat_1, gate_w)
gate_input += gate_b
i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1)
new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j)
new_hidden = tanh(new_cell) * sigmoid(o)
return new_hidden, new_cell
class TestBasicLSTMUnit(unittest.TestCase):
def setUp(self):
self.hidden_size = 5
self.batch_size = 5
def test_run(self):
x = layers.data(name='x', shape=[-1, self.hidden_size], dtype='float32')
pre_hidden = layers.data(
name="pre_hidden", shape=[-1, self.hidden_size], dtype='float32'
)
pre_cell = layers.data(
name="pre_cell", shape=[-1, self.hidden_size], dtype='float32'
)
lstm_unit = BasicLSTMUnit("lstm_unit", self.hidden_size)
new_hidden, new_cell = lstm_unit(x, pre_hidden, pre_cell)
new_hidden.persistable = True
new_cell.persistable = True
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
param_list = fluid.default_main_program().block(0).all_parameters()
# process weight and bias
gate_w_name = "lstm_unit/BasicLSTMUnit_0.w_0"
gate_b_name = "lstm_unit/BasicLSTMUnit_0.b_0"
gate_w = np.array(
fluid.global_scope().find_var(gate_w_name).get_tensor()
)
gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_w_name).get_tensor().set(
gate_w, place
)
gate_b = np.array(
fluid.global_scope().find_var(gate_b_name).get_tensor()
)
gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
'float32'
)
fluid.global_scope().find_var(gate_b_name).get_tensor().set(
gate_b, place
)
step_input_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
pre_hidden_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
pre_cell_np = np.random.uniform(
-0.1, 0.1, (self.batch_size, self.hidden_size)
).astype('float32')
out = exe.run(
feed={
'x': step_input_np,
'pre_hidden': pre_hidden_np,
'pre_cell': pre_cell_np,
},
fetch_list=[new_hidden, new_cell],
)
api_hidden_out = out[0]
api_cell_out = out[1]
np_hidden_out, np_cell_out = step(
step_input_np, pre_hidden_np, pre_cell_np, gate_w, gate_b
)
np.testing.assert_allclose(
api_hidden_out, np_hidden_out, rtol=0.0001, atol=0
)
np.testing.assert_allclose(
api_cell_out, np_cell_out, rtol=0.0001, atol=0
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from test_imperative_base import new_program_scope
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.contrib.layers import basic_gru, basic_lstm
class TestBasicGRUApiName(unittest.TestCase):
def setUp(self):
self.name_set = set(
[
"test1_fw_w_0_gate",
"test1_fw_w_0_candidate",
"test1_fw_b_0_gate",
"test1_fw_b_0_candidate",
"test1_bw_w_0_gate",
"test1_bw_w_0_candidate",
"test1_bw_b_0_gate",
"test1_bw_b_0_candidate",
]
)
def test_name(self):
batch_size = 20
input_size = 128
hidden_size = 256
num_layers = 1
dropout = 0.5
bidirectional = True
batch_first = False
with new_program_scope():
input = layers.data(
name="input",
shape=[-1, batch_size, input_size],
dtype='float32',
)
pre_hidden = layers.data(
name="pre_hidden", shape=[-1, hidden_size], dtype='float32'
)
sequence_length = layers.data(
name="sequence_length", shape=[-1], dtype='int32'
)
rnn_out, last_hidden = basic_gru(
input,
pre_hidden,
hidden_size,
num_layers=num_layers,
sequence_length=sequence_length,
dropout_prob=dropout,
bidirectional=bidirectional,
batch_first=batch_first,
param_attr=fluid.ParamAttr(name="test1"),
bias_attr=fluid.ParamAttr(name="test1"),
name="basic_gru",
)
var_list = fluid.io.get_program_parameter(
fluid.default_main_program()
)
for var in var_list:
self.assertTrue(var.name in self.name_set)
class TestBasicLSTMApiName(unittest.TestCase):
def setUp(self):
self.name_set = set(
[
"test1_fw_w_0",
"test1_fw_b_0",
"test1_fw_w_1",
"test1_fw_b_1",
"test1_bw_w_0",
"test1_bw_b_0",
"test1_bw_w_1",
"test1_bw_b_1",
]
)
def test_name(self):
batch_size = 20
input_size = 128
hidden_size = 256
num_layers = 2
dropout = 0.5
bidirectional = True
batch_first = False
with new_program_scope():
input = layers.data(
name="input",
shape=[-1, batch_size, input_size],
dtype='float32',
)
pre_hidden = layers.data(
name="pre_hidden", shape=[-1, hidden_size], dtype='float32'
)
pre_cell = layers.data(
name="pre_cell", shape=[-1, hidden_size], dtype='float32'
)
sequence_length = layers.data(
name="sequence_length", shape=[-1], dtype='int32'
)
rnn_out, last_hidden, last_cell = basic_lstm(
input,
pre_hidden,
pre_cell,
hidden_size,
num_layers=num_layers,
sequence_length=sequence_length,
dropout_prob=dropout,
bidirectional=bidirectional,
param_attr=fluid.ParamAttr(name="test1"),
bias_attr=fluid.ParamAttr(name="test1"),
batch_first=batch_first,
)
var_list = fluid.io.get_program_parameter(
fluid.default_main_program()
)
for var in var_list:
self.assertTrue(var.name in self.name_set)
if __name__ == '__main__':
unittest.main()
@@ -20,8 +20,6 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
-from paddle.fluid import ParamAttr
-from paddle.fluid.contrib.layers import basic_lstm
 from paddle.fluid.executor import Executor
 from paddle.fluid.layers.control_flow import StaticRNN as PaddingRNN
@@ -85,7 +83,7 @@ class RNNConfig:
         else:
             raise ValueError('Unsupported model_type.')

-        if rnn_model not in ('static', 'padding', 'cudnn', 'basic_lstm'):
+        if rnn_model not in ('static', 'padding', 'cudnn'):
             raise ValueError('Unsupported rnn_model.')

         self.batch_size = 12
@@ -406,23 +404,6 @@ def lm_model(
             init_hidden=init_hidden_reshape,
             init_cell=init_cell_reshape,
         )
-    elif rnn_model == "basic_lstm":
-        rnn_out, last_hidden, last_cell = basic_lstm(
-            x_emb,
-            init_hidden,
-            init_cell,
-            hidden_size,
-            num_layers=num_layers,
-            batch_first=True,
-            dropout_prob=dropout,
-            param_attr=ParamAttr(
-                initializer=fluid.initializer.UniformInitializer(
-                    low=-init_scale, high=init_scale
-                )
-            ),
-            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
-            forget_bias=0.0,
-        )
     else:
         print("type not support")
         return
...
@@ -2329,33 +2329,6 @@ class TestBook(LayerTest):
             )
             return output

-    def test_basic_gru(self):
-        input_size = 128
-        hidden_size = 256
-        with self.static_graph():
-            input = fluid.data(
-                name="input", shape=[None, None, input_size], dtype='float32'
-            )
-            pre_hidden = fluid.data(
-                name="pre_hidden", shape=[None, hidden_size], dtype='float32'
-            )
-            sequence_length = fluid.data(
-                name="sequence_length", shape=[None], dtype='int32'
-            )
-
-            for bidirectional in [True, False]:
-                for batch_first in [True, False]:
-                    rnn_out, last_hidden = fluid.contrib.layers.basic_gru(
-                        input,
-                        pre_hidden,
-                        hidden_size=256,
-                        num_layers=2,
-                        sequence_length=sequence_length,
-                        dropout_prob=0.5,
-                        bidirectional=bidirectional,
-                        batch_first=batch_first,
-                    )

 class ExampleNet(paddle.nn.Layer):
     def __init__(self):
...
@@ -274,7 +274,6 @@ HIGH_PARALLEL_JOB_NEW = [
     'test_mkldnn_op_nhwc',
     'test_fc_act_mkldnn_fuse_pass',
     'test_fleet_base_3',
-    'test_basic_rnn_name',
     'test_query_op',
     'test_fleet_base_4',
     'save_load_op_test',
@@ -1980,7 +1979,6 @@ CPU_PARALLEL_JOB = [
     'test_beam_search_op',
     'test_batch_sampler',
     'test_batch_norm_act_fuse_pass',
-    'test_basic_rnn_name',
     'test_attention_lstm_op',
     'test_analyzer',
     'test_aligned_allocator',
...
@@ -71,7 +71,6 @@ STATIC_MODE_TESTING_LIST = [
     'test_auc_single_pred_op',
     'test_avoid_twice_initialization',
     'test_backward',
-    'test_basic_rnn_name',
     'test_batch_norm_op',
     'test_batch_norm_op_v2',
     'test_bce_loss',
...