Commit 8ba8237a authored by sweetsky0901

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into my_unpool_max_2d

@@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(sigmoid)
         .InEnum({identity, sigmoid, tanh, relu});
     AddComment(R"DOC(
-GRUUnit Operator.
-This operator implements partial calculations of the GRU unit as follows:
+GRUUnit Operator implements partial calculations of the GRU unit as follows:
 $$
-update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
-output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
-output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
+update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
+output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
 $$
-The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
+which is the same as one time step of the GRU Operator.
+
+@note To implement the complete GRU unit, a fully-connected operator must be
+used beforehand to feed xu, xr and xc as the Input of the GRUUnit operator.
 )DOC");
   }
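For reference, below is a minimal NumPy sketch (not part of the patch) of the single GRU-unit forward step described by the DOC formulas above. It assumes a sigmoid gate activation, a tanh candidate activation, and the flattened weight layout used by the GRU unit Python test; the helper names gru_unit_step and sigmoid are illustrative only.

import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def gru_unit_step(x, h_prev, weight, bias=None):
    # x:      (batch, 3 * frame) projected input, laid out as [xu, xr, xc]
    # h_prev: (batch, frame) previous hidden state h_{t-1}
    # weight: flat array holding W_u and W_r (2 * frame * frame values)
    #         followed by W_c (frame * frame values), as in the unit test
    # bias:   optional (1, 3 * frame) bias [b_u, b_r, b_c]
    batch, frame = h_prev.shape
    if bias is not None:
        x = x + bias
    w_u_r = weight.flatten()[:frame * frame * 2].reshape(frame, frame * 2)
    w_c = weight.flatten()[frame * frame * 2:].reshape(frame, frame)
    # update and reset gates: u_t, r_t = actGate(x[u, r] + h_{t-1} * [W_u, W_r])
    u_r = sigmoid(x[:, :frame * 2] + np.dot(h_prev, w_u_r))
    u, r = u_r[:, :frame], u_r[:, frame:]
    # output candidate: c_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}))
    c = np.tanh(x[:, frame * 2:] + np.dot(r * h_prev, w_c))
    # final output, matching the updated kernel: h_t = (1 - u_t) * h_{t-1} + u_t * c_t
    return u * c + (1.0 - u) * h_prev

Swapping np.tanh for the identity function reproduces the reference computation used in the GRU unit test further below.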
@@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
                    "ResetHiddenPrev");
     PADDLE_ENFORCE(ctx->HasInput("Hidden"),
                    "Input(%s) of GRUUnitGradOp should not be null.", "Hidden");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")),
-                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
-                   "Gate");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")),
-                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
-                   "ResetHiddenPrev");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
                    "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
                    "Hidden");
......
@@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
     auto c = g.slice(c_offsets, extents);  // output candidate
     // calculate final output
-    h.device(place) = u * (h_p - c) + c;
+    h.device(place) = u * (c - h_p) + h_p;
   }
 };
@@ -146,35 +146,27 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
     auto* weight_grad =
         context.Output<Tensor>(framework::GradVarName("Weight"));
     auto* bias_grad = context.Output<Tensor>(framework::GradVarName("Bias"));
-    input_grad->mutable_data<T>(context.GetPlace());
-    hidden_prev_grad->mutable_data<T>(context.GetPlace());
-    weight_grad->mutable_data<T>(context.GetPlace());
     Tensor gate_grad;
-    gate_grad.mutable_data<T>(input->dims(), context.GetPlace());
     Tensor reset_hidden_prev_grad;
-    reset_hidden_prev_grad.mutable_data<T>(reset_hidden_prev->dims(),
-                                           context.GetPlace());
-    int batch_size = input->dims()[0];
-    int frame_size = hidden_prev->dims()[1];
     const T* hidden_prev_data = hidden_prev->data<T>();
-    T* hidden_prev_grad_data = hidden_prev_grad->data<T>();
     const T* weight_data = weight->data<T>();
-    T* weight_grad_data = weight_grad->data<T>();
-    T* gate_grad_data = gate_grad.data<T>();
+    T* gate_grad_data =
+        gate_grad.mutable_data<T>(input->dims(), context.GetPlace());
     const T* reset_hidden_prev_data = reset_hidden_prev->data<T>();
-    T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data<T>();
+    T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.mutable_data<T>(
+        reset_hidden_prev->dims(), context.GetPlace());
     auto h_p = EigenMatrix<T>::From(*hidden_prev);
     auto g = EigenMatrix<T>::From(*gate);
     auto d_h = EigenMatrix<T>::From(*hidden_grad);
-    auto d_x = EigenMatrix<T>::From(*input_grad);
-    auto d_h_p = EigenMatrix<T>::From(*hidden_prev_grad);
     auto d_g = EigenMatrix<T>::From(gate_grad);
     auto d_r_h_p = EigenMatrix<T>::From(reset_hidden_prev_grad);
     auto place = context.GetEigenDevice<Place>();
+    int batch_size = input->dims()[0];
+    int frame_size = hidden_prev->dims()[1];
     Eigen::array<int, 2> extents({{batch_size, frame_size}});
     Eigen::array<int, 2> u_offsets({{0, 0}});
     auto u = g.slice(u_offsets, extents);  // update gate
@@ -185,38 +177,52 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
     // backward for unactivated update gate
     ActGradCompute(context.Attr<int>("gate_activation"), place, u, u,
-                   d_g.slice(u_offsets, extents), d_h * (h_p - c));
+                   d_g.slice(u_offsets, extents), d_h * (c - h_p));
     // backward for unactivated output candidate
     ActGradCompute(context.Attr<int>("activation"), place, c, c,
-                   d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u));
+                   d_g.slice(c_offsets, extents), d_h * u);
     // backward for reset_hidden_prev
     math::gemm<Place, T>(context.device_context(), false, true, batch_size,
                          frame_size, frame_size, 1,
                          gate_grad_data + frame_size * 2, frame_size * 3,
                          weight_data + frame_size * frame_size * 2, frame_size,
                          0, reset_hidden_prev_grad_data, frame_size);
+    // backward for unactivated reset gate
+    ActGradCompute(context.Attr<int>("gate_activation"), place, r, r,
+                   d_g.slice(r_offsets, extents), d_r_h_p * h_p);
+    // backward for weight
+    if (weight_grad) {
+      T* weight_grad_data = weight_grad->mutable_data<T>(context.GetPlace());
       // backward for state_weight
       math::gemm<Place, T>(
           context.device_context(), true, false, frame_size, frame_size,
           batch_size, 1, reset_hidden_prev_data, frame_size,
           gate_grad_data + frame_size * 2, frame_size * 3, 0,
           weight_grad_data + frame_size * frame_size * 2, frame_size);
-    // backward for unactivated reset gate
-    ActGradCompute(context.Attr<int>("gate_activation"), place, r, r,
-                   d_g.slice(r_offsets, extents), d_r_h_p * h_p);
       // backward for update_gate_weight and reset_gate_weight
       math::gemm<Place, T>(context.device_context(), true, false, frame_size,
                            frame_size * 2, batch_size, 1, hidden_prev_data,
                            frame_size, gate_grad_data, frame_size * 3, 0,
                            weight_grad_data, frame_size * 2);
+    }
     // backward for hidden_prev
-    d_h_p.device(place) = d_r_h_p * r + d_h * u;
+    if (hidden_prev_grad) {
+      T* hidden_prev_grad_data =
+          hidden_prev_grad->mutable_data<T>(context.GetPlace());
+      auto d_h_p = EigenMatrix<T>::From(*hidden_prev_grad);
+      d_h_p.device(place) = d_r_h_p * r + d_h * (u.constant(T(1)) - u);
       math::gemm<Place, T>(context.device_context(), false, true, batch_size,
                            frame_size, frame_size * 2, 1, gate_grad_data,
                            frame_size * 3, weight_data, frame_size * 2, 1,
                            hidden_prev_grad_data, frame_size);
+    }
     // backward for input
-    d_x.device(place) = d_g;
+    if (input_grad) {
+      input_grad->mutable_data<T>(context.GetPlace());
+      auto d_x = EigenMatrix<T>::From(*input_grad);
+      d_x.device(place) = d_g;
+    }
     // backward for bias
     if (bias_grad) {
       bias_grad->mutable_data<T>(context.GetPlace());
......
@@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     ll -= std::log(sum);
     // Now ll is equal to -log(Z).
-    const int* lbl = label.data<int>();
+    const int64_t* lbl = label.data<int64_t>();
     PADDLE_ENFORCE_LT(
         static_cast<size_t>(*std::max_element(lbl, lbl + seq_length)), tag_num,
         "An invalid tag label that exceeds the largest tag number.");
@@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
                              Tensor* emission_grad) const {
     const T* w_exps = transition_exps.data<T>();
     const T* x_exps = emission_exps.data<T>();
-    const int* label_value = label.data<int>();
+    const int64_t* label_value = label.data<int64_t>();
     T* beta_value = beta->data<T>();
     auto x_dims = emission_exps.dims();
......
@@ -126,7 +126,10 @@ class LayerHelper(object):
             self.startup_program.global_block().create_parameter(
                 dtype=dtype, shape=shape, **attr_copy)
         return self.main_program.global_block().create_parameter(
-            name=attr_copy['name'], dtype=dtype, shape=shape)
+            name=attr_copy['name'],
+            dtype=dtype,
+            shape=shape,
+            trainable=attr_copy.get('trainable', True))

     def create_tmp_variable(self, dtype):
         return self.main_program.current_block().create_var(
......
@@ -112,6 +112,7 @@ def fc(input,
 def embedding(input,
               size,
               is_sparse=False,
+              param_initializer=None,
               param_attr=None,
               data_type='float32',
               main_program=None,
@@ -136,9 +137,16 @@ def embedding(input,
         to the LayerHelper constructor.
     """
+
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
     helper = LayerHelper('embedding', **locals())
     w = helper.create_parameter(
-        attr=helper.param_attr, shape=size, dtype=data_type)
+        attr=helper.param_attr,
+        shape=size,
+        dtype=data_type,
+        initializer=param_initializer or _get_default_param_initializer())
     tmp = helper.create_tmp_variable(data_type)
     helper.append_op(
         type='lookup_table',
@@ -460,6 +468,41 @@ def sums(input, main_program=None, startup_program=None):
     return out


+def linear_chain_crf(input,
+                     label,
+                     param_attr=None,
+                     param_initializer=None,
+                     main_program=None,
+                     startup_program=None):
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
+    helper = LayerHelper('linear_chain_crf', **locals())
+    size = input.shape[1]
+    transition = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=[size + 2, size],
+        dtype=helper.input_dtype(),
+        initializer=param_initializer or _get_default_param_initializer())
+    alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
+    emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
+    transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
+    log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype())
+    helper.append_op(
+        type='linear_chain_crf',
+        inputs={"Emission": [input],
+                "Transition": transition,
+                "Label": label},
+        outputs={
+            "Alpha": [alpha],
+            "EmissionExps": [emission_exps],
+            "TransitionExps": transition_exps,
+            "LogLikelihood": log_likelihood
+        })
+    return log_likelihood
+
+
 def assign(input, output, main_program=None, startup_program=None):
     helper = LayerHelper('assign', **locals())
     helper.append_op(
......
@@ -170,7 +170,8 @@ class Optimizer(object):
         optimize_ops = []
         for param_and_grad in parameters_and_grads:
-            if param_and_grad[1] is not None:
+            if param_and_grad[0].trainable is True and param_and_grad[
+                    1] is not None:
                 optimize_op = self._append_optimize_op(loss.block,
                                                        param_and_grad)
                 optimize_ops.append(optimize_op)
......
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor, g_scope
from paddle.v2.fluid.optimizer import SGDOptimizer
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3
IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20
embedding_name = 'emb'
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def db_lstm():
# 8 features
word = layers.data(name='word_data', shape=[1], data_type='int64')
predicate = layers.data(name='verb_data', shape=[1], data_type='int64')
ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64')
ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64')
ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64')
ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64')
ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64')
mark = layers.data(name='mark_data', shape=[1], data_type='int64')
predicate_embedding = layers.embedding(
input=predicate,
size=[pred_len, word_dim],
data_type='float32',
is_sparse=IS_SPARSE,
param_attr={'name': 'vemb'})
mark_embedding = layers.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
data_type='float32',
is_sparse=IS_SPARSE)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
layers.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr={'name': embedding_name,
'trainable': False}) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0_layers = [
layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
]
hidden_0 = layers.sums(input=hidden_0_layers)
lstm_0 = layers.dynamic_lstm(
input=hidden_0,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid')
# stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = layers.sums(input=[
layers.fc(input=input_tmp[0], size=hidden_dim),
layers.fc(input=input_tmp[1], size=hidden_dim)
])
lstm = layers.dynamic_lstm(
input=mix_hidden,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
is_reverse=((i % 2) == 1))
input_tmp = [mix_hidden, lstm]
feature_out = layers.sums(input=[
layers.fc(input=input_tmp[0], size=label_dict_len),
layers.fc(input=input_tmp[1], size=label_dict_len)
])
return feature_out
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = core.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def main():
# define network topology
feature_out = db_lstm()
target = layers.data(name='target', shape=[1], data_type='int64')
crf_cost = layers.linear_chain_crf(
input=feature_out,
label=target,
param_attr={"name": 'crfw',
"learning_rate": mix_hidden_lr})
avg_cost = layers.mean(x=crf_cost)
# TODO(qiao)
# 1. add crf_decode_layer and evaluator
# 2. use other optimizer and check why out will be NAN
sgd_optimizer = SGDOptimizer(learning_rate=0.0001)
opts = sgd_optimizer.minimize(avg_cost)
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.conll05.test(), buf_size=8192),
batch_size=BATCH_SIZE)
place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
embedding_param = g_scope.find_var(embedding_name).get_tensor()
embedding_param.set(
load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place)
batch_id = 0
for pass_id in xrange(PASS_NUM):
for data in train_data():
word_data = to_lodtensor(map(lambda x: x[0], data), place)
ctx_n2_data = to_lodtensor(map(lambda x: x[1], data), place)
ctx_n1_data = to_lodtensor(map(lambda x: x[2], data), place)
ctx_0_data = to_lodtensor(map(lambda x: x[3], data), place)
ctx_p1_data = to_lodtensor(map(lambda x: x[4], data), place)
ctx_p2_data = to_lodtensor(map(lambda x: x[5], data), place)
verb_data = to_lodtensor(map(lambda x: x[6], data), place)
mark_data = to_lodtensor(map(lambda x: x[7], data), place)
target = to_lodtensor(map(lambda x: x[8], data), place)
outs = exe.run(framework.default_main_program(),
feed={
'word_data': word_data,
'ctx_n2_data': ctx_n2_data,
'ctx_n1_data': ctx_n1_data,
'ctx_0_data': ctx_0_data,
'ctx_p1_data': ctx_p1_data,
'ctx_p2_data': ctx_p2_data,
'verb_data': verb_data,
'mark_data': mark_data,
'target': target
},
fetch_list=[avg_cost])
avg_cost_val = np.array(outs[0])
if batch_id % 10 == 0:
print("avg_cost=" + str(avg_cost_val))
# exit early for CI
exit(0)
batch_id = batch_id + 1
if __name__ == '__main__':
main()
@@ -28,8 +28,8 @@ def relu(x):
 class TestGRUUnitOp(OpTest):
-    batch_size = 3
-    frame_size = 5
+    batch_size = 5
+    frame_size = 10
     activate = {
         GRUActivationType.identity: identity,
         GRUActivationType.sigmoid: sigmoid,
@@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest):
         c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) +
                                                     g[:, frame_size * 2:])
         g = np.hstack((u_r, c))
-        h = u * h_p + (1 - u) * c
+        h = u * c + (1 - u) * h_p
         self.outputs = {
             'Gate': g.astype('float64'),
             'ResetHiddenPrev': r_h_p.astype('float64'),
@@ -92,10 +92,7 @@ class TestGRUUnitOp(OpTest):
         self.check_output()

     def test_check_grad(self):
-        self.check_grad(
-            ['Input', 'HiddenPrev', 'Weight'],
-            ['Hidden', 'ResetHiddenPrev', 'Gate'],
-            max_relative_error=0.007)
+        self.check_grad(['Input', 'HiddenPrev', 'Weight'], ['Hidden'])


 class TestGRUUnitOpWithBias(TestGRUUnitOp):
@@ -104,18 +101,20 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
         frame_size = self.frame_size
         super(TestGRUUnitOpWithBias, self).set_inputs()
         self.inputs['Bias'] = np.random.uniform(
-            -0.1, 0.1, (1, frame_size * 3)).astype('float32')
+            -0.1, 0.1, (1, frame_size * 3)).astype('float64')
         self.attrs = {
             'activation': GRUActivationType.identity,
             'gate_activation': GRUActivationType.sigmoid
         }

     def test_check_grad(self):
+        self.check_grad(['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'])
+
+    def test_check_grad_ingore_input(self):
         self.check_grad(
-            ['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'],
-            max_relative_error=0.007)
+            ['HiddenPrev', 'Weight', 'Bias'], ['Hidden'],
+            no_grad_set=set('Input'))


 if __name__ == '__main__':
-    exit(0)  # FIXME(yuyang18): This unittest is not pass. Fix it later
     unittest.main()
+import unittest
+
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.nets as nets
 from paddle.v2.fluid.framework import Program
-import paddle.v2.fluid.core as core
-import unittest


 class TestBook(unittest.TestCase):
@@ -20,7 +20,8 @@ class TestBook(unittest.TestCase):
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
         program.append_backward(avg_cost)
-        print str(program)
+
+        # print str(program)

     def test_recognize_digits_mlp(self):
         program = Program()
@@ -49,7 +50,7 @@ class TestBook(unittest.TestCase):
             input=predict, label=label, main_program=program)
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
-        print str(program)
+        # print str(program)

     def test_simple_conv2d(self):
         program = Program()
@@ -64,7 +65,7 @@ class TestBook(unittest.TestCase):
             filter_size=[4, 4],
             main_program=program)
-        print str(program)
+        # print str(program)

     def test_recognize_digits_conv(self):
         program = Program()
@@ -103,7 +104,7 @@ class TestBook(unittest.TestCase):
         program.append_backward(avg_cost)
-        print str(program)
+        # print str(program)

     def test_word_embedding(self):
         program = Program()
@@ -164,7 +165,24 @@ class TestBook(unittest.TestCase):
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
-        print str(program)
+        # print str(program)
+
+    def test_linear_chain_crf(self):
+        program = Program()
+
+        # Change g_program, so the rest layers use `g_program`
+        images = layers.data(
+            name='pixel',
+            shape=[784],
+            data_type='float32',
+            main_program=program)
+        label = layers.data(
+            name='label', shape=[1], data_type='int32', main_program=program)
+        hidden = layers.fc(input=images, size=128, main_program=program)
+        crf = layers.linear_chain_crf(
+            input=hidden, label=label, main_program=program)
+
+        # print str(program)

 if __name__ == '__main__':
......
@@ -104,7 +104,7 @@ class TestLinearChainCrfOp(OpTest):
         transition_exps = np.exp(transition)
         labels = np.random.randint(
-            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")
+            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64")
         self.inputs = {
             "Emission": (emission, lod),
......