提交 680aec21 编写于 作者: W wanghaoshuang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into ctc_evaluator_py

...@@ -514,3 +514,8 @@ l2_normalize ...@@ -514,3 +514,8 @@ l2_normalize
------------ ------------
.. autofunction:: paddle.v2.fluid.layers.l2_normalize .. autofunction:: paddle.v2.fluid.layers.l2_normalize
:noindex: :noindex:
sequence_reshape
----------------
.. autofunction:: paddle.v2.fluid.layers.sequence_reshape
:noindex:
...@@ -485,9 +485,15 @@ void OperatorWithKernel::Run(const Scope& scope, ...@@ -485,9 +485,15 @@ void OperatorWithKernel::Run(const Scope& scope,
// } // }
auto expected_kernel_key = this->GetExpectedKernelType(ctx); auto expected_kernel_key = this->GetExpectedKernelType(ctx);
VLOG(3) << "expected_kernel_key:" << expected_kernel_key; VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
auto kernel_iter = kernels.find(expected_kernel_key);
if (kernel_iter == kernels.end()) {
PADDLE_THROW("op %s does not have kernel for %s", type_,
KernelTypeToString(expected_kernel_key));
}
// do data transform
Scope& new_scope = scope.NewScope(); Scope& new_scope = scope.NewScope();
for (auto& var_name_item : this->Inputs()) { for (auto& var_name_item : this->Inputs()) {
...@@ -520,8 +526,6 @@ void OperatorWithKernel::Run(const Scope& scope, ...@@ -520,8 +526,6 @@ void OperatorWithKernel::Run(const Scope& scope,
} }
} }
auto kernel_iter = kernels.find(expected_kernel_key);
auto* new_dev_ctx = pool.Get(expected_kernel_key.place_); auto* new_dev_ctx = pool.Get(expected_kernel_key.place_);
kernel_iter->second->Compute( kernel_iter->second->Compute(
ExecutionContext(*this, new_scope, *new_dev_ctx)); ExecutionContext(*this, new_scope, *new_dev_ctx));
......
...@@ -34,9 +34,7 @@ limitations under the License. */ ...@@ -34,9 +34,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
constexpr int kCondStart = 0; constexpr char kOptimizeBlock[] = "OptimizeBlock";
constexpr int kCondRunning = 1;
constexpr int kCondDone = 2;
void RunServer(std::shared_ptr<detail::AsyncGRPCServer> service) { void RunServer(std::shared_ptr<detail::AsyncGRPCServer> service) {
service->RunSyncUpdate(); service->RunSyncUpdate();
...@@ -99,10 +97,8 @@ class RecvOp : public framework::OperatorBase { ...@@ -99,10 +97,8 @@ class RecvOp : public framework::OperatorBase {
auto fan_in = Attr<int>("Fanin"); auto fan_in = Attr<int>("Fanin");
size_t param_count = param_list.size(); size_t param_count = param_list.size();
std::string program_str = Attr<std::string>("OptimizeProgram"); auto *block = Attr<framework::BlockDesc *>(kOptimizeBlock);
framework::proto::ProgramDesc program_desc; auto *program = block->Program();
program_desc.ParseFromString(program_str);
framework::ProgramDesc program(program_desc);
framework::Executor executor(dev_place); framework::Executor executor(dev_place);
// TODO(typhoonzero): change this to a while_op for every cluster-batch. // TODO(typhoonzero): change this to a while_op for every cluster-batch.
...@@ -142,8 +138,9 @@ class RecvOp : public framework::OperatorBase { ...@@ -142,8 +138,9 @@ class RecvOp : public framework::OperatorBase {
if (exit_flag) { if (exit_flag) {
break; break;
} }
try { try {
executor.Run(program, &recv_scope, 0, /*global_block*/ executor.Run(*program, &recv_scope, block->ID(), /*global_block*/
false /*create_local_scope*/, false /*create_vars*/); false /*create_local_scope*/, false /*create_vars*/);
} catch (std::exception &e) { } catch (std::exception &e) {
LOG(ERROR) << "run sub program error " << e.what(); LOG(ERROR) << "run sub program error " << e.what();
...@@ -175,8 +172,8 @@ This operator will recv tensor from send_op ...@@ -175,8 +172,8 @@ This operator will recv tensor from send_op
"IP address to listen on.") "IP address to listen on.")
.SetDefault("127.0.0.1:6164") .SetDefault("127.0.0.1:6164")
.AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); .AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
AddAttr<std::string>("OptimizeProgram", "type string", AddAttr<framework::BlockDesc *>(
"Serialized ProgramDesc string for recv to run."); kOptimizeBlock, "Serialized ProgramDesc string for recv to run.");
AddAttr<std::vector<std::string>>( AddAttr<std::vector<std::string>>(
"ParamList", "type list of string", "ParamList", "type list of string",
"grad->param name mapping to find which param to optimize.") "grad->param name mapping to find which param to optimize.")
......
...@@ -48,7 +48,7 @@ Scale operator ...@@ -48,7 +48,7 @@ Scale operator
$$Out = scale*X$$ $$Out = scale*X$$
)DOC"); )DOC");
AddAttr<AttrType>("scale", AddAttr<AttrType>("scale",
"(float, default 0)" "(float, default 1.0)"
"The scaling factor of the scale operator.") "The scaling factor of the scale operator.")
.SetDefault(1.0); .SetDefault(1.0);
} }
......
...@@ -130,10 +130,7 @@ void StartServerNet(bool is_sparse) { ...@@ -130,10 +130,7 @@ void StartServerNet(bool is_sparse) {
attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
attrs.insert({"ParamList", std::vector<std::string>({"Out"})}); attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
attrs.insert({"GradList", std::vector<std::string>({"x1"})}); attrs.insert({"GradList", std::vector<std::string>({"x1"})});
std::string program_proto; attrs.insert({"OptimizeBlock", block});
PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto));
attrs.insert({"OptimizeProgram", program_proto});
recv_op = f::OpRegistry::CreateOp("recv", {{"RX", {"x1"}}}, {}, attrs); recv_op = f::OpRegistry::CreateOp("recv", {{"RX", {"x1"}}}, {}, attrs);
recv_op->Run(scope, place); recv_op->Run(scope, place);
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_reshape_op.h"
#include "paddle/framework/ddim.h"
namespace paddle {
namespace operators {
class SequenceReshapeOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SequenceReshapeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SequenceReshapeOp should not be null.");
auto x_dims = ctx->GetInputDim("X");
auto x_numel = product(x_dims);
PADDLE_ENFORCE_EQ(x_dims.size(), 2U, "Rank of Input(X) should be 2.");
int new_dim = ctx->Attrs().Get<int>("new_dim");
ctx->SetOutputDim("Out",
{x_numel / new_dim, static_cast<int64_t>(new_dim)});
}
};
class SequenceReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequenceReshapeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor, default LoDTensor<float>) A 2-D LoDTensor with shape "
"being [N, M].");
AddOutput("Out",
"(LoDTensor, default LoDTensor<float>) A 2-D LoDTensor with "
"shape [T, new_dim] where T is calculated based on X.lod, M and "
"new_dim.");
AddAttr<int>("new_dim", "Sequence dimension of the output LoDTensor.");
AddComment(R"DOC(
Sequence Reshape Operator.
This operator will rearrange the input sequences. The new dimension is set by
attribute and length of each sequence may change longer or shorter which is
decided by original length, original dimension and new dimension. The following
example will help to illustrate the function of this operator:
x is a LoDTensor:
x.lod = [[0, 2, 6]]
x.data = [[1, 2], [3, 4],
[5, 6], [7, 8], [9, 10], [11, 12]]
x.dims = [6, 2]
set new_dim = 4
then out is a LoDTensor:
out.lod = [[0, 1, 3]]
out.data = [[1, 2, 3, 4],
[5, 6, 7, 8], [9, 10, 11, 12]]
out.dims = [3, 4]
Currently, only 1-level LoDTensor is supported and please make sure (original
length * original dimension) can be divided by new_dim with no remainder for
each sequence.
)DOC");
}
};
class SequenceReshapeGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(
ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) of SequenceReshapeGradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SequenceReshapeGradOp should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
}
};
class SequenceReshapeGradOpMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* op_desc_ptr = new framework::OpDesc();
op_desc_ptr->SetType("sequence_reshape_grad");
op_desc_ptr->SetInput("X", Input("X"));
op_desc_ptr->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op_desc_ptr->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op_desc_ptr->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDesc>(op_desc_ptr);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(sequence_reshape, ops::SequenceReshapeOp,
ops::SequenceReshapeOpMaker, ops::SequenceReshapeGradOpMaker);
REGISTER_OPERATOR(sequence_reshape_grad, ops::SequenceReshapeGradOp);
REGISTER_OP_CPU_KERNEL(
sequence_reshape,
ops::SequenceReshapeKernel<paddle::platform::CPUDeviceContext, float>,
ops::SequenceReshapeKernel<paddle::platform::CPUDeviceContext, double>,
ops::SequenceReshapeKernel<paddle::platform::CPUDeviceContext, int>,
ops::SequenceReshapeKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
sequence_reshape_grad,
ops::SequenceReshapeGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::SequenceReshapeGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::SequenceReshapeGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::SequenceReshapeGradKernel<paddle::platform::CPUDeviceContext, int>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_reshape_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
sequence_reshape,
ops::SequenceReshapeKernel<paddle::platform::CUDADeviceContext, float>,
ops::SequenceReshapeKernel<paddle::platform::CUDADeviceContext, double>,
ops::SequenceReshapeKernel<paddle::platform::CUDADeviceContext, int>,
ops::SequenceReshapeKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
sequence_reshape_grad,
ops::SequenceReshapeGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::SequenceReshapeGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::SequenceReshapeGradKernel<paddle::platform::CUDADeviceContext,
int64_t>,
ops::SequenceReshapeGradKernel<paddle::platform::CUDADeviceContext, int>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
using LoDTensor = framework::LoDTensor;
template <typename DeviceContext, typename T>
class SequenceReshapeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* in = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out");
int out_width = context.Attr<int>("new_dim");
auto in_dims = in->dims();
int64_t in_width = in_dims[1];
auto& in_lod = in->lod();
PADDLE_ENFORCE_EQ(in_lod.size(), 1UL,
"Only support one level sequence now.");
PADDLE_ENFORCE_EQ(
in_dims[0], in_lod[0].back(),
"Inconsistent size between X.shape[0] and X.lod()[0].back().");
auto in_lod_l0 = in_lod[0];
int seq_num = in_lod_l0.size() - 1;
if (in_width == out_width) {
out->set_lod(in->lod());
} else {
auto& out_lod = *out->mutable_lod();
out_lod.resize(1);
out_lod[0].resize(seq_num + 1);
out_lod[0][0] = 0;
for (int i = 0; i < seq_num; ++i) {
size_t seq_len = in_lod_l0[i + 1] - in_lod_l0[i];
size_t offset = 0;
offset = (seq_len * in_width) / out_width;
PADDLE_ENFORCE_EQ(offset * out_width, seq_len * in_width,
"Please make sure (sequence_length * dimension) can "
"be divided by new_dim with no remainder for each "
"sequence. The %dth sequence is invalid.",
i + 1);
out_lod[0][i + 1] = out_lod[0][i] + offset;
}
}
framework::Copy(*in, context.GetPlace(), out);
out->Resize({static_cast<int64_t>(out->lod()[0].back()), out_width});
}
};
template <typename DeviceContext, typename T>
class SequenceReshapeGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* x_tensor_ptr = context.Input<LoDTensor>("X");
auto* outg_tensor_ptr =
context.Input<LoDTensor>(framework::GradVarName("Out"));
auto* xg_tensor_ptr =
context.Output<LoDTensor>(framework::GradVarName("X"));
xg_tensor_ptr->mutable_data<T>(context.GetPlace());
framework::Copy(*outg_tensor_ptr, context.GetPlace(), xg_tensor_ptr);
xg_tensor_ptr->Resize(x_tensor_ptr->dims());
}
};
} // namespace operators
} // namespace paddle
...@@ -452,7 +452,7 @@ class DistributeTranspiler: ...@@ -452,7 +452,7 @@ class DistributeTranspiler:
}, # grads to recv }, # grads to recv
outputs={}, outputs={},
attrs={ attrs={
"OptimizeProgram": optimize_sub_program.desc, "OptimizeBlock": optimize_sub_program.global_block(),
"endpoint": endpoint, "endpoint": endpoint,
"ParamList": [ "ParamList": [
p.name p.name
......
...@@ -243,7 +243,7 @@ class SimpleDistributeTranspiler: ...@@ -243,7 +243,7 @@ class SimpleDistributeTranspiler:
self.param_grad_map[endpoint]["grads"]}, # grads to recv self.param_grad_map[endpoint]["grads"]}, # grads to recv
outputs={}, outputs={},
attrs={ attrs={
"OptimizeProgram": optimize_sub_program.desc, "OptimizeBlock": optimize_sub_program.global_block(),
"endpoint": endpoint, "endpoint": endpoint,
"ParamList": "ParamList":
[p.name for p in self.param_grad_map[endpoint]["params"]], [p.name for p in self.param_grad_map[endpoint]["params"]],
......
...@@ -28,7 +28,8 @@ __all__ = [ ...@@ -28,7 +28,8 @@ __all__ = [
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand', 'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min', 'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
'sequence_first_step', 'sequence_last_step', 'dropout', 'split', 'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
'ctc_greedy_decoder', 'edit_distance', 'l2_normalize', 'matmul', 'warpctc' 'ctc_greedy_decoder', 'edit_distance', 'l2_normalize', 'matmul', 'warpctc',
'sequence_reshape'
] ]
...@@ -213,33 +214,33 @@ def dynamic_lstm(input, ...@@ -213,33 +214,33 @@ def dynamic_lstm(input,
(https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows: (https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:
.. math:: .. math::
i_t & = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)
f_t & = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) i_t & = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)
\\tilde{c_t} & = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) f_t & = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f)
o_t & = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) \\tilde{c_t} & = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t} o_t & = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o)
c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
h_t & = o_t \odot act_h(c_t) h_t & = o_t \odot act_h(c_t)
where the :math:`W` terms denote weight matrices (e.g. :math:`W_{xi}` is where the :math:`W` terms denote weight matrices (e.g. :math:`W_{xi}` is
the matrix of weights from the input gate to the input), :math:`W_{ic}, \ the matrix of weights from the input gate to the input), :math:`W_{ic}, \
W_{fc}, W_{oc}` are diagonal weight matrices for peephole connections. In W_{fc}, W_{oc}` are diagonal weight matrices for peephole connections. In
our implementation, we use vectors to reprenset these diagonal weight our implementation, we use vectors to reprenset these diagonal weight
matrices. The :math:`b` terms denote bias vectors (:math:`b_i` is the input matrices. The :math:`b` terms denote bias vectors (:math:`b_i` is the input
gate bias vector), :math:`\sigma` is the non-line activations, such as gate bias vector), :math:`\sigma` is the non-line activations, such as
logistic sigmoid function, and :math:`i, f, o` and :math:`c` are the input logistic sigmoid function, and :math:`i, f, o` and :math:`c` are the input
gate, forget gate, output gate, and cell activation vectors, respectively, gate, forget gate, output gate, and cell activation vectors, respectively,
all of which have the same size as the cell output activation vector :math:`h`. all of which have the same size as the cell output activation vector :math:`h`.
The :math:`\odot` is the element-wise product of the vectors. :math:`act_g` The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
and :math:`act_h` are the cell input and cell output activation functions and :math:`act_h` are the cell input and cell output activation functions
and `tanh` is usually used for them. :math:`\\tilde{c_t}` is also called and `tanh` is usually used for them. :math:`\\tilde{c_t}` is also called
candidate hidden state, which is computed based on the current input and candidate hidden state, which is computed based on the current input and
the previous hidden state. the previous hidden state.
Set `use_peepholes` to `False` to disable peephole connection. The formula Set `use_peepholes` to `False` to disable peephole connection. The formula
...@@ -251,38 +252,38 @@ def dynamic_lstm(input, ...@@ -251,38 +252,38 @@ def dynamic_lstm(input,
Users can choose to use fully-connect layer before LSTM layer. Users can choose to use fully-connect layer before LSTM layer.
Args: Args:
input(Variable): The input of dynamic_lstm layer, which supports input(Variable): The input of dynamic_lstm layer, which supports
variable-time length input sequence. The underlying variable-time length input sequence. The underlying
tensor in this Variable is a matrix with shape tensor in this Variable is a matrix with shape
(T X 4D), where T is the total time steps in this (T X 4D), where T is the total time steps in this
mini-batch, D is the hidden size. mini-batch, D is the hidden size.
size(int): 4 * hidden size. size(int): 4 * hidden size.
param_attr(ParamAttr): The parameter attribute for the learnable param_attr(ParamAttr): The parameter attribute for the learnable
hidden-hidden weights. hidden-hidden weights.
- The shape is (D x 4D), where D is the hidden - The shape is (D x 4D), where D is the hidden
size. size.
- Weights = {:math:`W_{ch}, W_{ih}, \ - Weights = {:math:`W_{ch}, W_{ih}, \
W_{fh}, W_{oh}`} W_{fh}, W_{oh}`}
bias_attr(ParamAttr): The bias attribute for the learnable bias bias_attr(ParamAttr): The bias attribute for the learnable bias
weights, which contains two parts, input-hidden weights, which contains two parts, input-hidden
bias weights and peephole connections weights if bias weights and peephole connections weights if
setting `use_peepholes` to `True`. setting `use_peepholes` to `True`.
1. `use_peepholes = False` 1. `use_peepholes = False`
- The shape is (1 x 4D). - The shape is (1 x 4D).
- Biases = {:math:`b_c, b_i, b_f, b_o`}. - Biases = {:math:`b_c, b_i, b_f, b_o`}.
2. `use_peepholes = True` 2. `use_peepholes = True`
- The shape is (1 x 7D). - The shape is (1 x 7D).
- Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \ - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
W_{fc}, W_{oc}`}. W_{fc}, W_{oc}`}.
use_peepholes(bool): Whether to enable diagonal/peephole connections, use_peepholes(bool): Whether to enable diagonal/peephole connections,
default `True`. default `True`.
is_reverse(bool): Whether to compute reversed LSTM, default `False`. is_reverse(bool): Whether to compute reversed LSTM, default `False`.
gate_activation(str): The activation for input gate, forget gate and gate_activation(str): The activation for input gate, forget gate and
output gate. Choices = ["sigmoid", "tanh", "relu", output gate. Choices = ["sigmoid", "tanh", "relu",
"identity"], default "sigmoid". "identity"], default "sigmoid".
cell_activation(str): The activation for cell output. Choices = ["sigmoid", cell_activation(str): The activation for cell output. Choices = ["sigmoid",
"tanh", "relu", "identity"], default "tanh". "tanh", "relu", "identity"], default "tanh".
candidate_activation(str): The activation for candidate hidden state. candidate_activation(str): The activation for candidate hidden state.
Choices = ["sigmoid", "tanh", "relu", "identity"], Choices = ["sigmoid", "tanh", "relu", "identity"],
...@@ -2027,3 +2028,57 @@ def warpctc(input, label, blank=0, norm_by_times=False, **kwargs): ...@@ -2027,3 +2028,57 @@ def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
attrs={'blank': blank, attrs={'blank': blank,
'norm_by_times': norm_by_times}) 'norm_by_times': norm_by_times})
return loss_out return loss_out
def sequence_reshape(input, new_dim):
"""
**Sequence Reshape Layer**
This layer will rearrange the input sequences. The new dimension is set by
user. Length of each sequence is computed according to original length,
original dimension and new dimension. The following example will help to
illustrate the function of this layer:
.. code-block:: text
x is a LoDTensor:
x.lod = [[0, 2, 6]]
x.data = [[1, 2], [3, 4],
[5, 6], [7, 8], [9, 10], [11, 12]]
x.dims = [6, 2]
set new_dim = 4
then out is a LoDTensor:
out.lod = [[0, 1, 3]]
out.data = [[1, 2, 3, 4],
[5, 6, 7, 8], [9, 10, 11, 12]]
out.dims = [3, 4]
Currently, only 1-level LoDTensor is supported and please make sure
(original length * original dimension) can be divided by new dimension with
no remainder for each sequence.
Args:
input (Variable): (LodTensor, default: LoDTensor<float>), a 2-D LoDTensor
with shape being [N, M] where M for dimension.
new_dim (int): New dimension which the input LoDTensor is reshaped to.
Returns:
Variable: Reshaped LoDTensor according to new dimension.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[5, 20],
dtype='float32', lod_level=1)
x_reshaped = layers.sequence_reshape(input=x, new_dim=10)
"""
helper = LayerHelper('sequence_reshape', **locals())
out = helper.create_tmp_variable(helper.input_dtype())
helper.append_op(
type='sequence_reshape',
inputs={'X': [input]},
outputs={'Out': [out]},
attrs={'new_dim': new_dim})
return out
#Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import print_function
import sys
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import os
import sys
TRAINERS = 5
BATCH_SIZE = 128
PASS_NUM = 100
def resnet_cifar10(input, depth=32):
def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=False)
return fluid.layers.batch_norm(input=tmp, act=act)
def shortcut(input, ch_in, ch_out, stride):
if ch_in != ch_out:
return conv_bn_layer(input, ch_out, 1, stride, 0, None)
else:
return input
def basicblock(input, ch_in, ch_out, stride):
tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
short = shortcut(input, ch_in, ch_out, stride)
return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
def layer_warp(block_func, input, ch_in, ch_out, count, stride):
tmp = block_func(input, ch_in, ch_out, stride)
for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
conv1 = conv_bn_layer(
input=input, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
pool = fluid.layers.pool2d(
input=res3, pool_size=8, pool_type='avg', pool_stride=1)
return pool
def vgg16_bn_drop(input):
def conv_block(input, num_filter, groups, dropouts):
return fluid.nets.img_conv_group(
input=input,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act='relu',
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type='max')
conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
fc1 = fluid.layers.fc(input=drop, size=512, act=None)
bn = fluid.layers.batch_norm(input=fc1, act='relu')
drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
fc2 = fluid.layers.fc(input=drop2, size=512, act=None)
return fc2
classdim = 10
data_shape = [3, 32, 32]
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
net_type = "vgg"
if len(sys.argv) >= 2:
net_type = sys.argv[1]
if net_type == "vgg":
print("train vgg net")
net = vgg16_bn_drop(images)
elif net_type == "resnet":
print("train resnet")
net = resnet_cifar10(images, 32)
else:
raise ValueError("%s network is not supported" % net_type)
predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimize_ops, params_grads = optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=predict, label=label)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
t = fluid.DistributeTranspiler()
# all parameter server endpoints list for spliting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE",
"TRAINER") # get the training role: trainer/pserver
t.transpile(
optimize_ops, params_grads, pservers=pserver_endpoints, trainers=TRAINERS)
if training_role == "PSERVER":
if not current_endpoint:
print("need env SERVER_ENDPOINT")
exit(1)
print("start pserver at:", current_endpoint)
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
print("pserver run end")
elif training_role == "TRAINER":
print("start trainer")
trainer_prog = t.get_trainer_program()
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM):
accuracy.reset(exe)
for data in train_reader():
loss, acc = exe.run(trainer_prog,
feed=feeder.feed(data),
fetch_list=[avg_cost] + accuracy.metrics)
pass_acc = accuracy.eval(exe)
print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
pass_acc))
# this model is slow, so if we can train two mini batch, we think it works properly.
print("trainer run end")
else:
print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
exit(1)
...@@ -216,6 +216,14 @@ class TestBook(unittest.TestCase): ...@@ -216,6 +216,14 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(x) self.assertIsNotNone(x)
print(str(program)) print(str(program))
def test_sequence_reshape(self):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[8], dtype='float32', lod_level=1)
out = layers.sequence_reshape(input=x, new_dim=16)
self.assertIsNotNone(out)
print(str(program))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import unittest
import numpy as np
import math
from op_test import OpTest
class TestSequenceReshape(OpTest):
def setUp(self):
self.op_type = 'sequence_reshape'
dimension = 12
x_lod = [[0, 4, 5, 8, 11]]
x = np.random.uniform(0.1, 1, [11, 24]).astype('float32')
self.inputs = {'X': (x, x_lod)}
self.attrs = {'new_dim': dimension}
out, out_lod = self.compute_output(x, x_lod, dimension)
self.outputs = {'Out': (out, out_lod)}
def compute_output(self, x, x_lod, dimension):
x_width = x.shape[1]
out_lod = [[0]]
for i in xrange(len(x_lod[0]) - 1):
seq_len = x_lod[0][i + 1] - x_lod[0][i]
offset = (seq_len * x_width) / dimension
assert int(offset) * dimension == seq_len * x_width
out_lod[0].append(out_lod[0][-1] + int(offset))
out = np.zeros(shape=(out_lod[0][-1], dimension)).astype('float32')
out.ravel()[:] = x.ravel()[:]
return out, out_lod
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(["X"], "Out")
class TestSequenceReshape_reduce(TestSequenceReshape):
def setUp(self):
self.op_type = 'sequence_reshape'
dimension = 24
x_lod = [[0, 4, 6, 8, 12]]
x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
self.inputs = {'X': (x, x_lod)}
self.attrs = {'new_dim': dimension}
out, out_lod = self.compute_output(x, x_lod, dimension)
self.outputs = {'Out': (out, out_lod)}
class TestSequenceReshape_same(TestSequenceReshape):
def setUp(self):
self.op_type = 'sequence_reshape'
dimension = 12
x_lod = [[0, 4, 6, 8, 12]]
x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
self.inputs = {'X': (x, x_lod)}
self.attrs = {'new_dim': dimension}
out, out_lod = self.compute_output(x, x_lod, dimension)
self.outputs = {'Out': (out, out_lod)}
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册