提交 2cf56367 编写于 作者: L Liu Yiqun

Merge branch 'develop' into core_add_inference_unittest

......@@ -534,3 +534,8 @@ row_conv
--------
.. autofunction:: paddle.v2.fluid.layers.row_conv
:noindex:
multiplex
---------
.. autofunction:: paddle.v2.fluid.layers.multiplex
:noindex:
......@@ -39,6 +39,7 @@ PaddlePaddle可以使用常用的Python包管理工具
"cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "暂无"
"cpu_noavx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "暂无"
"cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
......
......@@ -42,6 +42,7 @@ If the links below shows up the login form, just click "Log in as guest" to star
"cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "Not Available"
"cpu_noavx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "Not Available"
"cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
......
......@@ -74,8 +74,10 @@ cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
framework_proto backward glog lod_rank_table profiler)
framework_proto backward glog lod_rank_table profiler feed_fetch_method)
cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <set>
#include "gflags/gflags.h"
#include "paddle/framework/feed_fetch_method.h"
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor_array.h"
......@@ -149,5 +150,168 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
}
}
// Check whether the block already has feed operators and feed_holder.
// Return false if the block does not have any feed operators.
// If some feed operators have been prepended to the block, check that
// the info contained in these feed operators matches the feed_targets
// and feed_holder_name. Raise exception when any mismatch is found.
// Return true if the block has feed operators and holder of matching info.
static bool has_feed_operators(
BlockDesc* block, std::map<std::string, const LoDTensor*>& feed_targets,
const std::string& feed_holder_name) {
size_t feed_count = 0;
for (auto* op : block->AllOps()) {
if (op->Type() == kFeedOpType) {
feed_count++;
PADDLE_ENFORCE_EQ(op->Input("X")[0], feed_holder_name,
"Input to feed op should be '%s'", feed_holder_name);
std::string feed_target_name = op->Output("Out")[0];
PADDLE_ENFORCE(
feed_targets.find(feed_target_name) != feed_targets.end(),
"Feed operator output name '%s' cannot be found in 'feed_targets'",
feed_target_name);
} else {
break;
}
}
if (feed_count > 0) {
PADDLE_ENFORCE_EQ(
feed_count, feed_targets.size(),
"The number of feed operators should match 'feed_targets'");
// When feed operator are present, so should be feed_holder
auto var = block->FindVar(feed_holder_name);
PADDLE_ENFORCE_NOT_NULL(var, "Block should already have a '%s' variable",
feed_holder_name);
PADDLE_ENFORCE_EQ(var->GetType(), proto::VarDesc::FEED_MINIBATCH,
"'%s' variable should be 'FEED_MINIBATCH' type",
feed_holder_name);
}
return feed_count > 0;
}
// Check whether the block already has fetch operators and fetch_holder.
// Return false if the block does not have any fetch operators.
// If some fetch operators have been appended to the block, check that
// the info contained in these fetch operators matches the fetch_targets
// and fetch_holder_name. Raise exception when any mismatch is found.
// Return true if the block has fetch operators and holder of matching info.
static bool has_fetch_operators(
BlockDesc* block, std::map<std::string, LoDTensor*>& fetch_targets,
const std::string& fetch_holder_name) {
size_t fetch_count = 0;
for (auto* op : block->AllOps()) {
if (op->Type() == kFetchOpType) {
fetch_count++;
PADDLE_ENFORCE_EQ(op->Output("Out")[0], fetch_holder_name,
"Output of fetch op should be '%s'", fetch_holder_name);
std::string fetch_target_name = op->Input("X")[0];
PADDLE_ENFORCE(
fetch_targets.find(fetch_target_name) != fetch_targets.end(),
"Fetch operator input name '%s' cannot be found in 'fetch_targets'",
fetch_target_name);
}
}
if (fetch_count > 0) {
PADDLE_ENFORCE_EQ(
fetch_count, fetch_targets.size(),
"The number of fetch operators should match 'fetch_targets'");
// When fetch operator are present, so should be fetch_holder
auto var = block->FindVar(fetch_holder_name);
PADDLE_ENFORCE_NOT_NULL(var, "Block should already have a '%s' variable",
fetch_holder_name);
PADDLE_ENFORCE_EQ(var->GetType(), proto::VarDesc::FETCH_LIST,
"'%s' variable should be 'FETCH_LIST' type",
fetch_holder_name);
}
return fetch_count > 0;
}
void Executor::Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>& feed_targets,
std::map<std::string, LoDTensor*>& fetch_targets,
const std::string& feed_holder_name,
const std::string& fetch_holder_name) {
auto* copy_program = new ProgramDesc(program);
auto* global_block = copy_program->MutableBlock(0);
if (!has_feed_operators(global_block, feed_targets, feed_holder_name)) {
// create feed_holder variable
auto* feed_holder = global_block->Var(feed_holder_name);
feed_holder->SetType(proto::VarDesc::FEED_MINIBATCH);
feed_holder->SetPersistable(true);
int i = 0;
for (auto& feed_target : feed_targets) {
std::string var_name = feed_target.first;
VLOG(3) << "feed target's name: " << var_name;
// prepend feed op
auto* op = global_block->PrependOp();
op->SetType(kFeedOpType);
op->SetInput("X", {feed_holder_name});
op->SetOutput("Out", {var_name});
op->SetAttr("col", {static_cast<int>(i)});
op->CheckAttrs();
i++;
}
}
// map the data of feed_targets to feed_holder
for (auto* op : global_block->AllOps()) {
if (op->Type() == kFeedOpType) {
std::string feed_target_name = op->Output("Out")[0];
int idx = boost::get<int>(op->GetAttr("col"));
SetFeedVariable(scope, *feed_targets[feed_target_name], feed_holder_name,
idx);
} else {
break;
}
}
if (!has_fetch_operators(global_block, fetch_targets, fetch_holder_name)) {
// create fetch_holder variable
auto* fetch_holder = global_block->Var(fetch_holder_name);
fetch_holder->SetType(proto::VarDesc::FETCH_LIST);
fetch_holder->SetPersistable(true);
int i = 0;
for (auto& fetch_target : fetch_targets) {
std::string var_name = fetch_target.first;
VLOG(3) << "fetch target's name: " << var_name;
// append fetch op
auto* op = global_block->AppendOp();
op->SetType(kFetchOpType);
op->SetInput("X", {var_name});
op->SetOutput("Out", {fetch_holder_name});
op->SetAttr("col", {static_cast<int>(i)});
op->CheckAttrs();
i++;
}
}
Run(*copy_program, scope, 0, true, true);
// obtain the data of fetch_targets from fetch_holder
for (auto* op : global_block->AllOps()) {
if (op->Type() == kFetchOpType) {
std::string fetch_target_name = op->Input("X")[0];
int idx = boost::get<int>(op->GetAttr("col"));
*fetch_targets[fetch_target_name] =
GetFetchVariable(*scope, fetch_holder_name, idx);
}
}
delete copy_program;
}
} // namespace framework
} // namespace paddle
......@@ -41,6 +41,12 @@ class Executor {
void Run(const ProgramDesc&, Scope*, int, bool create_local_scope = true,
bool create_vars = true);
void Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>& feed_targets,
std::map<std::string, LoDTensor*>& fetch_targets,
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");
private:
const platform::Place place_;
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/feed_fetch_method.h"
#include "glog/logging.h"
#include "paddle/framework/variable.h"
namespace paddle {
namespace framework {
void SetFeedVariable(Scope* scope, const LoDTensor& input,
const std::string& var_name, size_t index) {
// If var_name Variable is not found in GlobalScope, a new variable will
// be created.
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs =
*(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
if (index >= feed_inputs.size()) {
feed_inputs.resize(index + 1);
}
// shared data with input tensor
feed_inputs[index].ShareDataWith(input);
// set lod
feed_inputs[index].set_lod(input.lod());
}
LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
size_t index) {
// Since we want to fetch LodTensor from a variable, the variable must
// be created alreadly.
Variable* g_fetch_value = scope.FindVar(var_name);
PADDLE_ENFORCE(g_fetch_value->IsType<FeedFetchList>(),
"Only %s can be invoked by GetFetchVariable",
typeid(FeedFetchList).name());
auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
auto& tensor = fetch_outputs[index];
VLOG(3) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims();
PADDLE_ENFORCE_LT(index, fetch_outputs.size());
return tensor;
}
} // namespace framework
} // namespace paddle
......@@ -13,46 +13,18 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "glog/logging.h"
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/variable.h"
namespace paddle {
namespace framework {
void SetFeedVariable(Scope* scope, const LoDTensor& input,
const std::string& var_name, size_t index) {
// If var_name Variable is not found in GlobalScope, a new variable will
// be created.
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs =
*(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
if (index >= feed_inputs.size()) {
feed_inputs.resize(index + 1);
}
// shared data with input tensor
feed_inputs[index].ShareDataWith(input);
// set lod
feed_inputs[index].set_lod(input.lod());
}
const std::string& var_name, size_t index);
LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
size_t index) {
// Since we want to fetch LodTensor from a variable, the variable must
// be created alreadly.
Variable* g_fetch_value = scope.FindVar(var_name);
PADDLE_ENFORCE(g_fetch_value->IsType<FeedFetchList>(),
"Only %s can be invoked by GetFetchVariable",
typeid(FeedFetchList).name());
auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
auto& tensor = fetch_outputs[index];
VLOG(3) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims();
PADDLE_ENFORCE_LT(index, fetch_outputs.size());
return tensor;
}
size_t index);
} // namespace framework
} // namespace paddle
......@@ -15,7 +15,6 @@ limitations under the License. */
#include "inference.h"
#include <fstream>
#include "paddle/framework/executor.h"
#include "paddle/framework/feed_fetch_method.h"
#include "paddle/framework/init.h"
#include "paddle/framework/scope.h"
......@@ -154,7 +153,7 @@ void InferenceEngine::Execute(const std::vector<framework::LoDTensor>& feeds,
LOG(FATAL) << "Please initialize the program_ and load_program_ first.";
}
if (feeds.size() < feed_var_names_.size()) {
if (feeds.size() != feed_var_names_.size()) {
LOG(FATAL) << "Please feed " << feed_var_names_.size() << " input Tensors.";
}
......@@ -165,19 +164,22 @@ void InferenceEngine::Execute(const std::vector<framework::LoDTensor>& feeds,
executor->Run(*load_program_, scope, 0, true, true);
std::map<std::string, const framework::LoDTensor*> feed_targets;
std::map<std::string, framework::LoDTensor*> fetch_targets;
// set_feed_variable
for (size_t i = 0; i < feed_var_names_.size(); ++i) {
framework::SetFeedVariable(scope, feeds[i], "feed", i);
feed_targets[feed_var_names_[i]] = &feeds[i];
}
executor->Run(*program_, scope, 0, true, true);
// get_fetch_variable
fetchs.resize(fetch_var_names_.size());
for (size_t i = 0; i < fetch_var_names_.size(); ++i) {
fetchs[i] = framework::GetFetchVariable(*scope, "fetch", i);
fetch_targets[fetch_var_names_[i]] = &fetchs[i];
}
executor->Run(*program_, scope, feed_targets, fetch_targets);
delete place;
delete scope;
delete executor;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/lstmp_op.h"
namespace paddle {
namespace operators {
class LSTMPOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(Weight) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("ProjWeight"),
"Input(ProjWeight) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Bias"),
"Input(Bias) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Projection"),
"Output(Projection) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Cell"),
"Output(Cell) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("BatchGate"),
"Output(BatchGate) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("BatchCellPreAct"),
"Output(BatchCellPreAct) of LSTMP operator should not be "
"null.");
PADDLE_ENFORCE(ctx->HasOutput("BatchHidden"),
"Output(BatchHidden) of LSTMP operator should not be null.");
auto in_dims = ctx->GetInputDim("Input");
PADDLE_ENFORCE_EQ(in_dims.size(), 2,
"Input(X)'s rank of LSTMP operator must be 2.");
int frame_size = in_dims[1] / 4;
auto w_dims = ctx->GetInputDim("Weight");
auto proj_dims = ctx->GetInputDim("ProjWeight");
PADDLE_ENFORCE_EQ(w_dims.size(), 2,
"The rank of Input(Weight) should be 2.");
PADDLE_ENFORCE_EQ(w_dims[0], proj_dims[1],
"The first dimension of Input(Weight) "
"should be %d.",
proj_dims[1]);
PADDLE_ENFORCE_EQ(w_dims[1], 4 * frame_size,
"The second dimension of Input(Weight) "
"should be 4 * %d.",
frame_size);
PADDLE_ENFORCE_EQ(proj_dims.size(), 2,
"The rank of Input(ProjWeight) should be 2.");
PADDLE_ENFORCE_EQ(proj_dims[0], frame_size,
"The first dimension of Input(ProjWeight) "
"should be %d.",
frame_size);
if (ctx->HasInput("H0")) {
PADDLE_ENFORCE(ctx->HasInput("C0"),
"Input(C0) of LSTMP operator should not be null after "
"Input(H0) provided.");
auto h_dims = ctx->GetInputDim("H0");
auto c_dims = ctx->GetInputDim("C0");
PADDLE_ENFORCE(h_dims == c_dims,
"The dimension of Input(H0) and Input(C0) "
"should be the same.");
ctx->SetOutputDim("OrderedP0", {h_dims[0], proj_dims[1]});
}
auto b_dims = ctx->GetInputDim("Bias");
PADDLE_ENFORCE_EQ(b_dims.size(), 2, "The rank of Input(Bias) should be 2.");
PADDLE_ENFORCE_EQ(b_dims[0], 1,
"The first dimension of Input(Bias) should be 1.");
if (ctx->Attrs().Get<bool>("use_peepholes")) {
PADDLE_ENFORCE_EQ(b_dims[1], 7 * frame_size,
"The second dimension of Input(Bias) should be "
"7 * %d if enable peepholes connection",
frame_size);
} else {
PADDLE_ENFORCE_EQ(b_dims[1], 4 * frame_size,
"The second dimension of Input(Bias) should be "
"4 * %d if disable peepholes connection",
frame_size);
}
framework::DDim out_dims({in_dims[0], frame_size});
framework::DDim proj_out_dims({in_dims[0], proj_dims[1]});
ctx->SetOutputDim("Projection", proj_out_dims);
ctx->SetOutputDim("Cell", out_dims);
ctx->SetOutputDim("BatchGate", in_dims);
ctx->SetOutputDim("BatchCellPreAct", out_dims);
ctx->SetOutputDim("BatchHidden", out_dims);
ctx->ShareLoD("Input", "Projection");
ctx->ShareLoD("Input", "Cell");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("Input")->type()),
ctx.device_context());
}
};
class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker {
public:
LSTMPOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(LoDTensor) the input for sequence data, which supports "
"variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T X 4D), where T is the "
"total time steps in this mini-batch, D is the hidden size.");
AddInput("H0",
"(Tensor, optional) the initial hidden state is an optional "
"input. This is a tensor with shape (N x D), where N is the "
"batch size and D is the hidden size.")
.AsDispensable();
AddInput("C0",
"(Tensor, optional) the initial cell state is an optional "
"input. This is a tensor with shape (N x D), where N is the "
"batch size. `C0` should not be null if `H0` provided.")
.AsDispensable();
AddInput("Weight",
"(Tensor) the learnable hidden-hidden weights."
" - The shape is (P x 4D), where P is the projection layer size "
"and D is the hidden size."
" - Weight = {W_cr, W_ir, W_fr, W_or}");
AddInput("ProjWeight",
"(Tensor) the learnable weight of the projection layer."
" - The shape is (D x P), where P is the recurrent projection "
"layer size and D is the hidden size."
" - ProjWeight = {W_rh}");
AddInput("Bias",
"(Tensor) the learnable biases, which contains two parts: "
"input-hidden biases and peephole connections weights if "
"setting `use_peepholes` to `True`. "
"1. `use_peepholes = False` "
" - The shape is (1 x 4D). "
" - Bias = {b_c, b_i, b_f, b_o}."
"2. `use_peepholes = True` "
" - The shape is (1 x 7D). "
" - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}.");
AddOutput("Projection",
"(LoDTensor) the projection of the hidden state of LSTMP "
"operator. The shape is (T x P), and LoD is the same with the "
"`Input`.");
AddOutput("Cell",
"(LoDTensor) the cell state of LSTMP operator. "
"The shape is (T x D), and lod is the same with the `Input`.");
AddOutput("BatchGate",
"(LoDTensor) This LoDTensor contains input gate, forget gate "
"and output gate after the activations. This LoDTensor has the "
"same shape as the reorganized input, which is also be called "
"batch input. The LoD size is 2. The first-level LoD is the "
"batch offsets and the second contains the indices, which "
"denotes the position of reorganized sequence in the raw input.")
.AsIntermediate();
AddOutput("BatchCellPreAct",
"(LoDTensor) the pre-activation cell state reorganized in batch. "
"This LoDTensor is obtained in the forward and used in the "
"backward.")
.AsIntermediate();
AddOutput("BatchHidden",
"(LoDTensor) the hidden state reorganized in batch. "
"This LoDTensor is obtained in the forward and used in the "
"backward.")
.AsIntermediate();
AddOutput("OrderedP0",
"(Tensor) the projection of the initial hidden state "
"H0. This is a tensor with shape (N x P), where N is the "
"batch size and P is the hidden size.")
.AsIntermediate();
AddAttr<bool>("use_peepholes",
"(bool, defalut: True) "
"whether to enable diagonal/peephole connections.")
.SetDefault(true);
AddAttr<bool>("is_reverse",
"(bool, defalut: False) "
"whether to compute reversed LSTMP.")
.SetDefault(false);
AddAttr<std::string>(
"gate_activation",
"(string, default: sigmoid)"
"The activation for input gate, forget gate and output "
"gate, `sigmoid` by default.")
.SetDefault("sigmoid")
.InEnum({"sigmoid", "tanh", "relu", "identity"});
AddAttr<std::string>("cell_activation",
"(string, default: tanh)"
"The activation for cell output, `tanh` by defalut.")
.SetDefault("tanh")
.InEnum({"sigmoid", "tanh", "relu", "identity"});
AddAttr<std::string>("candidate_activation",
"(string, default: tanh)"
"The activation for candidate hidden state, "
"`tanh` by default.")
.SetDefault("tanh")
.InEnum({"sigmoid", "tanh", "relu", "identity"});
AddAttr<std::string>("proj_activation",
"(string, default: tanh)"
"The activation for projection output, "
"`tanh` by defalut.")
.SetDefault("tanh")
.InEnum({"sigmoid", "tanh", "relu", "identity"});
AddComment(R"DOC(
Long-Short Term Memory with recurrent Projection layer (LSTMP) Operator.
LSTMP has a separate projection layer after the LSTM layer, projecting the
original hidden state to a lower-dimensional one, which is proposed to reduce
the number of total parameters and furthermore computational complexity for
the LSTM, espeacially for the case that the size of output units is relative
large (https://research.google.com/pubs/archive/43905.pdf).
The formula is as follows:
$$
i_t = \sigma(W_{ix}x_{t} + W_{ir}r_{t-1} + W_{ic}c_{t-1} + b_i) \\
f_t = \sigma(W_{fx}x_{t} + W_{fr}r_{t-1} + W_{fc}c_{t-1} + b_f) \\
\tilde{c_t} = act_g(W_{cx}x_t + W_{cr}r_{t-1} + b_c) \\
o_t = \sigma(W_{ox}x_{t} + W_{or}r_{t-1} + W_{oc}c_t + b_o) \\
c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\
h_t = o_t \odot act_h(c_t) \\
r_t = \overline{act_h}(W_{rh}h_t)
$$
where the W terms denote weight matrices (e.g. $W_{xi}$ is the matrix
of weights from the input gate to the input), $W_{ic}, W_{fc}, W_{oc}$
are diagonal weight matrices for peephole connections. In our implementation,
we use vectors to reprenset these diagonal weight matrices. The b terms
denote bias vectors ($b_i$ is the input gate bias vector), $\sigma$
is the activation, such as logistic sigmoid function, and
$i, f, o$ and $c$ are the input gate, forget gate, output gate,
and cell activation vectors, respectively, all of which have the same size as
the cell output activation vector $h$. Here $h$ is usually called the hidden
state and $r$ denotes its recurrent projection. And $\tilde{c_t}$ is also
called the candidate hidden state, whose computation is based on the current
input and previous hidden state.
The $\odot$ is the element-wise product of the vectors. $act_g$ and $act_h$
are the cell input and cell output activation functions and `tanh` is usually
used for them. $\overline{act_h}$ is the activation function for the
projection output, usually using `identity` or same as $act_h$.
Note that these $W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}$
operations on the input $x_{t}$ are NOT included in this operator.
Users can choose to use fully-connected operator before LSTMP operator.
)DOC");
}
};
class LSTMPGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Projection"),
"Input(Projection) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Cell"),
"Input(Cell) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(Weight) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("ProjWeight"),
"Input(ProjWeight) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Bias"),
"Input(Bias) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("BatchGate"),
"Input(BatchGate) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("BatchCellPreAct"),
"Input(BatchGate) of LSTMP operator should not be null.");
auto SetOutGradDim = [&ctx](const std::string& name) {
auto g_name = framework::GradVarName(name);
if (ctx->HasOutput(g_name))
ctx->SetOutputDim(g_name, ctx->GetInputDim(name));
};
SetOutGradDim("Input");
SetOutGradDim("Weight");
SetOutGradDim("ProjWeight");
SetOutGradDim("Bias");
SetOutGradDim("H0");
SetOutGradDim("C0");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("Input")->type()),
ctx.device_context());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker, lstmp_grad,
ops::LSTMPGradOp);
REGISTER_OP_CPU_KERNEL(
lstmp, ops::LSTMPKernel<paddle::platform::CPUDeviceContext, float>,
ops::LSTMPKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
lstmp_grad, ops::LSTMPGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::LSTMPGradKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/lstmp_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
lstmp, ops::LSTMPKernel<paddle::platform::CUDADeviceContext, float>,
ops::LSTMPKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
lstmp_grad,
ops::LSTMPGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::LSTMPGradKernel<paddle::platform::CUDADeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/activation_op.h"
#include "paddle/operators/math/detail/activation_functions.h"
#include "paddle/operators/math/lstm_compute.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence2batch.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using LoDTensor = framework::LoDTensor;
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename DeviceContext, typename T>
inline void ReorderInitState(const DeviceContext& ctx,
const framework::Tensor& src, const size_t* index,
framework::Tensor* dst, bool indexed_src) {
math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
dst->mutable_data<T>(src.dims(), ctx.GetPlace());
row_shuffle(ctx, src, index, *dst, indexed_src);
}
template <typename DeviceContext, typename T>
class LSTMPKernel : public framework::OpKernel<T> {
public:
template <typename Device, typename X, typename Y>
void ActCompute(const math::detail::ActivationType act_type, const Device& d,
X x, Y y) const {
if (act_type == math::detail::ActivationType::kIdentity)
y.device(d) = x;
else if (act_type == math::detail::ActivationType::kSigmoid)
SigmoidFunctor<T>()(d, x, y);
else if (act_type == math::detail::ActivationType::kTanh)
TanhFunctor<T>()(d, x, y);
else if (act_type == math::detail::ActivationType::kReLU)
ReluFunctor<T>()(d, x, y);
else
PADDLE_THROW("unsupported activation type");
}
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<LoDTensor>("Input");
auto* weight = ctx.Input<Tensor>("Weight");
auto* proj_weight = ctx.Input<Tensor>("ProjWeight");
auto* bias = ctx.Input<Tensor>("Bias");
auto* hidden_t0 = ctx.Input<Tensor>("H0");
auto* ordered_proj0 = ctx.Output<Tensor>("OrderedP0");
auto* cell_t0 = ctx.Input<Tensor>("C0");
auto* batch_gate = ctx.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(ctx.GetPlace());
auto* proj_out = ctx.Output<LoDTensor>("Projection");
proj_out->mutable_data<T>(ctx.GetPlace());
auto* cell_out = ctx.Output<LoDTensor>("Cell");
cell_out->mutable_data<T>(ctx.GetPlace());
bool is_reverse = ctx.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& device_ctx = ctx.template device_context<DeviceContext>();
to_batch(device_ctx, *input, *batch_gate, true, is_reverse);
auto in_dims = input->dims();
int frame_size = static_cast<int>(in_dims[1] / 4);
framework::DDim dims({in_dims[0], frame_size});
framework::DDim proj_dims({in_dims[0], proj_weight->dims()[1]});
if (bias) {
Tensor b = *bias;
b.Resize({bias->numel(), 1});
Tensor gate_bias = b.Slice(0, 4 * frame_size);
math::RowwiseAdd<DeviceContext, T> add_bias;
add_bias(device_ctx, *batch_gate, gate_bias, batch_gate);
}
math::LstmMetaValue<T> lstmp_value;
if (bias && ctx.Attr<bool>("use_peepholes")) {
T* bias_data = const_cast<T*>(bias->data<T>());
// the code style in LstmpMetaValue will be updated later.
lstmp_value.check_ig = bias_data + 4 * frame_size;
lstmp_value.check_fg = lstmp_value.check_ig + frame_size;
lstmp_value.check_og = lstmp_value.check_fg + frame_size;
} else {
lstmp_value.check_ig = nullptr;
lstmp_value.check_fg = nullptr;
lstmp_value.check_og = nullptr;
}
lstmp_value.prev_state_value = nullptr;
Tensor ordered_c0;
const size_t* order = batch_gate->lod()[2].data();
if (cell_t0) {
// Since the batch computing for LSTMP reorders the input sequence
// according to their length. The initialized cell state also needs
// to reorder.
ReorderInitState<DeviceContext, T>(device_ctx, *cell_t0, order,
&ordered_c0, true);
lstmp_value.prev_state_value = ordered_c0.data<T>();
}
// Use the local variable as here.
LoDTensor batch_proj, batch_cell;
auto* batch_cell_pre_act = ctx.Output<LoDTensor>("BatchCellPreAct");
batch_cell_pre_act->mutable_data<T>(dims, ctx.GetPlace());
auto* batch_hidden = ctx.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(dims, ctx.GetPlace()); // T x D
batch_proj.mutable_data<T>(proj_dims, ctx.GetPlace()); // T x P
batch_cell.mutable_data<T>(dims, ctx.GetPlace()); // T x D
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
auto gate_act = math::detail::GetActivationType(
ctx.Attr<std::string>("gate_activation"));
auto cell_act = math::detail::GetActivationType(
ctx.Attr<std::string>("cell_activation"));
auto cand_act = math::detail::GetActivationType(
ctx.Attr<std::string>("candidate_activation"));
auto proj_act = math::detail::GetActivationType(
ctx.Attr<std::string>("proj_activation"));
auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
for (size_t n = 0; n < num_batch; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
Tensor proj_t = batch_proj.Slice(bstart, bend);
Tensor cell_t = batch_cell.Slice(bstart, bend);
Tensor cell_pre_act_t = batch_cell_pre_act->Slice(bstart, bend);
int cur_batch_size = bend - bstart;
if (n > 0) {
int pre_h_start = static_cast<int>(batch_starts[n - 1]);
int pre_h_end = pre_h_start + cur_batch_size;
auto pre_proj_t = batch_proj.Slice(pre_h_start, pre_h_end);
math::matmul<DeviceContext, T>(device_ctx, pre_proj_t, false, *weight,
false, static_cast<T>(1.0), &gate_t,
static_cast<T>(1.0));
} else if (hidden_t0) {
// If n == 0 and there is no initialized hidden state, that is to say
// the H0 is zeros, the calculation W_h * H0 will be skiped.
// If n == 0 and there is initialized hidden state, calculate W_h * H0.
// Since the batch computing for LSTMP reorders the input sequence
// according to their length. The initialized hidden state also needs
// to reorder.
Tensor ordered_h0;
ordered_proj0->mutable_data<T>(ctx.GetPlace());
ReorderInitState<DeviceContext, T>(device_ctx, *hidden_t0, order,
&ordered_h0, true);
math::matmul<DeviceContext, T>(device_ctx, ordered_h0, false,
*proj_weight, false, static_cast<T>(1.0),
ordered_proj0, static_cast<T>(0.0));
if (proj_act != math::detail::ActivationType::kIdentity) {
auto proj0_dev = EigenMatrix<T>::From(*ordered_proj0);
ActCompute(cell_act, place, proj0_dev, proj0_dev);
}
math::matmul<DeviceContext, T>(device_ctx, *ordered_proj0, false,
*weight, false, static_cast<T>(1.0),
&gate_t, static_cast<T>(1.0));
}
lstmp_value.gate_value = gate_t.data<T>();
lstmp_value.output_value = hidden_t.data<T>();
lstmp_value.state_value = cell_t.data<T>();
lstmp_value.state_active_value = cell_pre_act_t.data<T>();
math::LstmUnitFunctor<DeviceContext, T>::compute(
device_ctx, lstmp_value, frame_size, cur_batch_size, gate_act,
cell_act, cand_act);
lstmp_value.prev_state_value = lstmp_value.state_value;
math::matmul<DeviceContext, T>(device_ctx, hidden_t, false, *proj_weight,
false, static_cast<T>(1.0), &proj_t,
static_cast<T>(0.0));
if (proj_act != math::detail::ActivationType::kIdentity) {
auto proj_t_dev = EigenMatrix<T>::From(proj_t);
ActCompute(cell_act, place, proj_t_dev, proj_t_dev);
}
}
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_proj.set_lod(batch_gate->lod());
// restore the output hidden in LoDTensor from the batch hidden
to_seq(device_ctx, batch_proj, *proj_out);
batch_cell.set_lod(batch_gate->lod());
// restore the output cell state in LoDTensor from the batch cell
to_seq(device_ctx, batch_cell, *cell_out);
}
};
template <typename DeviceContext, typename T>
class LSTMPGradKernel : public framework::OpKernel<T> {
public:
template <typename Device, typename X, typename Y, typename DX, typename DY>
void ActGradCompute(const math::detail::ActivationType act_type,
const Device& d, X x, Y y, DX dx, DY dy) const {
// x is dummy and won't be used even in Relu(use y instead)
if (act_type == math::detail::ActivationType::kIdentity)
dx.device(d) = dy;
else if (act_type == math::detail::ActivationType::kSigmoid)
SigmoidGradFunctor<T>()(d, x, y, dy, dx);
else if (act_type == math::detail::ActivationType::kTanh)
TanhGradFunctor<T>()(d, x, y, dy, dx);
else if (act_type == math::detail::ActivationType::kReLU)
ReluGradFunctor<T>()(d, x, y, dy, dx);
else
PADDLE_THROW("unsupported activation type");
}
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<LoDTensor>("Input");
auto* weight = ctx.Input<Tensor>("Weight");
auto* proj_weight = ctx.Input<Tensor>("ProjWeight");
auto* bias = ctx.Input<Tensor>("Bias");
auto* proj_out = ctx.Input<LoDTensor>("Projection");
auto* cell_out = ctx.Input<LoDTensor>("Cell");
auto* batch_gate = ctx.Input<LoDTensor>("BatchGate");
auto* batch_cell_pre_act = ctx.Input<LoDTensor>("BatchCellPreAct");
auto* batch_hidden = ctx.Input<LoDTensor>("BatchHidden");
auto* projection_g =
ctx.Input<LoDTensor>(framework::GradVarName("Projection"));
auto* in_g = ctx.Output<LoDTensor>(framework::GradVarName("Input"));
auto* weight_g = ctx.Output<Tensor>(framework::GradVarName("Weight"));
auto* proj_weight_g =
ctx.Output<Tensor>(framework::GradVarName("ProjWeight"));
auto* bias_g = ctx.Output<Tensor>(framework::GradVarName("Bias"));
auto* h0 = ctx.Input<Tensor>("H0");
auto* ordered_proj0 = ctx.Input<Tensor>("OrderedP0");
auto* c0 = ctx.Input<Tensor>("C0");
auto* h0_g = ctx.Output<Tensor>(framework::GradVarName("H0"));
auto* c0_g = ctx.Output<Tensor>(framework::GradVarName("C0"));
auto& device_ctx = ctx.template device_context<DeviceContext>();
math::SetConstant<DeviceContext, T> zero;
if (weight_g) {
weight_g->mutable_data<T>(ctx.GetPlace());
zero(device_ctx, weight_g, static_cast<T>(0.0));
}
if (proj_weight_g) {
proj_weight_g->mutable_data<T>(ctx.GetPlace());
zero(device_ctx, proj_weight_g, static_cast<T>(0.0));
}
// ordered_h0/c0 is the reordered hidden/cell initialization.
// ordered_h0_g/c0_g is the reordered gradient of hidden/cell
// initialization.
Tensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
const size_t* order = batch_gate->lod()[2].data();
if (c0) {
ReorderInitState<DeviceContext, T>(device_ctx, *c0, order, &ordered_c0,
true);
}
if (c0 && c0_g) {
ordered_c0_g.mutable_data<T>(c0_g->dims(), ctx.GetPlace());
}
auto in_dims = input->dims();
auto out_dims = cell_out->dims();
framework::DDim proj_dims({in_dims[0], proj_weight->dims()[1]});
int frame_size = static_cast<int>(in_dims[1] / 4);
PADDLE_ENFORCE_EQ(frame_size, out_dims[1]);
math::LstmMetaValue<T> lstmp_value;
if (bias && ctx.Attr<bool>("use_peepholes")) {
T* bias_data = const_cast<T*>(bias->data<T>());
lstmp_value.check_ig = bias_data + 4 * frame_size;
lstmp_value.check_fg = lstmp_value.check_ig + frame_size;
lstmp_value.check_og = lstmp_value.check_fg + frame_size;
} else {
lstmp_value.check_ig = nullptr;
lstmp_value.check_fg = nullptr;
lstmp_value.check_og = nullptr;
}
math::LstmMetaGrad<T> lstmp_grad;
if (bias && bias_g) {
bias_g->mutable_data<T>(ctx.GetPlace());
zero(device_ctx, bias_g, static_cast<T>(0.0));
}
if (bias && bias_g && ctx.Attr<bool>("use_peepholes")) {
T* bias_g_data = bias_g->data<T>();
lstmp_grad.check_ig_grad = bias_g_data + 4 * frame_size;
lstmp_grad.check_fg_grad = lstmp_grad.check_ig_grad + frame_size;
lstmp_grad.check_og_grad = lstmp_grad.check_fg_grad + frame_size;
} else {
lstmp_grad.check_ig_grad = nullptr;
lstmp_grad.check_fg_grad = nullptr;
lstmp_grad.check_og_grad = nullptr;
}
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto ToBatch = [&batch_gate, &to_batch](
const DeviceContext& ctx, const framework::LoDTensor& src,
const framework::DDim& dims, framework::LoDTensor& dst) {
dst.mutable_data<T>(dims, ctx.GetPlace());
dst.set_lod(batch_gate->lod());
to_batch(ctx, src, dst, false);
};
LoDTensor batch_hidden_g, batch_proj, batch_proj_g, batch_cell;
batch_hidden_g.mutable_data<T>(out_dims, ctx.GetPlace());
ToBatch(device_ctx, *proj_out, proj_dims, batch_proj); // T x P
ToBatch(device_ctx, *projection_g, proj_dims, batch_proj_g); // T x P
ToBatch(device_ctx, *cell_out, out_dims, batch_cell); // T x D
LoDTensor batch_cell_g, batch_gate_g;
batch_cell_g.mutable_data<T>(out_dims, ctx.GetPlace());
// TODO(qingqing) support the case output cell has gradient.
// to_batch(device_ctx, *cell_g, batch_cell_g, false);
zero(device_ctx, &batch_cell_g, static_cast<T>(0.0));
batch_gate_g.mutable_data<T>(batch_gate->dims(), ctx.GetPlace());
batch_gate_g.set_lod(batch_gate->lod());
auto gate_act = math::detail::GetActivationType(
ctx.Attr<std::string>("gate_activation"));
auto cell_act = math::detail::GetActivationType(
ctx.Attr<std::string>("cell_activation"));
auto cand_act = math::detail::GetActivationType(
ctx.Attr<std::string>("candidate_activation"));
auto proj_act = math::detail::GetActivationType(
ctx.Attr<std::string>("proj_activation"));
auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
for (int n = static_cast<int>(num_batch) - 1; n >= 0; n--) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
Tensor cur_proj = batch_proj.Slice(bstart, bend);
Tensor proj_g = batch_proj_g.Slice(bstart, bend);
if (proj_act != math::detail::ActivationType::kIdentity) {
auto cur_proj_dev = EigenMatrix<T>::From(cur_proj);
auto proj_g_dev = EigenMatrix<T>::From(proj_g);
ActGradCompute(cell_act, place, cur_proj_dev, cur_proj_dev, proj_g_dev,
proj_g_dev);
}
/* hidden state backwarad */
Tensor out_g = batch_hidden_g.Slice(bstart, bend);
math::matmul<DeviceContext, T>(device_ctx, proj_g, false, *proj_weight,
true, static_cast<T>(1.0), &out_g,
static_cast<T>(0.0));
/* projection weight backward*/
if (proj_weight_g) {
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
math::matmul<DeviceContext, T>(device_ctx, hidden_t, true, proj_g,
false, static_cast<T>(1.0),
proj_weight_g, static_cast<T>(1.0));
}
Tensor gate = batch_gate->Slice(bstart, bend);
Tensor cell = batch_cell.Slice(bstart, bend);
Tensor cell_pre_act = batch_cell_pre_act->Slice(bstart, bend);
lstmp_value.gate_value = gate.data<T>();
lstmp_value.state_value = cell.data<T>();
lstmp_value.state_active_value = cell_pre_act.data<T>();
Tensor gate_g = batch_gate_g.Slice(bstart, bend);
Tensor cell_g = batch_cell_g.Slice(bstart, bend);
lstmp_grad.state_grad = cell_g.data<T>();
lstmp_grad.gate_grad = gate_g.data<T>();
lstmp_grad.output_grad = out_g.data<T>();
if (n > 0) {
int bstart_pre = static_cast<int>(batch_starts[n - 1]);
Tensor cell_pre = batch_cell.Slice(bstart_pre, bstart);
Tensor cell_pre_g = batch_cell_g.Slice(bstart_pre, bstart);
lstmp_value.prev_state_value = cell_pre.data<T>();
lstmp_grad.prev_state_grad = cell_pre_g.data<T>();
} else {
lstmp_value.prev_state_value = c0 ? ordered_c0.data<T>() : nullptr;
lstmp_grad.prev_state_grad = c0_g ? ordered_c0_g.data<T>() : nullptr;
}
int cur_batch_size = bend - bstart;
math::LstmUnitGradFunctor<DeviceContext, T>::compute(
device_ctx, lstmp_value, lstmp_grad, frame_size, cur_batch_size,
gate_act, cell_act, cand_act);
if (n > 0) {
int pre_h_start = static_cast<int>(batch_starts[n - 1]);
int pre_h_end = pre_h_start + cur_batch_size;
auto pre_proj_g = batch_proj_g.Slice(pre_h_start, pre_h_end);
math::matmul<DeviceContext, T>(device_ctx, gate_g, false, *weight, true,
static_cast<T>(1.0), &pre_proj_g,
static_cast<T>(1.0));
if (weight_g) {
/* weight backward*/
auto pre_proj = batch_proj.Slice(pre_h_start, pre_h_end);
math::matmul<DeviceContext, T>(device_ctx, pre_proj, true, gate_g,
false, static_cast<T>(1.0), weight_g,
static_cast<T>(1.0));
}
} else {
if (h0 && weight_g) {
ReorderInitState<DeviceContext, T>(device_ctx, *h0, order,
&ordered_h0, true);
if (weight_g) {
math::matmul<DeviceContext, T>(device_ctx, *ordered_proj0, true,
gate_g, false, static_cast<T>(1.0),
weight_g, static_cast<T>(1.0));
}
}
if (h0 && (h0_g || proj_weight_g)) {
ordered_h0_g.mutable_data<T>(h0_g->dims(), ctx.GetPlace());
Tensor proj0_g;
proj0_g.Resize({in_dims[0], proj_weight->dims()[1]});
proj0_g.mutable_data<T>(ctx.GetPlace());
math::matmul<DeviceContext, T>(device_ctx, gate_g, false, *weight,
true, static_cast<T>(1.0), &proj0_g,
static_cast<T>(0.0));
if (proj_act != math::detail::ActivationType::kIdentity) {
auto proj0_dev = EigenMatrix<T>::From(*ordered_proj0);
auto proj0_g_dev = EigenMatrix<T>::From(proj0_g);
ActGradCompute(cell_act, place, proj0_dev, proj0_dev, proj0_g_dev,
proj0_g_dev);
}
if (h0_g) {
math::matmul<DeviceContext, T>(
device_ctx, proj0_g, false, *proj_weight, true,
static_cast<T>(1.0), &ordered_h0_g, static_cast<T>(0.0));
}
if (proj_weight_g) {
math::matmul<DeviceContext, T>(device_ctx, ordered_h0, true,
proj0_g, false, static_cast<T>(1.0),
proj_weight_g, static_cast<T>(1.0));
}
}
}
}
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
if (in_g) {
/* backward data */
in_g->mutable_data<T>(ctx.GetPlace());
to_seq(device_ctx, batch_gate_g, *in_g);
}
if (bias && bias_g) {
/* backward bias */
Tensor b_g = *bias_g;
b_g.Resize({bias_g->numel(), 1});
Tensor gate_bias_g = b_g.Slice(0, 4 * frame_size);
math::ColwiseSum<DeviceContext, T> col_sum;
col_sum(device_ctx, batch_gate_g, &gate_bias_g);
}
if (h0 && h0_g) {
ReorderInitState<DeviceContext, T>(device_ctx, ordered_h0_g, order, h0_g,
false);
}
if (c0 && c0_g) {
ReorderInitState<DeviceContext, T>(device_ctx, ordered_c0_g, order, c0_g,
false);
}
}
};
} // namespace operators
} // namespace paddle
......@@ -119,7 +119,13 @@ REGISTER_OPERATOR(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker,
REGISTER_OPERATOR(multiplex_grad, ops::MultiplexGradOp);
REGISTER_OP_CPU_KERNEL(
multiplex,
ops::MultiplexCPUKernel<paddle::platform::CPUDeviceContext, float>);
ops::MultiplexCPUKernel<paddle::platform::CPUDeviceContext, float>,
ops::MultiplexCPUKernel<paddle::platform::CPUDeviceContext, double>,
ops::MultiplexCPUKernel<paddle::platform::CPUDeviceContext, int>,
ops::MultiplexCPUKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
multiplex_grad,
ops::MultiplexGradCPUKernel<paddle::platform::CPUDeviceContext, float>);
ops::MultiplexGradCPUKernel<paddle::platform::CPUDeviceContext, float>,
ops::MultiplexGradCPUKernel<paddle::platform::CPUDeviceContext, double>,
ops::MultiplexGradCPUKernel<paddle::platform::CPUDeviceContext, int>,
ops::MultiplexGradCPUKernel<paddle::platform::CPUDeviceContext, int64_t>);
......@@ -90,7 +90,13 @@ namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
multiplex,
ops::MultiplexGPUKernel<paddle::platform::CUDADeviceContext, float>);
ops::MultiplexGPUKernel<paddle::platform::CUDADeviceContext, float>,
ops::MultiplexGPUKernel<paddle::platform::CUDADeviceContext, double>,
ops::MultiplexGPUKernel<paddle::platform::CUDADeviceContext, int>,
ops::MultiplexGPUKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
multiplex_grad,
ops::MultiplexGradGPUKernel<paddle::platform::CUDADeviceContext, float>);
ops::MultiplexGradGPUKernel<paddle::platform::CUDADeviceContext, float>,
ops::MultiplexGradGPUKernel<paddle::platform::CUDADeviceContext, double>,
ops::MultiplexGradGPUKernel<paddle::platform::CUDADeviceContext, int>,
ops::MultiplexGradGPUKernel<paddle::platform::CUDADeviceContext, int64_t>);
......@@ -139,10 +139,8 @@ class PoolGradKernel : public framework::OpKernel<T> {
auto& dev_ctx = context.template device_context<DeviceContext>();
if (in_x_grad) {
in_x_grad->mutable_data<T>(context.GetPlace());
auto temp = framework::EigenVector<T>::Flatten(*in_x_grad);
temp.device(
*context.template device_context<DeviceContext>().eigen_device()) =
temp.constant(static_cast<T>(0));
paddle::operators::math::SetConstant<DeviceContext, T> set_constant;
set_constant(dev_ctx, in_x_grad, 0.0);
switch (ksize.size()) {
case 2: {
......
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc
DEPS pybind python backward proto_desc paddle_memory executor prune init profiler
DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
${GLOB_OP_LIB})
if(NOT APPLE AND NOT ANDROID)
target_link_libraries(paddle_pybind rt)
......
......@@ -424,7 +424,9 @@ All parameter, weight, gradient are variables in Paddle.
py::class_<framework::Executor>(m, "Executor")
.def(py::init<const platform::Place &>())
.def("run", &Executor::Run);
.def("run",
(void (Executor::*)(const ProgramDesc &, Scope *, int, bool, bool)) &
Executor::Run);
m.def("unique_integer", UniqueIntegerGenerator);
m.def("init_gflags", framework::InitGflags);
......
......@@ -225,7 +225,7 @@ class DistributeTranspiler:
if len(splited_vars) <= 1:
continue
orig_var = program.global_block().vars[varname]
if orig_var == core.VarDesc.VarType.SELECTED_ROWS:
if orig_var.type == core.VarDesc.VarType.SELECTED_ROWS:
height_sections = []
for v in splited_vars:
height_sections.append(v.shape[0])
......@@ -234,7 +234,7 @@ class DistributeTranspiler:
inputs={"X": orig_var},
outputs={"Out": splited_vars},
attrs={"height_sections": height_sections})
elif orig_var == core.VarDesc.VarType.LOD_TENSOR:
elif orig_var.type == core.VarDesc.VarType.LOD_TENSOR:
sections = []
for v in splited_vars:
sections.append(v.shape[0])
......
......@@ -63,6 +63,7 @@ __all__ = [
'nce',
'beam_search',
'row_conv',
'multiplex',
]
......@@ -2707,3 +2708,55 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
'Filter': [filter_param]},
outputs={'Out': [out]})
return helper.append_activation(out)
def multiplex(inputs, index):
"""
**Multiplex Layer**
Referring to the given index variable, this layer selects rows from the
input variables to construct a multiplex variable. Assuming that there are
:math:`m` input variables and :math:`I_i` represents the i-th input
variable and :math:`i` is in [0, :math:`m`). All input variables are
tensors with same shape [:math:`d_0`, :math:`d_1`, ..., :math:`d_R`].
Please note that rank of the input tensor should be at least 2. Each input
variable will be treated as a 2-D matrix with shape [:math:`M`, :math:`N`]
where :math:`M` for :math:`d_0` and :math:`N` for :math:`d_1` * :math:`d_2`
* ... * :math:`d_R`. Let :math:`I_i[j]` be the j-th row of the i-th input
variable. The given index variable should be a 2-D tensor with shape
[:math:`M`, 1]. Let `ID[i]` be the i-th index value of the index variable.
Then the output variable will be a tensor with shape [:math:`d_0`,
:math:`d_1`, ..., :math:`d_R`]. If we treat the output tensor as a 2-D
matrix with shape [:math:`M`, :math:`N`] and let :math:`O[i]` be the i-th
row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`.
Args:
inputs (list): A list of variables to gather from. All variables have the
same shape and the rank is at least 2.
index (Variable): Tensor<int32>, index variable which is a 2-D tensor
with shape [M, 1] where M is the batch size.
Returns:
Variable: Multiplex variable gathered from input variables.
Examples:
.. code-block:: python
x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32')
index = fluid.layers.data(name='index', shape=[1], dtype='int32')
out = fluid.layers.multiplex(inputs=[x1, x2], index=index)
"""
helper = LayerHelper('multiplex', **locals())
if not isinstance(inputs, list) and len(inputs) < 2:
raise ValueError("inputs should be a list object and contains at least "
"2 elements.")
out = helper.create_tmp_variable(inputs[0].dtype)
helper.append_op(
type='multiplex',
inputs={'X': inputs,
'Ids': index},
outputs={'Out': [out]})
return out
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
hidden_dim = 32
embedding_dim = 16
batch_size = 10
max_length = 50
topk_size = 50
encoder_size = decoder_size = hidden_dim
IS_SPARSE = True
USE_PEEPHOLES = False
def bi_lstm_encoder(input_seq, hidden_size):
input_forward_proj = fluid.layers.fc(input=input_seq,
size=hidden_size * 4,
bias_attr=True)
forward, _ = fluid.layers.dynamic_lstm(
input=input_forward_proj,
size=hidden_size * 4,
use_peepholes=USE_PEEPHOLES)
input_backward_proj = fluid.layers.fc(input=input_seq,
size=hidden_size * 4,
bias_attr=True)
backward, _ = fluid.layers.dynamic_lstm(
input=input_backward_proj,
size=hidden_size * 4,
is_reverse=True,
use_peepholes=USE_PEEPHOLES)
return forward, backward
# FIXME(peterzhang2029): Replace this function with the lstm_unit_op.
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
def linear(inputs):
return fluid.layers.fc(input=inputs, size=size, bias_attr=True)
forget_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
input_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
output_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
cell_tilde = fluid.layers.tanh(x=linear([hidden_t_prev, x_t]))
cell_t = fluid.layers.sums(input=[
fluid.layers.elementwise_mul(
x=forget_gate, y=cell_t_prev), fluid.layers.elementwise_mul(
x=input_gate, y=cell_tilde)
])
hidden_t = fluid.layers.elementwise_mul(
x=output_gate, y=fluid.layers.tanh(x=cell_t))
return hidden_t, cell_t
def lstm_decoder_without_attention(target_embedding, decoder_boot, context,
decoder_size):
rnn = fluid.layers.DynamicRNN()
cell_init = fluid.layers.fill_constant_batch_size_like(
input=decoder_boot,
value=0.0,
shape=[-1, decoder_size],
dtype='float32')
cell_init.stop_gradient = False
with rnn.block():
current_word = rnn.step_input(target_embedding)
context = rnn.static_input(context)
hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
cell_mem = rnn.memory(init=cell_init)
decoder_inputs = fluid.layers.concat(
input=[context, current_word], axis=1)
h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, decoder_size)
rnn.update_memory(hidden_mem, h)
rnn.update_memory(cell_mem, c)
out = fluid.layers.fc(input=h,
size=target_dict_dim,
bias_attr=True,
act='softmax')
rnn.output(out)
return rnn()
def seq_to_seq_net():
"""Construct a seq2seq network."""
src_word_idx = fluid.layers.data(
name='source_sequence', shape=[1], dtype='int64', lod_level=1)
src_embedding = fluid.layers.embedding(
input=src_word_idx,
size=[source_dict_dim, embedding_dim],
dtype='float32')
src_forward, src_backward = bi_lstm_encoder(
input_seq=src_embedding, hidden_size=encoder_size)
src_forward_last = fluid.layers.sequence_last_step(input=src_forward)
src_backward_first = fluid.layers.sequence_first_step(input=src_backward)
encoded_vector = fluid.layers.concat(
input=[src_forward_last, src_backward_first], axis=1)
decoder_boot = fluid.layers.fc(input=encoded_vector,
size=decoder_size,
bias_attr=False,
act='tanh')
trg_word_idx = fluid.layers.data(
name='target_sequence', shape=[1], dtype='int64', lod_level=1)
trg_embedding = fluid.layers.embedding(
input=trg_word_idx,
size=[target_dict_dim, embedding_dim],
dtype='float32')
prediction = lstm_decoder_without_attention(trg_embedding, decoder_boot,
encoded_vector, decoder_size)
label = fluid.layers.data(
name='label_sequence', shape=[1], dtype='int64', lod_level=1)
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return avg_cost
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = core.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def main():
avg_cost = seq_to_seq_net()
optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
optimizer.minimize(avg_cost)
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=1000),
batch_size=batch_size)
place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
batch_id = 0
for pass_id in xrange(2):
for data in train_data():
word_data = to_lodtensor(map(lambda x: x[0], data), place)
trg_word = to_lodtensor(map(lambda x: x[1], data), place)
trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
outs = exe.run(framework.default_main_program(),
feed={
'source_sequence': word_data,
'target_sequence': trg_word,
'label_sequence': trg_word_next
},
fetch_list=[avg_cost])
avg_cost_val = np.array(outs[0])
print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
" avg_cost=" + str(avg_cost_val))
if batch_id > 3:
exit(0)
batch_id += 1
if __name__ == '__main__':
main()
......@@ -279,6 +279,16 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(out)
print(str(program))
def test_multiplex(self):
program = Program()
with program_guard(program):
x1 = layers.data(name='x1', shape=[4], dtype='float32')
x2 = layers.data(name='x2', shape=[4], dtype='float32')
index = layers.data(name='index', shape=[1], dtype='int32')
out = layers.multiplex(inputs=[x1, x2], index=index)
self.assertIsNotNone(out)
print(str(program))
if __name__ == '__main__':
unittest.main()
......@@ -42,7 +42,7 @@ def relu(x):
return np.maximum(x, 0)
ACTVATION = {
ACTIVATION = {
'identity': identity,
'sigmoid': sigmoid,
'tanh': tanh,
......@@ -158,8 +158,8 @@ class TestLstmOp(OpTest):
w_b = b[:, 0:4 * self.D]
w_c = b[:, 4 * self.D:] if self.use_peepholes else None
h, c = lstm(x, self.lod, h0, c0, w, w_b, w_c, self.is_reverse,
ACTVATION[self.act_gate], ACTVATION[self.act_cell],
ACTVATION[self.act_cand])
ACTIVATION[self.act_gate], ACTIVATION[self.act_cell],
ACTIVATION[self.act_cand])
self.inputs = {'Input': (x, self.lod), 'Weight': w}
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import unittest
import numpy as np
import test_lstm_op as LstmTest
ACTIVATION = {
'identity': LstmTest.identity,
'sigmoid': LstmTest.sigmoid,
'tanh': LstmTest.tanh,
'relu': LstmTest.relu
}
# LSTM with recurrent projection Layer
def lstmp(
input, # T x 4D
lod, # 1 x N
h0=None, # N x D
c0=None, # N x D
w_r=None, # P x 4D
w_rh=None, # D x P
w_b=None, # 1 x 4D
w_c=None, # 1 x 3D
is_reverse=False,
act_gate=None,
act_cell=None,
act_cand=None,
act_proj=None):
def _step(x, w_r, w_rh, w_c, r_pre, c_pre, act_gate, act_cell, act_cand,
act_proj):
g = np.dot(r_pre, w_r) # 1 x 4D
g = g + x
g = np.reshape(g, (1, g.size))
c, g_i, g_f, g_o = np.split(g, 4, axis=1)
if w_c is None:
g_i = act_gate(g_i) # 1 x D
g_f = act_gate(g_f) # 1 x D
else:
w_ic, w_fc, _ = np.split(w_c, 3, axis=1)
g_i = act_gate(g_i + w_ic * c_pre) # 1 x D
g_f = act_gate(g_f + w_fc * c_pre) # 1 x D
c = g_f * c_pre + g_i * act_cand(c) # 1 x D
if w_c is None:
g_o = act_gate(g_o) # 1 x D
else:
_, _, w_oc = np.split(w_c, 3, axis=1)
g_o = act_gate(g_o + w_oc * c) # 1 x D
h = g_o * act_cell(c)
# projection
r = np.dot(h, w_rh)
r = act_proj(r)
return r, c
def _reverse(x, lod):
y = np.zeros_like(x)
for i in range(len(lod) - 1):
b, e = lod[i], lod[i + 1]
y[b:e, :] = np.flip(x[b:e, :], 0)
return y
offset = lod[0]
batch_size = len(offset) - 1
# recurrent projection state
projection = []
cell = []
input = _reverse(input, offset) if is_reverse else input
if w_b is not None:
input = input + np.tile(w_b, (offset[-1], 1))
for i in range(batch_size):
# compute one sequence
seq_len = offset[i + 1] - offset[i]
x = input[offset[i]:offset[i + 1], :]
r_pre = np.dot(h0[i], w_rh) # 1 x P
r_pre = act_proj(r_pre)
c_pre = c0[i] # 1 x D
for j in range(seq_len):
# compute one step
r_pre, c_pre = _step(x[j], w_r, w_rh, w_c, r_pre, c_pre, act_gate,
act_cell, act_cand, act_proj)
projection.append(r_pre.flatten())
cell.append(c_pre.flatten())
projection = np.array(projection).astype('float64')
cell = np.array(cell).astype('float64')
projection = _reverse(projection, offset) if is_reverse else projection
cell = _reverse(cell, offset) if is_reverse else cell
assert projection.shape == (input.shape[0], w_r.shape[0]) # T x P
assert cell.shape == (input.shape[0], input.shape[1] / 4) # T x D
return projection, cell
class TestLstmpOp(LstmTest.TestLstmOp):
def reset_argument(self):
pass
def setUp(self):
self.set_argument()
# projection size
self.P = 10
self.act_proj = self.act_cell
self.reset_argument()
self.op_type = 'lstmp'
T = self.lod[0][-1]
N = len(self.lod[0]) - 1
x = np.random.normal(size=(T, 4 * self.D)).astype('float64')
if self.has_initial_state:
h0 = np.random.normal(size=(N, self.D)).astype('float64')
c0 = np.random.normal(size=(N, self.D)).astype('float64')
else:
h0 = np.zeros((N, self.D)).astype('float64')
c0 = np.zeros((N, self.D)).astype('float64')
w = np.random.normal(size=(self.P, 4 * self.D)).astype('float64')
if self.use_peepholes:
b = np.random.normal(size=(1, 7 * self.D)).astype('float64')
else:
b = np.random.normal(size=(1, 4 * self.D)).astype('float64')
w_b = b[:, 0:4 * self.D]
w_c = b[:, 4 * self.D:] if self.use_peepholes else None
w_rh = np.random.normal(size=(self.D, self.P)).astype('float64')
r, c = lstmp(x, self.lod, h0, c0, w, w_rh, w_b, w_c, self.is_reverse,
ACTIVATION[self.act_gate], ACTIVATION[self.act_cell],
ACTIVATION[self.act_cand], ACTIVATION[self.act_proj])
self.inputs = {'Input': (x, self.lod), 'Weight': w, 'ProjWeight': w_rh}
self.inputs['Bias'] = b
if self.has_initial_state:
self.inputs['H0'] = h0
self.inputs['C0'] = c0
self.outputs = {
'Projection': (r, self.lod),
'Cell': (c, self.lod),
}
self.attrs = {
'use_peepholes': self.use_peepholes,
'is_reverse': self.is_reverse,
'gate_activation': self.act_gate,
'cell_activation': self.act_cell,
'candidate_activation': self.act_cand,
'proj_activation': self.act_proj
}
def test_check_output(self):
self.check_output(atol=1e-8)
def test_check_grad(self):
# TODO(qingqing) remove folowing lines after the check_grad is refined.
N = len(self.lod[0]) - 1
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
self.check_grad(
['Input', 'Weight', 'ProjWeight', 'Bias'], ['Projection'],
max_relative_error=1e-2)
class TestLstmpOpHasInitial(TestLstmpOp):
def reset_argument(self):
self.has_initial_state = True
def test_check_grad(self):
# TODO(qingqing) remove folowing lines after the check_grad is refined.
N = len(self.lod[0]) - 1
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
self.check_grad(
['Input', 'Weight', 'ProjWeight', 'Bias', 'H0', 'C0'],
['Projection'],
max_relative_error=1e-2)
def test_check_grad_ingore_bias(self):
N = len(self.lod[0]) - 1
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
self.check_grad(
['Input', 'ProjWeight', 'Weight'], ['Projection'],
max_relative_error=1e-2,
no_grad_set=set('Bias'))
def test_check_grad_ingore_weight(self):
N = len(self.lod[0]) - 1
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
self.check_grad(
['Input', 'ProjWeight', 'Bias'], ['Projection'],
max_relative_error=1e-2,
no_grad_set=set('Weight'))
def test_check_grad_ingore_proj_weight(self):
N = len(self.lod[0]) - 1
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
self.check_grad(
['Input', 'Weight', 'Bias'], ['Projection'],
max_relative_error=1e-2,
no_grad_set=set('ProjWeight'))
def test_check_grad_ingore_input(self):
N = len(self.lod[0]) - 1
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
self.check_grad(
['Weight', 'ProjWeight', 'Bias'], ['Projection'],
max_relative_error=1e-2,
no_grad_set=set('Input'))
def test_check_grad_ingore_h0(self):
N = len(self.lod[0]) - 1
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
self.check_grad(
['Input', 'Weight', 'ProjWeight', 'Bias', 'C0'], ['Projection'],
max_relative_error=1e-2,
no_grad_set=set('H0'))
def test_check_grad_ingore_c0(self):
N = len(self.lod[0]) - 1
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
self.check_grad(
['Input', 'Weight', 'ProjWeight', 'Bias', 'H0'], ['Projection'],
max_relative_error=1e-2,
no_grad_set=set('C0'))
class TestLstmpOpRerverse(TestLstmpOp):
def reset_argument(self):
self.is_reverse = True
class TestLstmpOpNotUsePeepholes(TestLstmpOp):
def reset_argument(self):
self.use_peepholes = False
class TestLstmpOpLinearProjection(TestLstmpOp):
def reset_argument(self):
self.act_proj = 'identity'
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册