提交 6cfb9a32 编写于 作者: D dangqingqing

Refine InferShape for recurrent_network_op.

* During shape inference, a tensor only carries its shape and does not hold memory.
上级 0973c2c9
......@@ -29,7 +29,8 @@ namespace rnn {
void SegmentInputs(std::vector<std::shared_ptr<Scope>>& step_scopes,
const std::vector<Link>& inlinks,
const size_t seq_len) {
const size_t seq_len,
bool infer_shape) {
PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided.");
for (size_t i = 0; i < inlinks.size(); ++i) {
Tensor* input =
......@@ -42,7 +43,9 @@ void SegmentInputs(std::vector<std::shared_ptr<Scope>>& step_scopes,
Tensor* step_input = step_scopes[j]
->CreateVariable(inlinks[i].internal)
->GetMutable<Tensor>();
*step_input = input->Slice<float>(j, j + 1);
if (!infer_shape) {
*step_input = input->Slice<float>(j, j + 1);
}
step_input->Resize(step_dims);
}
}
......@@ -50,20 +53,23 @@ void SegmentInputs(std::vector<std::shared_ptr<Scope>>& step_scopes,
void ConcatOutputs(std::vector<std::shared_ptr<Scope>>& step_scopes,
const std::vector<Link>& outlinks,
const size_t seq_len) {
const size_t seq_len,
bool infer_shape) {
for (size_t i = 0; i < outlinks.size(); i++) {
Tensor* output =
step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable<Tensor>();
// TODO(qingqing) remove the following code after adding
// InferShape in RecurrentGradientOp
DDim step_dims = step_scopes[0]
->GetVariable(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
dims_vec.insert(dims_vec.begin(), seq_len);
output->mutable_data<float>(make_ddim(dims_vec), platform::CPUPlace());
if (infer_shape) {
DDim step_dims = step_scopes[0]
->GetVariable(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
dims_vec.insert(dims_vec.begin(), seq_len);
output->Resize(make_ddim(dims_vec));
} else {
output->mutable_data<float>(platform::CPUPlace());
}
for (size_t j = 0; j < seq_len; j++) {
Tensor* step_output = step_scopes[j]
......@@ -79,8 +85,9 @@ void ConcatOutputs(std::vector<std::shared_ptr<Scope>>& step_scopes,
void LinkMemories(std::vector<std::shared_ptr<Scope>>& scopes,
const std::vector<rnn::MemoryAttr>& memories,
size_t step_id,
int offset) {
const size_t step_id,
const int offset,
bool infer_shape) {
PADDLE_ENFORCE(step_id < scopes.size(),
"step [%d] is out of range of step scopes' size [%d]",
step_id,
......@@ -97,18 +104,14 @@ void LinkMemories(std::vector<std::shared_ptr<Scope>>& scopes,
std::shared_ptr<Scope> scope = scopes[step_id];
std::shared_ptr<Scope> linked_scope = scopes[step_id + offset];
for (auto& attr : memories) {
auto mem = scope->CreateVariable(attr.pre_var)->GetMutable<Tensor>();
auto mem = scope->GetVariable(attr.pre_var)->GetMutable<Tensor>();
// maybe share variable is better?
auto linked_mem = linked_scope->GetVariable(attr.var)->GetMutable<Tensor>();
mem->ShareDataWith<float>(*linked_mem);
// TODO(qingqing) remove following code
// the memory of current step should be allocated in step net
auto m = scope->CreateVariable(attr.var)->GetMutable<Tensor>();
// for unit test, as addOp and mulOp are null currently, if not
// mutable_data, mem.data() in output will be error. We will
// remove this line after merge the correct addOp and mulOp.
m->mutable_data<float>(mem->dims(), platform::CPUPlace());
if (infer_shape) {
mem->Resize(linked_mem->dims());
} else {
mem->ShareDataWith<float>(*linked_mem);
}
}
}
......@@ -176,61 +179,43 @@ void RecurrentAlgorithm::InferShape(const std::shared_ptr<Scope>& scope) const {
->GetMutable<Tensor>()
->dims()[0];
CreateScopes(scope);
auto step_scopes = GetStepScopes(scope);
// SegmentInputs is called in InferShape. The input must hold memory in
// SegmentInputs, but the other ops only set dimensions for the output in
// InferShape. That's a problem. Does the RNN op need InferShape or not?
// Do the following functions (SegmentInputs, InitMemories, ...) need
// to be rewritten for the RNN op?
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_);
auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true);
InitMemories(step_scopes[0]);
InitMemories(step_scopes[0], true);
PADDLE_ENFORCE(scope->HasVariable(arg_->step_net),
"stepnet [%s] is not in scope.",
arg_->step_net);
Variable* net = scope->GetVariable(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net");
// If the InferShape is called in OperatorBase's run function,
// the rnn op only needs to do InferShape for the first time step
for (size_t i = 0; i < seq_len_; i++) {
if (i > 0) {
rnn::LinkMemories(step_scopes, arg_->memories, i, -1);
rnn::LinkMemories(step_scopes, arg_->memories, i, -1, true);
}
net->GetMutable<NetOp>()->InferShape(step_scopes[i]);
}
auto outlinks = arg_->outlinks;
for (size_t i = 0; i < outlinks.size(); i++) {
DDim step_dims = step_scopes[0]
->GetVariable(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
// now only support fixed length
dims_vec.insert(dims_vec.begin(), seq_len_);
Tensor* output =
step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable<Tensor>();
output->Resize(make_ddim(dims_vec));
}
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true);
}
void RecurrentAlgorithm::Run(const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& dev_ctx) const {
auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false);
InitMemories(step_scopes[0], false);
Variable* net = scope->GetVariable(arg_->step_net);
for (size_t step_id = 0; step_id < seq_len_; step_id++) {
// the link memory is done in InferShape
// maybe remove following code after testing
if (step_id > 0) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1);
rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1, false);
}
net->GetMutable<NetOp>()->Run(step_scopes[step_id], dev_ctx);
}
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_);
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false);
}
void RecurrentAlgorithm::CreateScopes(std::shared_ptr<Scope> scope) const {
......@@ -246,6 +231,7 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr<Scope> scope) const {
// Now all variables in scope must be created outside of op.
auto net_op = scope->GetVariable(arg_->step_net)->GetMutable<NetOp>();
for (auto& input : net_op->inputs_) {
// the weights are located in the parent scope
step_scope->CreateVariable(input);
}
for (auto& output : net_op->outputs_) {
......@@ -257,7 +243,8 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr<Scope> scope) const {
}
}
void RecurrentAlgorithm::InitMemories(std::shared_ptr<Scope> step_scope) const {
void RecurrentAlgorithm::InitMemories(std::shared_ptr<Scope> step_scope,
bool infer_shape) const {
for (auto& attr : arg_->memories) {
Tensor* pre_mem =
step_scope->CreateVariable(attr.pre_var)->GetMutable<Tensor>();
......@@ -267,14 +254,11 @@ void RecurrentAlgorithm::InitMemories(std::shared_ptr<Scope> step_scope) const {
attr.boot_var);
Tensor* boot_mem =
step_scope->GetVariable(attr.boot_var)->GetMutable<Tensor>();
pre_mem->ShareDataWith<float>(*boot_mem);
// TODO(qingqing) remove following code
// the memory of current step should be allocated in step net
// here for unit test
auto cur_step_mem =
step_scope->CreateVariable(attr.var)->GetMutable<Tensor>();
cur_step_mem->mutable_data<float>(boot_mem->dims(), platform::CPUPlace());
if (infer_shape) {
pre_mem->Resize(boot_mem->dims());
} else {
pre_mem->ShareDataWith<float>(*boot_mem);
}
}
}
......@@ -336,35 +320,37 @@ void RecurrentGradientAlgorithm::Run(
const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& dev_ctx) const {
auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false);
PADDLE_ENFORCE(scope->HasVariable(arg_->step_net),
"step net is not in scope.");
Variable* net = scope->GetVariable(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net");
for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) {
if (static_cast<size_t>(step_id) != seq_len_ - 1) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1);
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, false);
}
net->GetMutable<NetOp>()->Run(step_scopes[step_id], dev_ctx);
}
LinkBootMemoryGradients(step_scopes[0]);
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_);
LinkBootMemoryGradients(step_scopes[0], false);
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false);
}
void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
std::shared_ptr<Scope> step_scope) const {
std::shared_ptr<Scope> step_scope, bool infer_shape) const {
for (auto& attr : arg_->memories) {
Tensor* mem_grad =
step_scope->CreateVariable(attr.var)->GetMutable<Tensor>();
PADDLE_ENFORCE(mem_grad != nullptr,
"boot_tensor should be retrieved before");
PADDLE_ENFORCE(step_scope->HasVariable(attr.boot_var),
"memory [%s]'s boot variable [%s] not exists",
attr.var,
attr.boot_var);
Tensor* boot_mem_grad =
step_scope->CreateVariable(attr.boot_var)->GetMutable<Tensor>();
boot_mem_grad->ShareDataWith<float>(*mem_grad);
if (infer_shape) {
boot_mem_grad->Resize(mem_grad->dims());
} else {
boot_mem_grad->ShareDataWith<float>(*mem_grad);
}
}
}
......@@ -374,7 +360,7 @@ void RecurrentGradientAlgorithm::InferShape(
->GetMutable<Tensor>()
->dims()[0];
auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true);
PADDLE_ENFORCE(scope->HasVariable(arg_->step_net),
"step net is not in scope.");
......@@ -383,25 +369,12 @@ void RecurrentGradientAlgorithm::InferShape(
for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) {
if (static_cast<size_t>(step_id) != seq_len_ - 1) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1);
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, true);
}
net->GetMutable<NetOp>()->InferShape(step_scopes[step_id]);
}
auto outlinks = arg_->outlinks;
for (size_t i = 0; i < outlinks.size(); i++) {
DDim step_dims = step_scopes[0]
->GetVariable(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
// now only support fixed length
dims_vec.insert(dims_vec.begin(), seq_len_);
Tensor* output =
step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable<Tensor>();
output->Resize(make_ddim(dims_vec));
}
LinkBootMemoryGradients(step_scopes[0]);
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true);
LinkBootMemoryGradients(step_scopes[0], true);
}
void RecurrentGradientOp::Init() {
......
......@@ -72,19 +72,22 @@ struct ArgumentName {
*/
void SegmentInputs(std::vector<std::shared_ptr<Scope>>& step_scopes,
const std::vector<Link>& inlinks,
const size_t seq_len);
const size_t seq_len,
bool infer_shape);
/**
* Process outputs of step nets and merge to variables.
*/
void ConcatOutputs(std::vector<std::shared_ptr<Scope>>& step_scopes,
const std::vector<Link>& outlinks,
const size_t seq_len);
const size_t seq_len,
bool infer_shape);
void LinkMemories(std::vector<std::shared_ptr<Scope>>& step_scopes,
const std::vector<MemoryAttr>& memories,
size_t step_id,
int offset);
const size_t step_id,
const int offset,
bool infer_shape);
void InitArgument(const ArgumentName& name, Argument* arg);
......@@ -125,7 +128,7 @@ protected:
->GetMutable<std::vector<std::shared_ptr<Scope>>>();
}
void InitMemories(std::shared_ptr<Scope> step_scopes) const;
void InitMemories(std::shared_ptr<Scope> step_scopes, bool infer_shape) const;
private:
std::unique_ptr<rnn::Argument> arg_;
......@@ -149,7 +152,8 @@ public:
void Run(const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& dev_ctx) const;
void LinkBootMemoryGradients(std::shared_ptr<Scope> step_scopes) const;
void LinkBootMemoryGradients(std::shared_ptr<Scope> step_scopes,
bool infer_shape) const;
/**
* InferShape must be called before Run.
......
......@@ -56,7 +56,7 @@ protected:
w->GetMutable<Tensor>()->mutable_data<float>(
make_ddim(std::vector<int>{30, 30}), platform::CPUPlace());
for (auto boot : std::vector<std::string>{"x_boot", "h_boot"}) {
for (auto boot : std::vector<std::string>{"h_boot"}) {
LOG(INFO) << "create global variable " << boot;
Variable* h_boot = scope_->CreateVariable(boot);
h_boot->GetMutable<Tensor>()->mutable_data<float>(
......@@ -80,7 +80,6 @@ protected:
op_desc.add_inputs("x0");
op_desc.add_inputs("x1");
// boot_memories 3
op_desc.add_inputs("x_boot");
op_desc.add_inputs("h_boot");
// step net 4
op_desc.add_inputs("step_net");
......@@ -92,7 +91,7 @@ protected:
auto _input_format = std::vector<int>{
0, // in_link
3, // memories
5 // step_net
4 // step_net
};
auto input_format = op_desc.add_attrs();
input_format->set_name("input_format");
......@@ -130,12 +129,11 @@ protected:
inlink_alias->add_strings(item);
}
// pre memories
for (const auto& item :
std::vector<std::string>{"rnn/x@pre", "rnn/h@pre"}) {
for (const auto& item : std::vector<std::string>{"rnn/h@pre"}) {
pre_memories->add_strings(item);
}
// memories
for (const auto& item : std::vector<std::string>{"rnn/x", "rnn/h"}) {
for (const auto& item : std::vector<std::string>{"rnn/h"}) {
memories->add_strings(item);
}
// output alias
......@@ -152,14 +150,11 @@ protected:
LOG(INFO) << "create variable step_net";
Variable* var = scope_->CreateVariable("step_net");
auto net = var->GetMutable<NetOp>();
// rnn/s is net's input or output?
net->inputs_ = {"rnn/h@pre", "rnn/w", "rnn/x"};
net->inputs_ = {"rnn/s", "rnn/h"};
net->AddOp(
OpRegistry::CreateOp("mul", {"rnn/h@pre", "rnn/w"}, {"rnn/s"}, {}));
net->AddOp(
OpRegistry::CreateOp("add_two", {"rnn/x", "rnn/s"}, {"rnn/h"}, {}));
OpRegistry::CreateOp("add_two", {"x@alias", "rnn/s"}, {"rnn/h"}, {}));
net->CompleteAddOp();
}
......@@ -303,7 +298,7 @@ protected:
std::vector<std::shared_ptr<Scope>>* step_scopes =
scope_->GetVariable("step_scopes")
->GetMutable<std::vector<std::shared_ptr<Scope>>>();
rnn::SegmentInputs(*step_scopes, std::vector<rnn::Link>{inlink}, 10);
rnn::SegmentInputs(*step_scopes, std::vector<rnn::Link>{inlink}, 10, true);
}
void LinkeMemories() {
......@@ -318,7 +313,7 @@ protected:
scope_->GetVariable("step_scopes")
->GetMutable<std::vector<std::shared_ptr<Scope>>>();
for (int i = 1; i < 10; ++i) {
rnn::LinkMemories(*step_scopes, memories, i, -1);
rnn::LinkMemories(*step_scopes, memories, i, -1, true);
}
}
......@@ -347,7 +342,7 @@ TEST(RecurrentOp, LinkMemories) {
scope->CreateVariable("pre_h");
auto tensor = scope->CreateVariable("h")->GetMutable<Tensor>();
float* data = tensor->mutable_data<float>(make_ddim({15, 20}), CPUPlace());
for (int i = 0; i < 15 * 20; ++i) {
for (int j = 0; j < 15 * 20; ++j) {
data[i] = rand() * (1. / (double)RAND_MAX);
}
step_scopes.push_back(scope);
......@@ -362,7 +357,7 @@ TEST(RecurrentOp, LinkMemories) {
memories.push_back(mem_attr);
for (int i = 1; i < len; ++i) {
rnn::LinkMemories(step_scopes, memories, i, -1);
rnn::LinkMemories(step_scopes, memories, i, -1, false);
}
// check
for (int i = 0; i < len - 1; ++i) {
......@@ -372,13 +367,13 @@ TEST(RecurrentOp, LinkMemories) {
->GetVariable("pre_h")
->GetMutable<Tensor>()
->data<float>();
for (size_t i = 0; i < 15 * 20; ++i) {
ASSERT_FLOAT_EQ(a[i], b[i]);
for (size_t j = 0; j < 15 * 20; ++j) {
ASSERT_FLOAT_EQ(a[j], b[j]);
}
}
for (int i = len - 2; i >= 0; --i) {
rnn::LinkMemories(step_scopes, memories, i, 1);
rnn::LinkMemories(step_scopes, memories, i, 1, false);
}
// check
for (int i = len - 2; i >= 0; --i) {
......@@ -390,8 +385,8 @@ TEST(RecurrentOp, LinkMemories) {
->GetVariable("h")
->GetMutable<Tensor>()
->data<float>();
for (size_t i = 0; i < 15 * 20; ++i) {
ASSERT_FLOAT_EQ(a[i], b[i]);
for (size_t j = 0; j < 15 * 20; ++j) {
ASSERT_FLOAT_EQ(a[j], b[j]);
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册