Commit c4f8afa2 authored by Yu Yang

Fix bug when memopt optimizes loss.grad and ParallelExecutor is used

Parent 7c8b69c7
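Why the fix is needed: the multi-device SSA graph builder used to create the scale-loss-grad op's output variable under the name GradVarName(loss_var_name_), i.e. the loss name plus the gradient suffix. Once the memory-optimization pass has renamed or reused loss.grad, that guessed name can fall out of sync with what the backward ops actually read, so the scaled gradient gets wired to a variable the rest of the graph never uses. The commit instead reads the real name from the scale-loss op's first output argument. Below is a minimal self-contained sketch of the naming mismatch, assuming only the "@GRAD" suffix convention; all concrete variable names are hypothetical and not taken from this commit.

#include <cassert>
#include <string>

// Simplified stand-in for paddle::framework::GradVarName, which appends the
// gradient suffix to a forward variable's name.
std::string GradVarName(const std::string& name) { return name + "@GRAD"; }

int main() {
  std::string loss_var_name = "mean_0.tmp_0";        // hypothetical loss variable
  // Name the old builder assumed for the loss gradient:
  std::string assumed = GradVarName(loss_var_name);   // "mean_0.tmp_0@GRAD"
  // Name the scale-loss op may actually output after memopt has rewritten the
  // program (purely illustrative):
  std::string actual = "fc_2.tmp_1";
  // The old code created the SSA node under `assumed`; the fix uses `actual`,
  // taken from node->Op()->OutputArgumentNames()[0], so the two stay in sync.
  assert(assumed != actual);
  return 0;
}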
@@ -275,7 +275,8 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
   if (strategy_.gradient_scale_ !=
       BuildStrategy::GradientScaleStrategy::kCustomized) {
     // TODO(paddle-dev): Why is there no input for this op_handle?
-    CreateScaleLossGradOp(&result);
+    auto loss_grad_name = node->Op()->OutputArgumentNames()[0];
+    CreateScaleLossGradOp(&result, loss_grad_name);
   }
   // This assumes the backward generating code will ensure IsScaleLossOp
   // is true only for the op that scale the final scalar loss.
@@ -535,7 +536,8 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(const ir::Graph &graph,
   return got == sharded_var_device.end() ? -1 : got->second;
 }
-void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(ir::Graph *result) const {
+void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(
+    ir::Graph *result, const std::string &loss_grad_name) const {
   for (size_t i = 0; i < places_.size(); ++i) {
     // Insert ScaleCost OpHandle
 #ifdef PADDLE_WITH_CUDA
@@ -558,10 +560,10 @@ void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(ir::Graph *result) const {
     // loss->pending_ops_.emplace_back(op_handle);
     // op_handle->inputs_.emplace_back(loss);
-    CreateOpOutput(result, op_handle,
-                   result->CreateEmptyNode(GradVarName(loss_var_name_),
-                                           ir::Node::Type::kVariable),
-                   places_[i], i);
+    CreateOpOutput(
+        result, op_handle,
+        result->CreateEmptyNode(loss_grad_name, ir::Node::Type::kVariable),
+        places_[i], i);
   }
 }
......
@@ -75,7 +75,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
   void CreateComputationalOps(ir::Graph *result, ir::Node *node,
                               size_t num_places) const;
-  void CreateScaleLossGradOp(ir::Graph *result) const;
+  void CreateScaleLossGradOp(ir::Graph *result,
+                             const std::string &loss_grad_name) const;
   VarHandle *CreateReduceOp(ir::Graph *result, const std::string &og,
                             int dst_dev_id) const;
   void CreateComputationalOp(ir::Graph *result, ir::Node *node,
......
@@ -778,6 +778,7 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
     const ExecutionContext& ctx) const {
   auto& scope = ctx.scope();
   int data_type = -1;
+  std::string last_input_name;
   for (auto& input : this->inputs_) {
     for (auto& ipt_name : input.second) {
       auto* var = scope.FindVar(ipt_name);
@@ -794,9 +795,10 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
         int tmp = static_cast<int>(ToDataType(t->type()));
         PADDLE_ENFORCE(
             tmp == data_type || data_type == -1,
-            "DataType of Paddle Op %s must be the same. Get %d != %d", Type(),
-            data_type, tmp);
+            "DataType of Paddle Op %s must be the same. Get %s(%d) != %s(%d)",
+            Type(), last_input_name, data_type, ipt_name, tmp);
         data_type = tmp;
+        last_input_name = ipt_name;
       }
     }
   }
......
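The operator.cc portion improves diagnostics rather than behaviour: OperatorWithKernel::IndicateDataType now remembers the previously inspected input's name, so when two inputs carry different data types the PADDLE_ENFORCE message names both offending inputs instead of printing only two raw enum values. Below is a rough standalone sketch of what the updated check amounts to, with hypothetical op and input names, placeholder enum values, and PADDLE_ENFORCE replaced by a plain error print.

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Sketch only: walk an op's inputs, require them to share one data type, and
// remember the last input's name so the error can point at both offenders.
void CheckInputsShareDataType(
    const std::string& op_type,
    const std::vector<std::pair<std::string, int>>& inputs /* name, dtype enum */) {
  int data_type = -1;
  std::string last_input_name;
  for (const auto& in : inputs) {
    int tmp = in.second;
    if (!(tmp == data_type || data_type == -1)) {
      // Mirrors the shape of the new message:
      //   "DataType of Paddle Op %s must be the same. Get %s(%d) != %s(%d)"
      std::fprintf(stderr,
                   "DataType of Paddle Op %s must be the same. Get %s(%d) != %s(%d)\n",
                   op_type.c_str(), last_input_name.c_str(), data_type,
                   in.first.c_str(), tmp);
      return;
    }
    data_type = tmp;
    last_input_name = in.first;
  }
}

int main() {
  // Hypothetical example: "X" arrives as one dtype while "Label" arrives as
  // another; the enum values 5 and 3 are placeholders.
  CheckInputsShareDataType("cross_entropy", {{"X", 5}, {"Label", 3}});
  return 0;
}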