Unverified commit 883ee1a3, authored by W wanghuancoder, committed by GitHub

Merge branch 'develop' into revert-37926-eager_coreops_500

......@@ -75,6 +75,11 @@ class Carrier final {
bool IsInit() const;
// NOTE: This mutex will be used in interceptor's RunOps function.
// This mutex is used to avoid forward ops and backward ops running
// simultaneously, which can lead to a random hang for some sync ops.
std::mutex run;
DISABLE_COPY_AND_ASSIGN(Carrier);
private:
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/distributed/fleet_executor/compute_interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
......@@ -169,6 +170,8 @@ void ComputeInterceptor::ReplyCompletedToUpStream() {
}
void ComputeInterceptor::RunOps() {
Carrier& carrier_instance = Carrier::Instance();
std::unique_lock<std::mutex> lock(carrier_instance.run);
VLOG(3) << "ComputeInterceptor " << interceptor_id_ << " running ops for the "
<< step_ + 1 << " time.";
for (auto op : node_->ops()) {
......
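Note: the lock above serializes every interceptor's RunOps through the single Carrier::run mutex. A minimal standalone sketch of that pattern, with plain threads standing in for interceptors (names are illustrative, not from this commit):

#include <iostream>
#include <mutex>
#include <thread>

std::mutex run_mutex;  // plays the role of Carrier::run

void RunOps(const char* who) {
  // Same pattern as ComputeInterceptor::RunOps: take the shared lock so
  // forward ops and backward ops can never execute simultaneously.
  std::unique_lock<std::mutex> lock(run_mutex);
  std::cout << who << " running ops" << std::endl;
}

int main() {
  std::thread forward(RunOps, "forward interceptor");
  std::thread backward(RunOps, "backward interceptor");
  forward.join();
  backward.join();
  return 0;
}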
......@@ -116,6 +116,22 @@ class TensorAddFunctor : public boost::static_visitor<> {
}
#endif
#ifdef PADDLE_WITH_IPU
void operator()(const paddle::platform::IPUPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode",
place));
}
#else
void operator()(const paddle::platform::IPUPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode",
place));
}
#endif
void operator()(const paddle::platform::NPUPinnedPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
......
......@@ -81,6 +81,11 @@ struct DLDeviceVisitor : public boost::static_visitor<::DLDevice> {
return device;
}
inline ::DLDevice operator()(const platform::IPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::IPUPlace is not supported"));
}
inline ::DLDevice operator()(const platform::XPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::XPUPlace is not supported"));
......
......@@ -463,6 +463,14 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
#else
PADDLE_THROW(
platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle"));
#endif
} else if (platform::is_ipu_place(place_)) {
#ifdef PADDLE_WITH_IPU
gc.reset(new IPUGarbageCollector(
BOOST_GET_CONST(platform::IPUPlace, place_), max_memory_size));
#else
PADDLE_THROW(
platform::errors::Unimplemented("No IPU gc found in CPU/IPU paddle"));
#endif
} else if (platform::is_npu_place(place_)) {
#ifdef PADDLE_WITH_ASCEND_CL
......
......@@ -156,7 +156,7 @@ cc_test(test_seqpool_cvm_concat_fuse_pass SRCS seqpool_cvm_concat_fuse_pass_test
cc_test(test_repeated_fc_relu_fuse_pass_cc SRCS repeated_fc_relu_fuse_pass_tester.cc DEPS repeated_fc_relu_fuse_pass framework_proto)
cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass)
cc_test(test_simplify_with_basic_ops_pass SRCS simplify_with_basic_ops_pass_tester.cc DEPS simplify_with_basic_ops_pass)
cc_test(test_fc_elementwise_layernorm_fuse_pass SRCS fc_elementwise_layernorm_fuse_pass_tester.cc DEPS fc_elementwise_layernorm_fuse_pass)
cc_test(test_fc_elementwise_layernorm_fuse_pass_cc SRCS fc_elementwise_layernorm_fuse_pass_tester.cc DEPS fc_elementwise_layernorm_fuse_pass)
cc_test(test_skip_layernorm_fuse_pass SRCS skip_layernorm_fuse_pass_tester.cc DEPS skip_layernorm_fuse_pass)
cc_test(test_multihead_matmul_fuse_pass SRCS multihead_matmul_fuse_pass_tester.cc DEPS multihead_matmul_fuse_pass)
cc_test(test_conv_bn_fuse_pass_cc SRCS conv_bn_fuse_pass_tester.cc DEPS conv_bn_fuse_pass)
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
......@@ -338,3 +339,9 @@ void FCElementwiseLayerNormFusePass::ApplyImpl(ir::Graph *graph) const {
REGISTER_PASS(fc_elementwise_layernorm_fuse_pass,
paddle::framework::ir::FCElementwiseLayerNormFusePass);
REGISTER_PASS_CAPABILITY(fc_elementwise_layernorm_fuse_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination()
.EQ("fc", 0)
.LE("elementwise_add", 1)
.EQ("layer_norm", 0));
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/avg_shard_pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void AvgShardPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter AvgShardPass::ApplyImpl";
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
if (ipu_backend->GetIpuStrategy()->need_avg_shard) {
VLOG(10) << "start AvgShardPass";
auto nodes = ir::TopologySortOperations(*graph);
auto num_ipus = ipu_backend->GetIpuStrategy()->num_ipus;
int shard_position = nodes.size() / num_ipus;
int index_and_stage = -1;
for (int i = 0; i < nodes.size(); i++) {
if ((i % shard_position) == 0 && index_and_stage < num_ipus - 1) {
index_and_stage++;
}
nodes[i]->Op()->SetAttr("ipu_index", index_and_stage);
nodes[i]->Op()->SetAttr("ipu_stage", index_and_stage);
}
VLOG(10) << "end AvgShardPass";
}
VLOG(10) << "leave AvgShardPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(avg_shard_pass, paddle::framework::ir::AvgShardPass);
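Note: a standalone sketch of the sharding arithmetic in AvgShardPass above, assuming 10 ops on 4 IPUs (example values, not from this commit). Each block of shard_position = nodes.size() / num_ipus consecutive ops advances the ipu_index/ipu_stage, and the remainder lands on the last IPU:

#include <cstdio>
#include <vector>

int main() {
  // Assumes num_ops >= num_ipus so shard_position stays non-zero.
  const int num_ops = 10;
  const int num_ipus = 4;
  const int shard_position = num_ops / num_ipus;  // 2
  int index_and_stage = -1;
  std::vector<int> assignment(num_ops);
  for (int i = 0; i < num_ops; ++i) {
    // Same condition as the pass: advance every shard_position ops,
    // but never beyond the last IPU.
    if ((i % shard_position) == 0 && index_and_stage < num_ipus - 1) {
      ++index_and_stage;
    }
    assignment[i] = index_and_stage;
  }
  for (int i = 0; i < num_ops; ++i) {
    std::printf("op %d -> ipu %d\n", i, assignment[i]);
  }
  // Ops 0-1 -> ipu 0, 2-3 -> ipu 1, 4-5 -> ipu 2, and the remaining
  // ops 6-9 all stay on ipu 3.
  return 0;
}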
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class AvgShardPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void ForwardGraphExtractPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter ForwardGraphExtractPass::ApplyImpl";
std::unordered_map<OpRole, std::unordered_set<ir::Node*>> all_ops{
{OpRole::kForward, {}}, {OpRole::kBackward, {}},
{OpRole::kOptimize, {}}, {OpRole::kRPC, {}},
{OpRole::kDist, {}}, {OpRole::kLRSched, {}},
{OpRole::kLoss, {}}, {OpRole::kNotSpecified, {}}};
for (auto* node : graph->Nodes()) {
if (!node->IsOp()) {
continue;
}
auto op_role = BOOST_GET_MUTABLE(int, node->Op()->GetAttr("op_role"));
if (op_role == static_cast<int>(OpRole::kForward)) {
all_ops[OpRole::kForward].insert(node);
} else if (op_role == static_cast<int>(OpRole::kBackward)) {
all_ops[OpRole::kBackward].insert(node);
} else if (op_role == static_cast<int>(OpRole::kOptimize)) {
all_ops[OpRole::kOptimize].insert(node);
} else if (op_role == static_cast<int>(OpRole::kRPC)) {
} else if (op_role == static_cast<int>(OpRole::kDist)) {
} else if (op_role == static_cast<int>(OpRole::kLRSched)) {
} else if (op_role == static_cast<int>(OpRole::kLoss)) {
all_ops[OpRole::kLoss].insert(node);
} else if (op_role == static_cast<int>(OpRole::kNotSpecified)) {
LOG(WARNING) << "Op: " << node->Name() << " OpRole is NotSpecified ";
}
}
std::unordered_set<ir::Node*> forward_vars;
std::unordered_set<ir::Node*> backward_vars;
std::unordered_set<ir::Node*> control_vars;
// forward_vars
for (auto& nodes : std::array<std::unordered_set<ir::Node*>, 2>{
all_ops[OpRole::kForward], all_ops[OpRole::kLoss]}) {
for (auto* node : nodes) {
for (auto* in_node : node->inputs) {
forward_vars.insert(in_node);
}
for (auto* out_node : node->outputs) {
forward_vars.insert(out_node);
}
}
}
// control_vars & backward_vars
for (auto* node : graph->Nodes()) {
if (!node->IsVar()) {
continue;
}
if (node->IsCtrlVar()) {
control_vars.insert(node);
}
for (auto* in_node : node->inputs) {
if (all_ops[OpRole::kOptimize].count(in_node)) {
backward_vars.insert(node);
}
}
}
// all removed node
std::unordered_set<ir::Node*> rm_nodes;
for (auto* node : graph->Nodes()) {
if (backward_vars.count(node)) {
rm_nodes.insert(node);
} else if (control_vars.count(node)) {
rm_nodes.insert(node);
} else if (all_ops[OpRole::kBackward].count(node)) {
rm_nodes.insert(node);
} else if (all_ops[OpRole::kForward].count(node) == 0 &&
all_ops[OpRole::kLoss].count(node) == 0 &&
forward_vars.count(node) == 0) {
rm_nodes.insert(node);
} else if (node->Name() == "feed" || node->Name() == "fetch") {
rm_nodes.insert(node);
}
}
VLOG(10) << "Remove Node: ";
for (auto* node : rm_nodes) {
// remove node relations
for (auto* node_in : node->inputs) {
for (size_t i = 0; i < node_in->outputs.size(); ++i) {
if (node_in->outputs[i] == node) {
node_in->outputs.erase(node_in->outputs.begin() + i);
break;
}
}
}
for (auto* node_out : node->outputs) {
for (size_t i = 0; i < node_out->inputs.size(); ++i) {
if (node_out->inputs[i] == node) {
node_out->inputs.erase(node_out->inputs.begin() + i);
break;
}
}
}
VLOG(10) << "\t" << node->Name();
graph->RemoveNode(node);
}
VLOG(10) << "Post Graph: ";
VLOG(10) << DebugString(graph);
VLOG(10) << "leave ForwardGraphExtractPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(forward_graph_extract_pass,
paddle::framework::ir::ForwardGraphExtractPass);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class ForwardGraphExtractPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/infer_shape_pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/variable_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void InferShapePass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter InferShapePass::ApplyImpl";
VLOG(10) << "Raw Graph: ";
VLOG(10) << DebugString(graph);
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
auto batch_size = ipu_backend->GetIpuStrategy()->batch_size;
auto feed_list = Get<std::vector<std::string>>("feed_list");
for (auto node : graph->Nodes()) {
if (!node->IsVar()) {
continue;
}
bool is_feed = std::find(feed_list.begin(), feed_list.end(),
node->Name()) != feed_list.end();
if (is_feed) {
auto input_shape = node->Var()->GetShape();
if (input_shape[0] <= -1) {
input_shape[0] = batch_size;
node->Var()->SetShape(input_shape);
}
// int64->int32
if (node->Var()->GetDataType() == proto::VarType::INT64) {
node->Var()->SetDataType(proto::VarType::INT32);
}
}
}
// temp scope for shape inference
std::shared_ptr<paddle::framework::Scope> scope(
new paddle::framework::Scope());
for (auto node : graph->Nodes()) {
if (!node->IsVar()) {
continue;
}
auto var_desc = node->Var();
auto* ptr = scope->Var(var_desc->Name());
paddle::framework::InitializeVariable(ptr, var_desc->GetType());
auto tensor = ptr->GetMutable<paddle::framework::LoDTensor>();
tensor->Resize(paddle::framework::make_ddim(var_desc->GetShape()));
}
// infer shape
auto nodes = ir::TopologySortOperations(*graph);
for (auto node : nodes) {
auto op_desc = node->Op();
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
paddle::framework::RuntimeContext ctx(op->Inputs(), op->Outputs(), *scope);
op->RuntimeInferShape(*scope, paddle::platform::CPUPlace(), ctx);
for (auto it = ctx.outputs.begin(); it != ctx.outputs.end(); it++) {
for (int i = 0; i < it->second.size(); i++) {
auto output_name = op_desc->Output(it->first)[i];
auto dim =
it->second[i]->GetMutable<paddle::framework::LoDTensor>()->dims();
auto new_shape = paddle::framework::vectorize(dim);
for (auto output_node : node->outputs) {
if (output_node->Name() == output_name) {
output_node->Var()->SetShape(new_shape);
}
}
}
}
}
// release the temp scope
scope.reset();
VLOG(10) << "Post Graph: ";
VLOG(10) << DebugString(graph);
VLOG(10) << "leave InferShapePass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(infer_shape_pass, paddle::framework::ir::InferShapePass)
.RequirePassAttr("feed_list");
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class InferShapePass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/inference_postprocess_pass.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
namespace ir {
void InferencePostprocessPass::ApplyImpl(ir::Graph *graph) const {
VLOG(10) << "enter InferencePostprocessPass::ApplyImpl";
std::vector<std::string> feed_list;
feed_list = Get<std::vector<std::string>>("feed_list");
std::vector<std::string> fetch_list;
fetch_list = Get<std::vector<std::string>>("fetch_list");
auto *feed_var = new paddle::framework::VarDesc("feed");
feed_var->SetType(proto::VarType::FEED_MINIBATCH);
auto *feed_var_node = graph->CreateVarNode(feed_var);
auto *fetch_var = new paddle::framework::VarDesc("fetch");
fetch_var->SetType(proto::VarType::FETCH_LIST);
auto *fetch_var_node = graph->CreateVarNode(fetch_var);
for (int i = 0; i < feed_list.size(); i++) {
for (auto node : graph->Nodes()) {
if (node->Name() == feed_list[i]) {
auto *op = new paddle::framework::OpDesc();
op->SetType("feed");
op->SetInput("X", {"feed"});
op->SetOutput("Out", {node->Name()});
op->SetAttr("col", i);
auto *op_node = graph->CreateOpNode(op);
node->inputs.push_back(op_node);
op_node->outputs.push_back(node);
feed_var_node->outputs.push_back(op_node);
op_node->inputs.push_back(feed_var_node);
break;
}
}
}
for (int i = 0; i < fetch_list.size(); i++) {
for (auto node : graph->Nodes()) {
if (node->Name() == fetch_list[i]) {
auto *op = new paddle::framework::OpDesc();
op->SetType("fetch");
op->SetInput("X", {node->Name()});
op->SetOutput("Out", {"fetch"});
op->SetAttr("col", i);
auto *op_node = graph->CreateOpNode(op);
node->outputs.push_back(op_node);
op_node->inputs.push_back(node);
fetch_var_node->inputs.push_back(op_node);
op_node->outputs.push_back(fetch_var_node);
break;
}
}
}
VLOG(10) << "leave InferencePostprocessPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(inference_postprocess_pass,
paddle::framework::ir::InferencePostprocessPass)
.RequirePassAttr("feed_list")
.RequirePassAttr("fetch_list");
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class InferencePostprocessPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/inference_process_pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
namespace ir {
void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter InferenceProcessPass::ApplyImpl";
// Get a new instance of ipu_backend
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetNewInstance();
// Set scope
auto& scope = graph->Get<Scope>(kParamScopeAttr);
ipu_backend->SetScope(scope);
// Set ipu_strategy
static std::shared_ptr<platform::ipu::IpuStrategy> ipu_strategy_instance_(
new platform::ipu::IpuStrategy());
ipu_strategy_instance_->is_training = false;
auto num_ipus = graph->Get<int>("num_ipus");
ipu_strategy_instance_->num_ipus = num_ipus;
if (num_ipus > 1) {
ipu_strategy_instance_->popart_options_.virtualGraphMode =
platform::ipu::VirtualGraphMode::Manual;
} else {
ipu_strategy_instance_->popart_options_.virtualGraphMode =
platform::ipu::VirtualGraphMode::Off;
}
auto enable_pipelining = graph->Get<bool>("enable_pipelining");
ipu_strategy_instance_->popart_options_.enablePipelining = enable_pipelining;
if (enable_pipelining) {
auto batches_per_step = graph->Get<int>("batches_per_step");
PADDLE_ENFORCE_GE(
batches_per_step, num_ipus,
platform::errors::InvalidArgument("Batched per step should be equal or "
"greater than the number of IPUs"));
ipu_strategy_instance_->batches_per_step = batches_per_step;
}
ipu_strategy_instance_->batch_size = graph->Get<int>("batch_size");
ipu_strategy_instance_->need_avg_shard = graph->Get<bool>("need_avg_shard");
ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));
// Get feed_list and fetch list
std::vector<std::string> feed_list = {};
std::vector<std::string> fetch_list = {};
for (auto node : graph->Nodes()) {
if (node->Name() == "feed") {
if (node->IsOp()) {
feed_list.push_back("");
}
} else if (node->Name() == "fetch") {
if (node->IsOp()) {
fetch_list.push_back("");
}
}
}
for (auto node : graph->Nodes()) {
if (node->Name() == "feed") {
if (node->IsOp()) {
feed_list[BOOST_GET_CONST(int, node->Op()->GetAttr("col"))] =
node->outputs[0]->Name();
}
} else if (node->Name() == "fetch") {
if (node->IsOp()) {
fetch_list[BOOST_GET_CONST(int, node->Op()->GetAttr("col"))] =
node->inputs[0]->Name();
}
}
}
// Run passes
std::vector<std::string> graph_pass = {"forward_graph_extract_pass",
"infer_shape_pass", "avg_shard_pass",
"popart_canonicalization_pass"};
std::vector<std::string> compile_pass = {
"ipu_inplace_pass", "ipu_graph_builder_pass", "ipu_runtime_replacer_pass",
"inference_postprocess_pass"};
for (auto pass_name : graph_pass) {
auto pass = PassRegistry::Instance().Get(pass_name);
if (pass_name == "infer_shape_pass") {
pass->Set("feed_list", new std::vector<std::string>(feed_list.begin(),
feed_list.end()));
}
pass->Apply(graph);
}
for (auto pass_name : compile_pass) {
auto pass = PassRegistry::Instance().Get(pass_name);
pass->Set("feed_list",
new std::vector<std::string>(feed_list.begin(), feed_list.end()));
pass->Set("fetch_list", new std::vector<std::string>(fetch_list.begin(),
fetch_list.end()));
pass->Apply(graph);
}
VLOG(10) << "leave InferenceProcessPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(inference_process_pass,
paddle::framework::ir::InferenceProcessPass);
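Note: a standalone sketch of the two-pass feed-list construction used in InferenceProcessPass above; the list is first sized by counting feed ops, then slot "col" is filled with each op's output name (the data is an assumed example, since graph->Nodes() is unordered):

#include <iostream>
#include <string>
#include <vector>

struct FeedOp {
  int col;
  std::string out_name;
};

int main() {
  // Feed ops may be visited in any order, as with graph->Nodes().
  std::vector<FeedOp> feed_ops = {{1, "y"}, {0, "x"}};
  // Pass 1 sized the list; pass 2 places each name at its "col" index.
  std::vector<std::string> feed_list(feed_ops.size());
  for (const auto& op : feed_ops) {
    feed_list[op.col] = op.out_name;
  }
  for (const auto& name : feed_list) {
    std::cout << name << std::endl;  // prints: x then y
  }
  return 0;
}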
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class InferenceProcessPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/ipu_graph_builder_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
namespace paddle {
namespace framework {
namespace ir {
void IpuGraphBuilderPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter IpuGraphBuilderPass::ApplyImpl";
VLOG(10) << "Raw Graph: ";
VLOG(10) << DebugString(graph);
std::vector<std::string> feed_list;
feed_list = Get<std::vector<std::string>>("feed_list");
std::vector<std::string> fetch_list;
fetch_list = Get<std::vector<std::string>>("fetch_list");
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
ipu_backend->Compile(graph, feed_list, fetch_list);
VLOG(10) << "Post Graph: ";
VLOG(10) << DebugString(graph);
VLOG(10) << "leave IpuGraphBuilderPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(ipu_graph_builder_pass,
paddle::framework::ir::IpuGraphBuilderPass)
.RequirePassAttr("feed_list")
.RequirePassAttr("fetch_list");
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class IpuGraphBuilderPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/ipu_inplace_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
namespace paddle {
namespace framework {
namespace ir {
std::string GenerateVarName(Node *node) {
return node->Name() + "_" + std::to_string(node->id());
}
void IpuInplacePass::ApplyImpl(ir::Graph *graph) const {
// use this pass after forward_graph_extract_pass
// raise error if the inplaced var both in feed_list & fetch_list
VLOG(10) << "enter IpuInplacePass::ApplyImpl";
VLOG(10) << "Raw Graph: ";
VLOG(10) << DebugString(graph);
std::vector<std::string> feed_list;
feed_list = Get<std::vector<std::string>>("feed_list");
std::vector<std::string> fetch_list;
fetch_list = Get<std::vector<std::string>>("fetch_list");
std::map<std::string, int> var_name;
for (auto *node : graph->Nodes()) {
if (node->IsVar()) {
if (var_name.find(node->Name()) == var_name.end()) {
var_name.emplace(node->Name(), 1);
} else {
var_name[node->Name()]++;
}
}
}
for (auto *node : graph->Nodes()) {
if (node->IsVar()) {
if (var_name[node->Name()] > 1) {
auto is_feed = (std::find(feed_list.begin(), feed_list.end(),
node->Name()) != feed_list.end()) &&
(node->inputs.size() == 0);
auto is_fetch = (std::find(fetch_list.begin(), fetch_list.end(),
node->Name()) != fetch_list.end()) &&
(node->outputs.size() == 0);
if (!is_feed && !is_fetch && !node->Var()->Persistable()) {
auto old_name = node->Name();
auto new_name = GenerateVarName(node);
node->RenameVar(new_name);
for (auto *op_in : node->inputs) {
op_in->Op()->RenameOutput(old_name, new_name);
}
for (auto *op_out : node->outputs) {
op_out->Op()->RenameInput(old_name, new_name);
}
}
}
}
}
VLOG(10) << "Post Graph: ";
VLOG(10) << DebugString(graph);
VLOG(10) << "leave IpuInplacePass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(ipu_inplace_pass, paddle::framework::ir::IpuInplacePass)
.RequirePassAttr("feed_list")
.RequirePassAttr("fetch_list");
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class IpuInplacePass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
void IPUPassBase::Init(const std::string& repr, Graph* graph) const {
repr_ = repr;
graph_ = graph;
}
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace framework {
namespace ir {
class IPUPassBase : public Pass {
public:
void Init(const std::string& repr, Graph* graph) const;
virtual ~IPUPassBase() {}
protected:
mutable Graph* graph_;
mutable std::string repr_;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void IpuRuntimeReplacerPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter IpuRuntimeReplacerPass::ApplyImpl";
VLOG(10) << "Raw Graph: ";
VLOG(10) << DebugString(graph);
std::vector<std::string> feed_list;
feed_list = Get<std::vector<std::string>>("feed_list");
std::vector<std::string> fetch_list;
fetch_list = Get<std::vector<std::string>>("fetch_list");
framework::OpDesc ipu_rt_op_desc;
ipu_rt_op_desc.SetType("ipu_runtime");
ipu_rt_op_desc.SetInput("FeedList", feed_list);
ipu_rt_op_desc.SetOutput("FetchList", fetch_list);
ipu_rt_op_desc.Flush();
// Create a new node for the ipu_runtime_op.
auto* ipu_rt_node = graph->CreateOpNode(&ipu_rt_op_desc);
for (auto* node : graph->Nodes()) {
if (node->IsVar()) {
for (auto feed : feed_list) {
if (node->Name() == feed) {
IR_NODE_LINK_TO(node, ipu_rt_node);
}
}
for (auto fetch : fetch_list) {
if (node->Name() == fetch) {
IR_NODE_LINK_TO(ipu_rt_node, node);
}
}
}
}
// set ipu_runtime_op dtype attr
if (fetch_list.size() == 1) {
for (auto* node : graph->Nodes()) {
if (node->IsVar()) {
for (auto fetch : fetch_list) {
if (node->Name() == fetch) {
ipu_rt_node->Op()->SetAttr("dtype", node->Var()->GetDataType());
}
}
}
}
}
// Remove unneeded nodes.
std::unordered_set<const Node*> marked_nodes;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op_desc = node->Op();
if (op_desc->Type() != "ipu_runtime") {
marked_nodes.insert(node);
}
}
}
GraphSafeRemoveNodes(graph, marked_nodes);
VLOG(10) << "Post Graph: ";
VLOG(10) << DebugString(graph);
VLOG(10) << "leave IpuRuntimeReplacerPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(ipu_runtime_replacer_pass,
paddle::framework::ir::IpuRuntimeReplacerPass)
.RequirePassAttr("feed_list")
.RequirePassAttr("fetch_list");
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class IpuRuntimeReplacerPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/optimizer_extract_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
namespace paddle {
namespace framework {
namespace ir {
void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter IpuOptimizerExtractPass::ApplyImpl";
VLOG(10) << "Raw Graph: ";
VLOG(10) << DebugString(graph);
auto ipu_backend = paddle::platform::ipu::IpuBackend::GetInstance();
for (auto* node : graph->Nodes()) {
if (node->IsOp() && node->Op()) {
int op_role = BOOST_GET_CONST(
int, node->Op()->GetAttr(
framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
// a graph usually has multiple optimizer nodes for different parameters,
// and these nodes usually have the same type and attribute values
if ((op_role == static_cast<int>(framework::OpRole::kOptimize))) {
ipu_backend->GetExecutor().SetOptimizerType(node->Op()->Type());
VLOG(10) << "found optimizer type: " << node->Op()->Type();
for (const std::string& attr_name : node->Op()->AttrNames()) {
auto attr_type = node->Op()->GetAttrType(attr_name);
// with adam, the attrs are floats
if (attr_type == proto::AttrType::FLOAT) {
auto attr_value =
BOOST_GET_CONST(float, node->Op()->GetAttr(attr_name));
ipu_backend->GetExecutor().SetOptimizerAttr(attr_name, attr_value);
} else {
VLOG(10) << "Skip " << attr_type;
}
}
auto lr_var_name = node->Op()->Input("LearningRate");
PADDLE_ENFORCE_EQ(lr_var_name.size(), 1u,
platform::errors::InvalidArgument(
"In op(%s), find input(LearningRate) failed.",
node->Op()->Type()));
ipu_backend->GetExecutor().SetLRVarName(lr_var_name[0]);
}
if ((op_role == static_cast<int>(framework::OpRole::kLoss))) {
VLOG(10) << "found loss op type: " << node->Op()->Type();
auto outputs = node->Op()->Outputs();
PADDLE_ENFORCE_EQ(
outputs.size(), 1,
platform::errors::InvalidArgument("Can only support one loss key"));
auto losses_name = outputs.begin()->second;
PADDLE_ENFORCE_EQ(losses_name.size(), 1,
platform::errors::InvalidArgument(
"Can only support one loss name"));
ipu_backend->GetExecutor().SetLoss(losses_name[0]);
}
}
}
VLOG(10) << "Post Graph: ";
VLOG(10) << DebugString(graph);
VLOG(10) << "leave IpuOptimizerExtractPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(optimizer_extract_pass,
paddle::framework::ir::IpuOptimizerExtractPass);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class IpuOptimizerExtractPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/common.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
namespace paddle {
namespace framework {
namespace ir {
using paddle::platform::ipu::IpuBackend;
using framework::ir::Graph;
using framework::ir::Node;
void IpuOptimizerStateAlignPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter IpuOptimizerStateAlignPass::ApplyImpl";
VLOG(10) << "Raw Graph: ";
VLOG(10) << DebugString(graph);
auto ipu_backend = IpuBackend::GetInstance();
const auto* scope_ = ipu_backend->GetScope();
for (auto* node : graph->Nodes()) {
if (node->IsOp() && node->Op()) {
int op_role = BOOST_GET_CONST(
int, node->Op()->GetAttr(
framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
if ((op_role == static_cast<int>(framework::OpRole::kOptimize))) {
auto inputs = node->Op()->Inputs();
if (inputs.count(platform::ipu::sBeta1Pow)) {
auto var = scope_->GetVar(inputs.at(platform::ipu::sBeta1Pow)[0]);
auto data = var->GetMutable<framework::LoDTensor>()->data<float>();
auto beta = BOOST_GET_CONST(
float, node->Op()->GetAttr(platform::ipu::sBeta1));
// Ensure the current value was saved as beta1pow rather than step.
// beta1pow = beta1 ^ (step + 1). Convert it back to step here because
// popart only supports a single Step__ state.
bool save_with_beta1pow = (data[0] < 1.0f) && (data[0] > 0.0f);
float step = 0;
float beta_acc = beta;
while (beta_acc > data[0] && save_with_beta1pow) {
beta_acc *= beta;
step += 1;
}
if (save_with_beta1pow) {
data[0] = step;
}
}
}
}
}
VLOG(10) << "Post Graph: ";
VLOG(10) << DebugString(graph);
VLOG(10) << "leave IpuOptimizerStateAlignPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(optimizer_state_align_pass,
paddle::framework::ir::IpuOptimizerStateAlignPass);
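Note: a standalone sketch of the beta1pow-to-step recovery loop above, assuming Adam saved beta1pow = beta1 ^ (step + 1) with beta1 = 0.9 and step = 3 (example values, not from this commit):

#include <cstdio>

int main() {
  const float beta1 = 0.9f;  // assumed example value
  // Pretend the checkpoint saved beta1pow = beta1^(step + 1) with step = 3.
  float saved_beta1pow = 1.0f;
  for (int k = 0; k < 4; ++k) saved_beta1pow *= beta1;
  // Same recovery loop as the pass: count how many extra multiplications
  // by beta1 are needed to reach the saved beta1pow.
  float step = 0;
  float beta_acc = beta1;
  while (beta_acc > saved_beta1pow) {
    beta_acc *= beta1;
    step += 1;
  }
  std::printf("recovered step = %g\n", step);  // prints: recovered step = 3
  return 0;
}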
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
/*
* This pass should only affect optimizers that need bias correction,
* such as Adam/Lamb.
*/
class IpuOptimizerStateAlignPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/post_canonicalization.h"
namespace paddle {
namespace framework {
namespace ir {
using framework::ir::Graph;
using framework::ir::Node;
using platform::ipu::SymbolHandler;
void PopartCanonicalizationPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "enter PopartCanonicalizationPass::ApplyImpl";
VLOG(10) << "Raw Graph: ";
VLOG(10) << DebugString(graph);
auto nodes = graph->Nodes();
for (auto* node : nodes) {
if (!node->IsOp()) {
continue;
}
auto* op = node->Op();
auto op_type = op->Type();
ir::Node* new_node = nullptr;
SymbolHandler handler = platform::ipu::GetHandler(op_type);
if (handler) {
VLOG(11) << "Raw Paddle Node:";
VLOG(11) << node->Op()->Proto()->DebugString();
new_node = handler(graph, node);
VLOG(11) << "Post Popart Node:";
VLOG(11) << new_node->Op()->Proto()->DebugString();
platform::ipu::ClearNode(node);
graph->RemoveNode(node);
} else {
LOG(ERROR) << "Can not find OpHandler for op_type: " << op_type;
}
}
VLOG(10) << "Post Graph: ";
VLOG(10) << DebugString(graph);
VLOG(10) << "leave PopartCanonicalizationPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(popart_canonicalization_pass,
paddle::framework::ir::PopartCanonicalizationPass);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
class PopartCanonicalizationPass : public IPUPassBase {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
......@@ -84,13 +84,16 @@ void TransposeFlattenConcatFusePass::RunTransposeFlattenConcatFuse(
LOG(WARNING) << "Pass in op compat failed.";
return;
}
const int kNumFields = 5;
const int kTransOffset = 1;
const int kTransOutOffset = 2;
const int kFlattenOffset = 3;
const int kFlattenOutOffset = 4;
std::vector<Node *> nodes;
std::vector<int> trans_axis0;
int flatten_axis0;
for (int i = 0; i < times; i++) {
PADDLE_ENFORCE_NOT_NULL(
subgraph.at(pattern.GetPDNode("transpose" + std::to_string(i))),
......@@ -112,6 +115,33 @@ void TransposeFlattenConcatFusePass::RunTransposeFlattenConcatFuse(
platform::errors::NotFound("Can not find %s in subgraph.",
input_nodes[i]->name()));
if (i == 0) {
trans_axis0 = BOOST_GET_CONST(
std::vector<int>,
subgraph.at(pattern.GetPDNode("transpose" + std::to_string(0)))
->Op()
->GetAttr("axis"));
flatten_axis0 = BOOST_GET_CONST(
int, subgraph.at(pattern.GetPDNode("flatten" + std::to_string(0)))
->Op()
->GetAttr("axis"));
} else {
std::vector<int> trans_axis = BOOST_GET_CONST(
std::vector<int>,
subgraph.at(pattern.GetPDNode("transpose" + std::to_string(i)))
->Op()
->GetAttr("axis"));
// The axis attribute of every transpose op should be the same
if (trans_axis0 != trans_axis) return;
int flatten_axis = BOOST_GET_CONST(
int, subgraph.at(pattern.GetPDNode("flatten" + std::to_string(i)))
->Op()
->GetAttr("axis"));
// The axis attribute of every flatten op should be the same
if (flatten_axis0 != flatten_axis) return;
}
nodes.push_back(subgraph.at(input_nodes[i]));
nodes.push_back(
subgraph.at(pattern.GetPDNode("transpose" + std::to_string(i))));
......
......@@ -327,6 +327,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
#define REGISTER_OP_IPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, IPU, ::paddle::platform::IPUPlace, __VA_ARGS__)
#define REGISTER_OP_XPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)
......
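Note: a hedged usage sketch of the new REGISTER_OP_IPU_KERNEL macro, mirroring how REGISTER_OP_CPU_KERNEL is used elsewhere in the codebase; the op name and kernel class below are illustrative, not from this commit:

// Hypothetical registration; my_op and MyOpIpuKernel are assumed names.
REGISTER_OP_IPU_KERNEL(my_op,
                       paddle::operators::MyOpIpuKernel<float>,
                       paddle::operators::MyOpIpuKernel<int>);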
......@@ -2,7 +2,7 @@ cc_library(cinn_cache_key SRCS cinn_cache_key.cc DEPS boost graph graph_helper l
cc_library(build_cinn_pass SRCS build_cinn_pass.cc DEPS pass subgraph_detector graph_pattern_detector cinn_compiler errors enforce)
cc_library(transform_desc SRCS transform_desc.cc DEPS proto_desc cinn)
cc_library(cinn_graph_symbolization SRCS cinn_graph_symbolization.cc DEPS lod_tensor graph transform_desc cinn)
cc_library(cinn_compiler SRCS cinn_compiler.cc DEPS framework_proto graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn)
cc_library(cinn_compiler SRCS cinn_compiler.cc DEPS framework_proto graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn cinn_launch_context)
if (WITH_TESTING)
cc_test(cinn_lib_test SRCS cinn_lib_test.cc DEPS cinn)
......
......@@ -29,55 +29,32 @@ namespace paddle {
namespace framework {
namespace paddle2cinn {
using GraphHashStrategy = CinnCacheKey::GraphHashStrategy;
CinnCacheKey::CinnCacheKey(GraphHashStrategy graph_hash)
: graph_hash_(graph_hash) {}
CinnCacheKey::CinnCacheKey(
const ir::Graph& graph,
const std::map<std::string, const LoDTensor*>& input_tensors,
const std::string& arch_str) {
const std::string& arch_str, GraphHashStrategy graph_hash)
: graph_hash_(graph_hash) {
this->SetKey(graph, input_tensors, arch_str);
}
CinnCacheKey::CinnCacheKey(const ir::Graph& graph,
const std::map<std::string, DDim>& input_shapes,
const std::string& arch_str) {
const std::string& arch_str,
GraphHashStrategy graph_hash)
: graph_hash_(graph_hash) {
this->SetKey(graph, input_shapes, arch_str);
}
size_t CinnCacheKey::HashGraph(const ir::Graph& graph) {
// use Dot to uniquely serialize the graph
inference::analysis::Dot dot;
std::unordered_map<const ir::Node*, std::string> node2dot;
int id = 0;
// Create nodes
// graph.Nodes() returns an unordered_set, so the same graph may
// serialize to different results
for (const ir::Node* n : graph.Nodes()) {
std::string node_id = std::to_string(id++);
dot.AddNode(node_id, {}, n->Name(), true);
node2dot[n] = node_id;
}
// Create edges
for (const ir::Node* n : graph.Nodes()) {
const auto& src_id = node2dot.at(n);
for (auto* out : n->outputs) {
const auto& dest_id = node2dot.at(out);
dot.AddEdge(src_id, dest_id, {});
}
}
const std::string& viz_graph = dot.Build();
VLOG(1) << "The hash graph:\n" << viz_graph;
size_t hash_val = std::hash<std::string>()(viz_graph);
VLOG(4) << "The graph's hash value is: " << hash_val;
return hash_val;
}
void CinnCacheKey::SetKey(
const ir::Graph& graph,
const std::map<std::string, const LoDTensor*>& input_tensors,
const std::string& arch_str) {
graph_serialize_str_ = std::to_string(HashGraph(graph));
graph_hash_val_ = graph_hash_(graph);
for (const auto& name_tensor : input_tensors) {
input_shapes_[name_tensor.first] = name_tensor.second->dims();
}
......@@ -87,7 +64,7 @@ void CinnCacheKey::SetKey(
void CinnCacheKey::SetKey(const ir::Graph& graph,
const std::map<std::string, DDim>& input_shapes,
const std::string& arch_str) {
graph_serialize_str_ = std::to_string(HashGraph(graph));
graph_hash_val_ = graph_hash_(graph);
input_shapes_ = input_shapes;
arch_str_ = arch_str;
}
......@@ -97,7 +74,7 @@ bool CinnCacheKey::operator!=(const CinnCacheKey& other) const {
}
bool CinnCacheKey::operator==(const CinnCacheKey& other) const {
return graph_serialize_str_ == other.graph_serialize_str_ &&
return graph_hash_val_ == other.graph_hash_val_ &&
input_shapes_ == other.input_shapes_ && arch_str_ == other.arch_str_;
}
......@@ -114,11 +91,48 @@ size_t CinnCacheKey::Hash::operator()(const CinnCacheKey& key) const {
ret = hash_combine(ret, string_hasher(name_shape.second.to_str()));
}
ret = hash_combine(ret, string_hasher(key.graph_serialize_str_));
ret = hash_combine(ret, key.graph_hash_val_);
ret = hash_combine(ret, string_hasher(key.arch_str_));
return ret;
}
size_t CinnCacheKeyByStructure::HashGraph(const ir::Graph& graph) {
// sort grad node by name and id.
auto compare = [](ir::Node* n1, ir::Node* n2) {
return (n1->Name() == n2->Name()) ? (n1->id() < n2->id())
: (n1->Name() < n2->Name());
};
// graph.Nodes() returns an unordered_set; use an ordered set here so the
// same graph cannot produce different results
std::set<ir::Node *, bool (*)(ir::Node *, ir::Node *)> node_set(compare),
output_set(compare);
node_set.insert(graph.Nodes().begin(), graph.Nodes().end());
std::string hash_str;
for (ir::Node* n : node_set) {
hash_str.append(n->Name());
output_set.clear();
output_set.insert(n->outputs.begin(), n->outputs.end());
for (auto* out : output_set) {
hash_str.append(out->Name());
}
}
VLOG(1) << "The hash graph:\n" << hash_str;
size_t hash_val = std::hash<std::string>()(hash_str);
VLOG(4) << "The graph's hash value by graph structure is: " << hash_val;
return hash_val;
}
size_t CinnCacheKeyByAddress::HashGraph(const ir::Graph& graph) {
size_t hash_val = reinterpret_cast<size_t>(&graph);
VLOG(4) << "The graph's hash value by graph address is: " << hash_val;
return hash_val;
}
} // namespace paddle2cinn
} // namespace framework
} // namespace paddle
......@@ -14,6 +14,7 @@
#pragma once
#include <functional>
#include <map>
#include "paddle/fluid/framework/ddim.h"
......@@ -33,14 +34,18 @@ namespace paddle2cinn {
// shapes.
class CinnCacheKey {
public:
using GraphHashStrategy = std::function<size_t(const ir::Graph&)>;
explicit CinnCacheKey(GraphHashStrategy graph_hash);
CinnCacheKey(const ir::Graph& graph,
const std::map<std::string, const LoDTensor*>& input_tensors,
const std::string& arch_str);
const std::string& arch_str, GraphHashStrategy graph_hash);
CinnCacheKey(const ir::Graph& graph,
const std::map<std::string, DDim>& input_shapes,
const std::string& arch_str);
const std::string& arch_str, GraphHashStrategy graph_hash);
~CinnCacheKey() {}
~CinnCacheKey() = default;
void SetKey(const ir::Graph& graph,
const std::map<std::string, const LoDTensor*>& input_tensors,
......@@ -58,13 +63,38 @@ class CinnCacheKey {
};
private:
size_t HashGraph(const ir::Graph& graph);
std::string graph_serialize_str_;
GraphHashStrategy graph_hash_;
size_t graph_hash_val_;
std::map<std::string, DDim> input_shapes_;
std::string arch_str_;
};
#define CINN_CACHE_KEY_CREATE(NAME) \
class NAME : public CinnCacheKey { \
public: \
NAME() : CinnCacheKey(HashGraph) {} \
\
NAME(const ir::Graph& graph, \
const std::map<std::string, const LoDTensor*>& input_tensors, \
const std::string& arch_str) \
: CinnCacheKey(graph, input_tensors, arch_str, HashGraph) {} \
\
NAME(const ir::Graph& graph, \
const std::map<std::string, DDim>& input_shapes, \
const std::string& arch_str) \
: CinnCacheKey(graph, input_shapes, arch_str, HashGraph) {} \
\
private: \
static size_t HashGraph(const ir::Graph& graph); \
};
// Class to store the keys by graph address for compiling CINN.
CINN_CACHE_KEY_CREATE(CinnCacheKeyByAddress)
// Class to store the keys by graph structure for compiling CINN.
CINN_CACHE_KEY_CREATE(CinnCacheKeyByStructure)
#undef CINN_CACHE_KEY_CREATE
} // namespace paddle2cinn
} // namespace framework
} // namespace paddle
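Note: expanding CINN_CACHE_KEY_CREATE(CinnCacheKeyByAddress) by hand gives roughly the following class (a sketch of the preprocessor output, reformatted for readability; CinnCacheKeyByStructure expands identically except for the name):

class CinnCacheKeyByAddress : public CinnCacheKey {
 public:
  CinnCacheKeyByAddress() : CinnCacheKey(HashGraph) {}

  CinnCacheKeyByAddress(
      const ir::Graph& graph,
      const std::map<std::string, const LoDTensor*>& input_tensors,
      const std::string& arch_str)
      : CinnCacheKey(graph, input_tensors, arch_str, HashGraph) {}

  CinnCacheKeyByAddress(const ir::Graph& graph,
                        const std::map<std::string, DDim>& input_shapes,
                        const std::string& arch_str)
      : CinnCacheKey(graph, input_shapes, arch_str, HashGraph) {}

 private:
  // Each generated class supplies its own hash strategy; for this one it
  // hashes the graph's address, for ByStructure it hashes the topology.
  static size_t HashGraph(const ir::Graph& graph);
};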
......@@ -26,8 +26,8 @@ namespace paddle {
namespace framework {
namespace paddle2cinn {
TEST(CinnCacheKeyTest, TestAsUnorderedKey) {
std::unordered_set<CinnCacheKey, CinnCacheKey::Hash> test_set;
TEST(CinnCacheKeyTest, TestAsUnorderedKeyByStructure) {
std::unordered_set<CinnCacheKeyByStructure, CinnCacheKey::Hash> test_set;
ProgramDesc empty_program;
ir::Graph empty_graph(empty_program);
......@@ -47,19 +47,20 @@ TEST(CinnCacheKeyTest, TestAsUnorderedKey) {
DDim ddim = paddle::framework::make_ddim({1, 2, 3});
std::map<std::string, DDim> feed_shapes = {{"X", ddim}};
CinnCacheKey cache_key0(empty_graph, feed_tensors, "x86");
CinnCacheKey cache_key1(empty_graph, feed_shapes, "x86");
CinnCacheKeyByStructure cache_key0(empty_graph, feed_tensors, "x86");
CinnCacheKeyByStructure cache_key1(empty_graph, feed_shapes, "x86");
EXPECT_EQ(cache_key0, cache_key1);
CinnCacheKey cache_key2(graph, feed_shapes, "x86");
CinnCacheKey cache_key3(graph, feed_shapes, "nvgpu");
CinnCacheKey cache_key4(graph, feed_tensors, "nvgpu");
CinnCacheKeyByStructure cache_key2(graph, feed_shapes, "x86");
CinnCacheKeyByStructure cache_key3(graph, feed_shapes, "nvgpu");
CinnCacheKeyByStructure cache_key4(graph, feed_tensors, "nvgpu");
EXPECT_NE(cache_key2, cache_key3);
EXPECT_EQ(cache_key3, cache_key4);
CinnCacheKey cache_key5(empty_graph,
std::map<std::string, const LoDTensor *>(), "unk");
CinnCacheKey cache_key6(empty_graph, std::map<std::string, DDim>(), "unk");
CinnCacheKeyByStructure cache_key5(
empty_graph, std::map<std::string, const LoDTensor *>(), "unk");
CinnCacheKeyByStructure cache_key6(empty_graph, std::map<std::string, DDim>(),
"unk");
EXPECT_EQ(cache_key5, cache_key6);
EXPECT_NE(cache_key1, cache_key3);
......@@ -98,6 +99,107 @@ TEST(CinnCacheKeyTest, TestAsUnorderedKey) {
EXPECT_EQ(test_set.find(cache_key6), test_set.end());
}
TEST(CinnCacheKeyTest, TestAsUnorderedKeyByAddress) {
std::unordered_set<CinnCacheKeyByAddress, CinnCacheKey::Hash> test_set;
ProgramDesc empty_program;
ir::Graph empty_graph(empty_program);
ProgramDesc program;
auto *global_block = program.MutableBlock(0);
auto *x = global_block->Var("X");
x->SetType(proto::VarType::LOD_TENSOR);
ir::Graph graph(program);
LoDTensor tensor;
tensor.Resize({1, 2, 3});
const LoDTensor *tensor_pointer = &tensor;
std::map<std::string, const LoDTensor *> feed_tensors = {
{"X", tensor_pointer}};
DDim ddim = paddle::framework::make_ddim({1, 2, 3});
std::map<std::string, DDim> feed_shapes = {{"X", ddim}};
CinnCacheKeyByAddress cache_key0(empty_graph, feed_tensors, "x86");
CinnCacheKeyByAddress cache_key1(empty_graph, feed_shapes, "x86");
EXPECT_EQ(cache_key0, cache_key1);
CinnCacheKeyByAddress cache_key2(graph, feed_shapes, "x86");
CinnCacheKeyByAddress cache_key3(graph, feed_shapes, "nvgpu");
CinnCacheKeyByAddress cache_key4(graph, feed_tensors, "nvgpu");
EXPECT_NE(cache_key2, cache_key3);
EXPECT_EQ(cache_key3, cache_key4);
CinnCacheKeyByAddress cache_key5(
empty_graph, std::map<std::string, const LoDTensor *>(), "unk");
CinnCacheKeyByAddress cache_key6(empty_graph, std::map<std::string, DDim>(),
"unk");
EXPECT_EQ(cache_key5, cache_key6);
EXPECT_NE(cache_key1, cache_key3);
EXPECT_NE(cache_key4, cache_key2);
EXPECT_NE(cache_key3, cache_key5);
EXPECT_NE(cache_key6, cache_key4);
EXPECT_NE(cache_key5, cache_key1);
EXPECT_NE(cache_key2, cache_key6);
test_set.insert(cache_key0);
test_set.insert(cache_key1);
test_set.insert(cache_key3);
test_set.insert(cache_key4);
test_set.insert(cache_key5);
test_set.insert(cache_key6);
EXPECT_EQ(test_set.size(), 3U);
auto iter = test_set.find(cache_key0);
EXPECT_NE(iter, test_set.end());
test_set.erase(iter);
EXPECT_EQ(test_set.size(), 2U);
EXPECT_EQ(test_set.find(cache_key1), test_set.end());
iter = test_set.find(cache_key3);
EXPECT_NE(iter, test_set.end());
test_set.erase(iter);
EXPECT_EQ(test_set.size(), 1U);
EXPECT_EQ(test_set.find(cache_key4), test_set.end());
iter = test_set.find(cache_key5);
EXPECT_NE(iter, test_set.end());
test_set.erase(iter);
EXPECT_EQ(test_set.size(), 0U);
EXPECT_EQ(test_set.find(cache_key6), test_set.end());
}
TEST(CinnCacheKeyTest, TestSameGraph) {
ProgramDesc program1;
auto *global_block1 = program1.MutableBlock(0);
auto *x1 = global_block1->Var("X");
x1->SetType(proto::VarType::LOD_TENSOR);
ir::Graph graph1(program1);
ProgramDesc program2;
auto *global_block2 = program2.MutableBlock(0);
auto *x2 = global_block2->Var("X");
x2->SetType(proto::VarType::LOD_TENSOR);
ir::Graph graph2(program2);
LoDTensor tensor;
tensor.Resize({1, 2, 3});
const LoDTensor *tensor_pointer = &tensor;
std::map<std::string, const LoDTensor *> feed_tensors = {
{"X", tensor_pointer}};
CinnCacheKeyByAddress cache_key_by_address1(graph1, feed_tensors, "x86");
CinnCacheKeyByAddress cache_key_by_address2(graph2, feed_tensors, "x86");
EXPECT_NE(cache_key_by_address1, cache_key_by_address2);
CinnCacheKeyByStructure cache_key_by_struct1(graph1, feed_tensors, "x86");
CinnCacheKeyByStructure cache_key_by_struct2(graph2, feed_tensors, "x86");
EXPECT_EQ(cache_key_by_struct1, cache_key_by_struct2);
}
} // namespace paddle2cinn
} // namespace framework
} // namespace paddle
......@@ -41,6 +41,7 @@
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/operators/cinn/cinn_launch_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/string_helper.h"
......@@ -68,23 +69,41 @@ const CinnCompiledObject& CinnCompiler::Compile(
const std::map<std::string, const LoDTensor*>& input_tensors,
const Target& target, void* stream) {
VLOG(1) << "-- The graph to be compiled is:\n" << VizGraph(graph);
CinnCacheKey cur_key(graph, input_tensors, target.arch_str());
CinnCacheKeyByAddress cur_key_by_address(graph, input_tensors,
target.arch_str());
CinnCacheKeyByStructure cur_key_by_struct;
bool exist = false;
{
AutoRDLock r_guard{&rwlock_};
exist = cache_.count(cur_key) != 0;
exist = cache_by_address_.count(cur_key_by_address) != 0;
// if the graph cannot be found by address, check whether the graph
// structure has been stored in the cache.
if (!exist) {
// generate the structure cache key
cur_key_by_struct.SetKey(graph, input_tensors, target.arch_str());
// if the graph structure is found, store the graph address in the
// cache to speed up the next query.
if (cache_by_struct_.count(cur_key_by_struct) != 0) {
exist = true;
cache_by_address_[cur_key_by_address] =
cache_by_struct_.at(cur_key_by_struct).get();
}
}
}
if (!exist) {
std::int64_t compiled_num = real_compiled_num_.fetch_add(1);
auto compiled_res =
CompileGraph(graph, input_tensors, target, compiled_num, stream);
AutoWRLock w_guard{&rwlock_};
if (!cache_.count(cur_key)) {
cache_[cur_key] = std::move(compiled_res);
if (!cache_by_struct_.count(cur_key_by_struct)) {
cache_by_address_[cur_key_by_address] = compiled_res.get();
cache_by_struct_[cur_key_by_struct] = std::move(compiled_res);
}
}
AutoRDLock guard{&rwlock_};
const auto& cached_boj = *cache_[cur_key];
const auto& cached_boj = *cache_by_address_[cur_key_by_address];
return cached_boj;
}
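// A condensed sketch of the two-level lookup implemented above (hypothetical
// helper, not part of the diff; member names follow CinnCompiler):
//
// CinnCompiledObject* Lookup(const CinnCacheKeyByAddress& key_by_address,
//                            const CinnCacheKeyByStructure& key_by_struct) {
//   auto addr_it = cache_by_address_.find(key_by_address);
//   if (addr_it != cache_by_address_.end()) {
//     return addr_it->second;  // fast path: the same graph object was seen
//   }
//   auto struct_it = cache_by_struct_.find(key_by_struct);
//   if (struct_it != cache_by_struct_.end()) {
//     // memoize the address key so the next query takes the fast path
//     cache_by_address_[key_by_address] = struct_it->second.get();
//     return struct_it->second.get();
//   }
//   return nullptr;  // miss: CompileGraph() and insert into both caches
// }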
......@@ -181,7 +200,8 @@ void CinnCompiler::Clear() {
{
AutoWRLock guard{&rwlock_};
graphs_.clear();
cache_.clear();
cache_by_address_.clear();
cache_by_struct_.clear();
}
real_compiled_num_.store(0);
}
......@@ -217,6 +237,9 @@ std::unique_ptr<CinnCompiledObject> CinnCompiler::CompileGraph(
*compiled_obj = {std::move(graph_compiler),
std::move(compiled_res.runtime_program), scope,
symbol.var_model_to_program_map()};
compiled_obj->launch_context =
std::make_unique<operators::details::CinnLaunchContext>(
compiled_obj->paddle2cinn_varmap, compiled_obj->scope);
return compiled_obj;
}
......
......@@ -31,6 +31,13 @@
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace operators {
namespace details {
class CinnLaunchContext;
} // namespace details
} // namespace operators
namespace framework {
namespace paddle2cinn {
......@@ -39,6 +46,7 @@ struct CinnCompiledObject {
std::unique_ptr<::cinn::hlir::framework::Program> runtime_program;
std::shared_ptr<::cinn::hlir::framework::Scope> scope;
std::unordered_map<std::string, std::string> paddle2cinn_varmap;
std::unique_ptr<operators::details::CinnLaunchContext> launch_context;
};
// Entrance to use CINN.
......@@ -87,9 +95,12 @@ class CinnCompiler {
void* stream = nullptr) const;
std::unordered_map<std::string, std::unique_ptr<ir::Graph>> graphs_;
std::unordered_map<CinnCacheKey, std::unique_ptr<CinnCompiledObject>,
std::unordered_map<CinnCacheKeyByAddress, CinnCompiledObject*,
CinnCacheKey::Hash>
cache_;
cache_by_address_;
std::unordered_map<CinnCacheKeyByStructure,
std::unique_ptr<CinnCompiledObject>, CinnCacheKey::Hash>
cache_by_struct_;
std::atomic_int64_t real_compiled_num_{0};
mutable RWLock rwlock_;
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include <sstream>
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/kernel_factory.h"
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -190,8 +191,9 @@ KernelArgsNameMakerByOpProto::GetAttrsArgsNames() {
}
KernelSignature KernelArgsNameMakerByOpProto::GetKernelSignature() {
return KernelSignature(op_proto_->type(), GetInputArgsNames(),
GetAttrsArgsNames(), GetOutputArgsNames());
return KernelSignature(pten::TransToPtenKernelName(op_proto_->type()),
GetInputArgsNames(), GetAttrsArgsNames(),
GetOutputArgsNames());
}
std::string KernelSignatureToString(const KernelSignature& signature) {
......
......@@ -76,6 +76,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_IPU
else if (platform::is_ipu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
} else if (platform::is_cpu_place(src_place) &&
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
} else if (platform::is_ipu_place(src_place) &&
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
}
#endif
#ifdef PADDLE_WITH_XPU
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
......@@ -386,17 +402,33 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_IPU
else if (platform::is_ipu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
} else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
} else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
#ifdef PADDLE_WITH_XPU
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size);
} else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
}
else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
} else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
}
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
......@@ -404,7 +436,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
}
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size);
} else { // NOLINT
}
else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
......@@ -571,6 +604,11 @@ class AnyVisitor : public boost::static_visitor<bool> {
platform::errors::Unimplemented("Not supported on place (%s) ", npu));
// return GetResultHelper(out, npu);
}
bool GetResult(const framework::Tensor& out,
const platform::IPUPlace& ipu) const {
PADDLE_THROW(
platform::errors::Unimplemented("Not supported on place (%s) ", ipu));
}
bool GetResult(const framework::Tensor& out,
const platform::NPUPinnedPlace& cpu) const {
......@@ -762,6 +800,9 @@ struct BothFalseVisitor : public boost::static_visitor<> {
void VisitorImpl(const platform::XPUPlace& xpu) const {
PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported"));
}
void VisitorImpl(const platform::IPUPlace& ipu) const {
PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported"));
}
void VisitorImpl(const platform::CUDAPlace& gpu) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
......
......@@ -155,6 +155,13 @@ class TensorAddFunctor : public boost::static_visitor<> {
"is not supported in imperative mode",
place));
}
// there is NO support for IPUPlace
void operator()(const platform::IPUPlace& place) {
PADDLE_THROW(platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode",
place));
}
private:
int64_t numel_;
......
......@@ -487,6 +487,14 @@ static void PreparedOpRunImpl(
op.Type(), outs, dev_ctx->GetPlace());
}
if (FLAGS_benchmark) {
dev_ctx->Wait();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PADDLE_ENFORCE_GPU_SUCCESS(platform::GpuGetLastError());
VLOG(4) << "Operator(" << op.Type() << "): context wait and get last error";
#endif
}
/**
* [ Why need handle complex gradient to real gradient? ]
*
......
......@@ -211,70 +211,6 @@ void SplitTensorsWithType<platform::XPUDeviceContext>(
}
#endif
// NOTE(liubo48): Only implement operators::math::SplitFunctor for npu now.
// If later the operators::StridedMemcpyWithAxis0 is supported,
// then this specific SplitTensorsForAllReduce can be removed.
#ifdef PADDLE_WITH_ASCEND_CL
template <>
void SplitTensorsForAllReduce<platform::NPUDeviceContext, float>(
const platform::NPUDeviceContext &context,
framework::Variable *p_dense_contents,
std::vector<framework::Tensor> *p_dense_tensors) {
auto *in = p_dense_contents->GetMutable<framework::LoDTensor>();
std::vector<framework::Tensor *> outs;
std::vector<const framework::Tensor *> shape_refer;
outs.reserve(p_dense_tensors->size());
shape_refer.reserve(p_dense_tensors->size());
for (auto &tensor : *p_dense_tensors) {
outs.emplace_back(&tensor);
shape_refer.emplace_back(&tensor);
}
operators::math::SplitFunctor<platform::NPUDeviceContext, float>
split_functor_;
split_functor_(context, *in, shape_refer, 0, &outs);
}
template <>
void ConcatTensorsWithType<platform::NPUDeviceContext>(
const platform::NPUDeviceContext &context,
const std::vector<framework::Tensor> &dense_tensors_,
framework::Variable *p_dense_contents,
framework::proto::VarType::Type type) {
switch (type) {
case framework::proto::VarType::FP32:
ConcatTensorsForAllReduce<platform::NPUDeviceContext, float>(
context, dense_tensors_, p_dense_contents);
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Data type (%s) is not supported when it concats tensors for "
"allreduce.",
framework::DataTypeToString(type)));
}
}
template <>
void SplitTensorsWithType<platform::NPUDeviceContext>(
const platform::NPUDeviceContext &context,
framework::Variable *p_dense_contents,
std::vector<framework::Tensor> *p_dense_tensors,
framework::proto::VarType::Type type) {
switch (type) {
case framework::proto::VarType::FP32:
SplitTensorsForAllReduce<platform::NPUDeviceContext, float>(
context, p_dense_contents, p_dense_tensors);
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Data type (%s) is not supported when it splits tensors for "
"allreduce.",
framework::DataTypeToString(type)));
}
}
#endif
void Group::ConcatTensors(const platform::DeviceContext &context) {
auto place = context.GetPlace();
if (platform::is_gpu_place(place)) {
......
......@@ -348,13 +348,14 @@ class AllocatorFacadePrivate {
const AllocatorMap& GetAllocatorMap() {
#ifdef PADDLE_WITH_CUDA
if (UNLIKELY(platform::CUDAGraph::IsCapturing())) {
if (UNLIKELY(platform::CUDAGraph::IsThisThreadCapturing())) {
auto id = platform::CUDAGraph::CapturingID();
auto iter = cuda_graph_allocator_map_.find(id);
PADDLE_ENFORCE_NE(
iter, cuda_graph_allocator_map_.end(),
platform::errors::PermissionDenied(
"No memory pool is prepared for CUDA Graph capturing."));
VLOG(10) << "Choose CUDA Graph memory pool to allocate memory";
return iter->second->allocators_;
} else {
return allocators_;
......@@ -405,7 +406,7 @@ class AllocatorFacadePrivate {
#if defined(PADDLE_WITH_HIP)
auto cuda_allocator = std::make_shared<CUDAAllocator>(p);
cuda_allocators_[p][stream] = std::make_shared<AutoGrowthBestFitAllocator>(
cuda_allocator, platform::GpuMinChunkSize(), allow_free_idle_chunk_);
cuda_allocator, platform::GpuMinChunkSize(), 0, allow_free_idle_chunk_);
#endif
#if defined(PADDLE_WITH_CUDA)
......
......@@ -116,6 +116,34 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
return GetCPUBuddyAllocator()->Used();
}
// For Graphcore IPU
template <>
void *Alloc<platform::IPUPlace>(const platform::IPUPlace &place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
VLOG(10) << "IPUPlace, Allocate on cpu.";
void *p = GetCPUBuddyAllocator()->Alloc(size);
if (FLAGS_init_allocated_mem) {
memset(p, 0xEF, size);
}
VLOG(10) << " pointer=" << p;
return p;
}
template <>
void Free<platform::IPUPlace>(const platform::IPUPlace &place, void *p,
size_t size) {
VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p);
}
template <>
uint64_t Release<platform::IPUPlace>(const platform::IPUPlace &place) {
return GetCPUBuddyAllocator()->Release();
}
template <>
size_t Used<platform::IPUPlace>(const platform::IPUPlace &place) {
return GetCPUBuddyAllocator()->Used();
}
// For kunlun XPU
template <>
void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) {
......
......@@ -32,9 +32,34 @@ CinnLaunchContext::CinnLaunchContext(
[](const auto& name_view) { return std::string(name_view.data()); });
}
bool CinnLaunchContext::IsVariableUsed(const std::string& paddle_name) {
return paddle2cinn_varmap_.count(paddle_name) > 0 &&
cinn_variable_names_.count(paddle2cinn_varmap_.at(paddle_name)) > 0;
void CinnLaunchContext::UpdateCapturedEnv(const framework::Scope& scope,
const platform::Place& place) {
if (std::addressof(scope) == cached_scope_ &&
std::addressof(place) == cached_place_) {
VLOG(4) << "Captured scope:" << cached_scope_ << ", place:" << cached_place_
<< " are not changed";
return;
}
cached_scope_ = std::addressof(scope);
cached_place_ = std::addressof(place);
cached_temp_scope_ = scope.NewTmpScope();
VLOG(4) << "Captured env is update, scope:" << cached_scope_ << "->"
<< std::addressof(scope) << ", place:" << cached_place_ << "->"
<< std::addressof(place);
}
bool CinnLaunchContext::IsArgumentsInitialized() const {
if (hold_buffers_.empty() || name2argument_.empty()) {
return false;
}
return true;
}
bool CinnLaunchContext::IsVariableUsed(
const std::string& paddle_var_name) const {
return paddle2cinn_varmap_.count(paddle_var_name) > 0 &&
cinn_variable_names_.count(paddle2cinn_varmap_.at(paddle_var_name)) >
0;
}
CinnTensor CinnLaunchContext::GetCinnTensor(const std::string& var_name) {
......@@ -53,99 +78,101 @@ std::unordered_set<std::string> CinnLaunchContext::GetInternalVariableNames() {
return all_parameters;
}
void CinnLaunchContext::CheckTensorEquivalent(const std::string& paddle_name,
const LoDTensor& paddle_tensor,
const CinnTensor& cinn_tensor) {
void CinnLaunchContext::CheckTensorEquivalent(
const std::string& paddle_var_name, const LoDTensor& paddle_tensor,
const CinnTensor& cinn_tensor) {
// check dimension
auto cinn_dims = framework::make_ddim(cinn_tensor->shape().data());
PADDLE_ENFORCE_EQ(paddle_tensor.dims(), cinn_dims,
platform::errors::PreconditionNotMet(
"Tensors' shape in variable(%s) are not equivalent, "
"paddle's shape = [%s], but cinn's shape = [%s].",
paddle_name, paddle_tensor.dims(), cinn_dims));
paddle_var_name, paddle_tensor.dims(), cinn_dims));
// TODO(CtfGo): check the underlying data type after CINN ready
}
void CinnLaunchContext::AssignExternalVariable(const std::string& paddle_name,
const platform::Place& place,
LoDTensor* paddle_tensor) {
PADDLE_ENFORCE_EQ(IsVariableUsed(paddle_name), true,
platform::errors::InvalidArgument(
"Paddle variable(%s) not used by cinn", paddle_name));
const auto& cinn_name = paddle2cinn_varmap_.at(paddle_name);
CinnTensor cinn_tensor = GetCinnTensor(cinn_name);
if (!paddle_tensor->IsInitialized()) {
paddle_tensor->Resize(framework::make_ddim(cinn_tensor->shape().data()));
void CinnLaunchContext::AssignExternalVariable(
const std::string& paddle_var_name) {
PADDLE_ENFORCE_EQ(
IsVariableUsed(paddle_var_name), true,
platform::errors::InvalidArgument("Paddle variable(%s) not used by cinn",
paddle_var_name));
const auto& cinn_var_name = paddle2cinn_varmap_.at(paddle_var_name);
const auto& paddle_tensor =
cached_scope_->GetVar(paddle_var_name)->Get<LoDTensor>();
CinnTensor cinn_tensor = GetCinnTensor(cinn_var_name);
if (paddle_tensor.IsInitialized()) {
CheckTensorEquivalent(paddle_var_name, paddle_tensor, cinn_tensor);
}
CheckTensorEquivalent(paddle_name, *paddle_tensor, cinn_tensor);
return SetArgument(cinn_name, place, /* free_mem_callback = */ false,
paddle_tensor);
}
void CinnLaunchContext::AssignInternalVariable(const std::string& cinn_name,
const platform::Place& place,
LoDTensor* paddle_tensor) {
PADDLE_ENFORCE_GT(cinn_variable_names_.count(cinn_name), 0,
platform::errors::InvalidArgument(
"Variable(%s) not found in cinn socpe.", cinn_name));
CinnTensor cinn_tensor = GetCinnTensor(cinn_name);
if (!paddle_tensor->IsInitialized()) {
paddle_tensor->Resize(framework::make_ddim(cinn_tensor->shape().data()));
}
CheckTensorEquivalent(cinn_name, *paddle_tensor, cinn_tensor);
return SetArgument(cinn_name, place, /* free_mem_callback = */ true,
paddle_tensor);
}
auto cinn_buffer = std::make_unique<cinn_buffer_t>();
// assign dimensions and alloc/free callback of cinn_buffer_t
cinn_buffer->resize(cinn_tensor->shape().data().data(),
cinn_tensor->shape().data().size());
cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
[this, paddle_var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor =
cached_scope_->GetVar(paddle_var_name)->GetMutable<LoDTensor>();
tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
buffer->memory = reinterpret_cast<uint8_t*>(
tensor->mutable_data<float>(*cached_place_));
return 0;
});
std::unique_ptr<cinn_buffer_t> CinnLaunchContext::ShareTensorWithCinnBuffer(
const platform::Place& place, bool free_mem_callback, LoDTensor* tensor) {
// convert paddle dimensions array to cinn format
std::vector<cinn_dimension_t> cinn_dims(tensor->dims().size());
for (auto i = 0; i < tensor->dims().size(); ++i) {
cinn_dims[i] = static_cast<cinn_dimension_t>(tensor->dims().at(i));
}
// external variables will be recycled by global gc, so do nothing here
cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
[](void* ctx, cinn_buffer_t* buffer) {
// Do nothing
return 0;
});
return SetArgument(cinn_var_name, std::move(cinn_buffer));
}
void CinnLaunchContext::AssignInternalVariable(
const std::string& cinn_var_name) {
PADDLE_ENFORCE_GT(
cinn_variable_names_.count(cinn_var_name), 0,
platform::errors::InvalidArgument("Variable(%s) not found in cinn socpe.",
cinn_var_name));
CinnTensor cinn_tensor = GetCinnTensor(cinn_var_name);
auto cinn_buffer = std::make_unique<cinn_buffer_t>();
// assign size and memory
cinn_buffer->resize(cinn_dims.data(), cinn_dims.size());
// assign dimensions and alloc/free callback of cinn_buffer_t
cinn_buffer->resize(cinn_tensor->shape().data().data(),
cinn_tensor->shape().data().size());
cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
[place, tensor](void* ctx, cinn_buffer_t* buffer) {
buffer->memory =
reinterpret_cast<uint8_t*>(tensor->mutable_data<float>(place));
[this, cinn_var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor =
cached_temp_scope_->Var(cinn_var_name)->GetMutable<LoDTensor>();
tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
buffer->memory = reinterpret_cast<uint8_t*>(
tensor->mutable_data<float>(*cached_place_));
return 0;
});
if (free_mem_callback) {
cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
[tensor](void* ctx, cinn_buffer_t* buffer) {
tensor->clear();
return 0;
});
return cinn_buffer;
}
// internal variables should release their buffers immediately
// if no instruction uses them
cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
[](void* ctx, cinn_buffer_t* buffer) {
// Do nothing
[this, cinn_var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor =
cached_temp_scope_->GetVar(cinn_var_name)->GetMutable<LoDTensor>();
tensor->clear();
return 0;
});
return cinn_buffer;
return SetArgument(cinn_var_name, std::move(cinn_buffer));
}
void CinnLaunchContext::SetArgument(const std::string& cinn_name,
const platform::Place& place,
bool free_mem_callback,
LoDTensor* paddle_tensor) {
auto buffer =
ShareTensorWithCinnBuffer(place, free_mem_callback, paddle_tensor);
name2argument_.emplace(cinn_name, buffer.get());
void CinnLaunchContext::SetArgument(const std::string& cinn_var_name,
std::unique_ptr<cinn_buffer_t>&& buffer) {
VLOG(4) << "SetArgument-" << name2argument_.size() << ": name("
<< cinn_var_name << "), dims("
<< framework::DDim(buffer->dims, buffer->dimensions) << ").";
name2argument_.emplace(cinn_var_name, buffer.get());
hold_buffers_.emplace_back(std::move(buffer));
VLOG(4) << "SetArgument-" << name2argument_.size() << ": "
<< "name(" << cinn_name << "), dims(" << paddle_tensor->dims()
<< ").";
}
const std::map<std::string, cinn_pod_value_t>&
......
......@@ -24,7 +24,7 @@
#include "cinn/runtime/cinn_runtime.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace operators {
......@@ -40,16 +40,22 @@ class CinnLaunchContext {
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap,
const std::shared_ptr<CinnScope>& cinn_scope);
// Explicitly update the environment (scope and place) captured
// by the callbacks of the execution arguments
void UpdateCapturedEnv(const framework::Scope& scope,
const platform::Place& place);
// Return whether the execution arguments have been initialized
bool IsArgumentsInitialized() const;
// Return whether a Paddle variable is used by the compiled kernels
bool IsVariableUsed(const std::string& var_name);
bool IsVariableUsed(const std::string& paddle_var_name) const;
// Assign tensor buffer to input or output variables
void AssignExternalVariable(const std::string& var_name,
const platform::Place& place, LoDTensor* tensor);
void AssignExternalVariable(const std::string& paddle_var_name);
// Assign tensor buffer to internal variables
void AssignInternalVariable(const std::string& var_name,
const platform::Place& place, LoDTensor* tensor);
void AssignInternalVariable(const std::string& cinn_var_name);
// Extract internal variable names from CinnScope
// by excluding used input and output variables
......@@ -58,10 +64,6 @@ class CinnLaunchContext {
// Finalize all execution arguments and return them
const std::map<std::string, cinn_pod_value_t>& FinalizeArguments() const;
std::vector<std::unique_ptr<cinn_buffer_t>> HandoverBuffers() {
return std::move(hold_buffers_);
}
private:
// Get CinnTensor with CINN variable name
CinnTensor GetCinnTensor(const std::string& var_name);
......@@ -72,16 +74,15 @@ class CinnLaunchContext {
const LoDTensor& paddle_tensor,
const CinnTensor& cinn_tensor);
// Share the buffer of a Paddle tensor to CINN by delivering memory address
// to a cinn_buffer_t object
std::unique_ptr<cinn_buffer_t> ShareTensorWithCinnBuffer(
const platform::Place& place, bool free_mem_callback, LoDTensor* tensor);
// Set an argument with (cinn name)->(paddle tensor) pair
void SetArgument(const std::string& cinn_name, const platform::Place& place,
bool free_mem_callback, LoDTensor* paddle_tensor);
// Set an argument with (cinn name)->(cinn_buffer_t) pair
void SetArgument(const std::string& cinn_var_name,
std::unique_ptr<cinn_buffer_t>&& buffer);
private:
const framework::Scope* cached_scope_ = nullptr;
const platform::Place* cached_place_ = nullptr;
std::unique_ptr<framework::Scope> cached_temp_scope_ = nullptr;
// a variable name map from paddle to cinn
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap_;
// the variable scope of cinn
......
......@@ -45,81 +45,86 @@ std::unique_ptr<CinnLaunchContext> CreateDefaultLaunchContext() {
return std::make_unique<CinnLaunchContext>(paddle2cinn_varmap, cinn_scope);
}
TEST(CinnLaunchContextTest, TestIsVariableUsed) {
TEST(CinnLaunchContextTest, TestBasic) {
auto launch_context = CreateDefaultLaunchContext();
// test IsVariableUsed
ASSERT_EQ(launch_context->IsVariableUsed("var1"), true);
ASSERT_EQ(launch_context->IsVariableUsed("var4"), false);
}
TEST(CinnLaunchContextTest, TestGetInternalVariableNames) {
auto launch_context = CreateDefaultLaunchContext();
auto internal_variable_names = launch_context->GetInternalVariableNames();
ASSERT_EQ(internal_variable_names.size(), 3);
EXPECT_NE(internal_variable_names.find("cinn_var2"),
internal_variable_names.end());
// test UpdateCapturedEnv
platform::CPUPlace place;
framework::Scope scope;
ASSERT_NO_THROW(launch_context->UpdateCapturedEnv(scope, place));
// test IsArgumentsInitialized
ASSERT_FALSE(launch_context->IsArgumentsInitialized());
}
TEST(CinnLaunchContextTest, TestCheckTensorEquivalent) {
auto launch_context = CreateDefaultLaunchContext();
platform::CPUPlace place;
framework::Scope scope;
auto launch_context = CreateDefaultLaunchContext();
launch_context->UpdateCapturedEnv(scope, place);
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
// CheckTensorEquivalent: tensor dimension not equivalent
tensor1->mutable_data<float>(framework::make_ddim({3, 5}), place);
ASSERT_THROW(launch_context->AssignExternalVariable("var1", place, tensor1),
ASSERT_THROW(launch_context->AssignExternalVariable("var1"),
paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchContextTest, TestAssignVariablePreCondition) {
auto launch_context = CreateDefaultLaunchContext();
platform::CPUPlace place;
framework::Scope scope;
auto launch_context = CreateDefaultLaunchContext();
launch_context->UpdateCapturedEnv(scope, place);
auto* tensor4 = scope.Var("var4")->GetMutable<LoDTensor>();
// not used
ASSERT_THROW(launch_context->AssignExternalVariable("var4", place, tensor4),
ASSERT_THROW(launch_context->AssignExternalVariable("var4"),
paddle::platform::EnforceNotMet);
// not found
ASSERT_THROW(
launch_context->AssignExternalVariable("cinn_var4", place, tensor4),
paddle::platform::EnforceNotMet);
ASSERT_THROW(launch_context->AssignInternalVariable("cinn_var4"),
paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchContextTest, TestSetArgument) {
platform::CPUPlace cpu_place;
platform::Place place(cpu_place);
framework::Scope scope;
auto launch_context = CreateDefaultLaunchContext();
launch_context->UpdateCapturedEnv(scope, place);
platform::CPUPlace place;
framework::Scope scope;
// assign external variables
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
float* data1 =
tensor1->mutable_data<float>(framework::make_ddim({3, 4}), place);
data1[0] = 9.99f;
data1[10] = 19.99f;
ASSERT_NO_THROW(launch_context->AssignExternalVariable("var1"));
// assign external variable
ASSERT_NO_THROW(
launch_context->AssignExternalVariable("var1", place, tensor1));
auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
tensor2->mutable_data<float>(framework::make_ddim({6, 7, 8}), place);
ASSERT_NO_THROW(
launch_context->AssignInternalVariable("cinn_var2", place, tensor2));
// FinalizeArguments not missed check
ASSERT_THROW(launch_context->FinalizeArguments(),
paddle::platform::EnforceNotMet);
auto* tensor3 = scope.Var("var3")->GetMutable<LoDTensor>();
tensor3->mutable_data<float>(framework::make_ddim({10, 16}), place);
ASSERT_NO_THROW(
launch_context->AssignExternalVariable("var3", place, tensor3));
ASSERT_NO_THROW(launch_context->AssignExternalVariable("var3"));
// FinalizeArguments missed check
ASSERT_THROW(launch_context->FinalizeArguments(),
paddle::platform::EnforceNotMet);
// test get internal variables
auto internal_variable_names = launch_context->GetInternalVariableNames();
ASSERT_EQ(internal_variable_names.size(), 1);
EXPECT_EQ(*internal_variable_names.begin(), "cinn_var2");
auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
tensor2->mutable_data<float>(framework::make_ddim({6, 7, 8}), place);
ASSERT_NO_THROW(launch_context->AssignInternalVariable("cinn_var2"));
// check argument is set correctly and alloc/free callbacks work well
auto name2argument = launch_context->FinalizeArguments();
ASSERT_EQ(name2argument.size(), 3);
ASSERT_EQ(name2argument.count("cinn_var1"), 1);
// check ShareTensorWithCinnBuffer
ASSERT_TRUE(launch_context->IsArgumentsInitialized());
auto* cinn_buffer =
static_cast<cinn_buffer_t*>(name2argument.at("cinn_var1"));
ASSERT_EQ(cinn_buffer->memory, nullptr);
cinn_buffer->external_malloc->operator()(nullptr, cinn_buffer);
ASSERT_NE(cinn_buffer->memory, nullptr);
......
......@@ -31,26 +31,6 @@ namespace operators {
namespace details {
#ifdef PADDLE_WITH_CUDA
void CUDART_CB ReleaseScope(void* data) {
auto* temp_scope = static_cast<framework::Scope*>(data);
delete temp_scope;
}
void CUDART_CB ReleaseBuffers(void* data) {
auto* buffers =
static_cast<std::vector<std::unique_ptr<cinn_buffer_t>>*>(data);
delete buffers;
}
template <>
void ReleaseResource<platform::CUDADeviceContext>(
const std::vector<void*>& resources, void* stream) {
PADDLE_ENFORCE_GPU_SUCCESS(cudaLaunchHostFunc(
static_cast<gpuStream_t>(stream), ReleaseScope, resources[0]));
PADDLE_ENFORCE_GPU_SUCCESS(cudaLaunchHostFunc(
static_cast<gpuStream_t>(stream), ReleaseBuffers, resources[1]));
}
template <>
void* GetStream<platform::CUDADeviceContext>(
const framework::ExecutionContext& ctx) {
......
......@@ -56,25 +56,12 @@ void LaunchCinnExecution(const CinnCompiledObject& compiled_obj,
// Set cinn FLAGS (such as FLAGS_cinn_cudnn_deterministic) with paddle's FLAGS.
void SetCinnRuntimeFlags();
template <typename DeviceContext>
void ReleaseResource(const std::vector<void*>& resources, void* stream) {
auto* temp_scope = static_cast<framework::Scope*>(resources[0]);
auto* buffers =
static_cast<std::vector<std::unique_ptr<cinn_buffer_t>>*>(resources[1]);
delete temp_scope;
delete buffers;
}
template <typename DeviceContext>
void* GetStream(const framework::ExecutionContext& ctx) {
return nullptr;
}
#ifdef PADDLE_WITH_CUDA
template <>
void ReleaseResource<platform::CUDADeviceContext>(
const std::vector<void*>& resources, void* stream);
template <>
void* GetStream<platform::CUDADeviceContext>(
const framework::ExecutionContext& ctx);
......@@ -116,56 +103,54 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
compilation_key, inputs_name2tensor, target, stream);
details::DebugCinnCompiledResult(cinn_compiled_object);
auto launch_context = std::make_unique<details::CinnLaunchContext>(
cinn_compiled_object.paddle2cinn_varmap, cinn_compiled_object.scope);
auto* launch_context = cinn_compiled_object.launch_context.get();
// Step 3. Prepare arguments needed for the compiled executable program.
VLOG(4) << "CinnLaunchOp prepare arguments";
// 3.1 Prepare input variables: tensors of input variables have
// been initialized before the graph is compiled, so just check the
// equality between the tensors of paddle and cinn.
for (const auto& var_name : input_variable_names) {
if (!launch_context->IsVariableUsed(var_name)) {
// some input variables are not needed by cinn because they are
// eliminated by optimization passes or some cinn operators use
// fewer variables
VLOG(4) << "Input variable(" << var_name << ") not used by cinn";
continue;
launch_context->UpdateCapturedEnv(scope, place);
if (!launch_context->IsArgumentsInitialized()) {
VLOG(4) << "CinnLaunchOp prepare arguments";
// 3.1 Prepare input variables: tensors of input variables have
// been initialized before the graph is compiled, so just check the
// equality between the tensors of paddle and cinn.
for (const auto& var_name : input_variable_names) {
if (!launch_context->IsVariableUsed(var_name)) {
// some input variables are not needed by cinn because they are
// eliminated by optimization passes or some cinn operators use
// fewer variables
VLOG(4) << "Input variable(" << var_name << ") not used by cinn";
continue;
}
launch_context->AssignExternalVariable(var_name);
}
launch_context->AssignExternalVariable(
var_name, place, scope.GetVar(var_name)->GetMutable<LoDTensor>());
}
// 3.2 Prepare output variables: all output variables should
// be initialized and have buffers allocated before
// the runtime program starts execution; the compilation result
// includes details of their buffer assignment and we use that to
// allocate space in Paddle. For those variables already allocated,
// like persistable parameters, just check the equality between
// the Paddle allocation and the CINN buffer assignment.
auto output_variable_names = ctx.OutputNames(kOutputs);
for (const auto var_name : output_variable_names) {
PADDLE_ENFORCE_EQ(launch_context->IsVariableUsed(var_name), true,
platform::errors::InvalidArgument(
"Output variable(%s) not used by cinn", var_name));
auto* tensor = scope.GetVar(var_name)->GetMutable<LoDTensor>();
launch_context->AssignExternalVariable(var_name, place, tensor);
}
// 3.2 Prepare output variables: all output variables should
// be initialized and have buffers allocated before
// the runtime program starts execution; the compilation result
// includes details of their buffer assignment and we use that to
// allocate space in Paddle. For those variables already allocated,
// like persistable parameters, just check the equality between
// the Paddle allocation and the CINN buffer assignment.
auto output_variable_names = ctx.OutputNames(kOutputs);
for (const auto& var_name : output_variable_names) {
PADDLE_ENFORCE_EQ(
launch_context->IsVariableUsed(var_name), true,
platform::errors::InvalidArgument(
"Output variable(%s) not used by cinn", var_name));
launch_context->AssignExternalVariable(var_name);
}
// 3.3 Prepare internal or temporary variables: Create a temporary
// scope to keep internal variables within graph or temporary
// variables needed by the compiled runtime program in addition.
// Here we directly use the names from CinnScope as Paddle variable
// names, because they will not be used outside the graph
// and should be destructed after the computation finishes.
auto internal_variable_names = launch_context->GetInternalVariableNames();
framework::Scope* temp_scope = scope.NewTmpScope().release();
for (const auto& var_name : internal_variable_names) {
auto* tensor = temp_scope->Var(var_name)->GetMutable<LoDTensor>();
launch_context->AssignInternalVariable(var_name, place, tensor);
// 3.3 Prepare internal or temporary variables: Create a temporary
// scope to keep internal variables within graph or temporary
// variables needed by the compiled runtime program in addition.
// Here we directly use the names from CinnScope as Paddle variable
// names, because they will not be used outside the graph
// and should be destructed after the computation finishes.
auto internal_variable_names = launch_context->GetInternalVariableNames();
for (const auto& var_name : internal_variable_names) {
launch_context->AssignInternalVariable(var_name);
}
}
// Step 4. Set CINN runtime FLAGS, such as FLAGS_cinn_cudnn_deterministic.
......@@ -175,12 +160,6 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
VLOG(4) << "Run Cinn compiled executable program with stream: " << stream;
details::LaunchCinnExecution(cinn_compiled_object, *launch_context, stream);
VLOG(4) << "CinnLaunchOp launch execution done.";
// Step 6. Release some resources, such as `temp_scope` and cinn_buffers.
auto* buffers_holder = new std::vector<std::unique_ptr<cinn_buffer_t>>{
launch_context->HandoverBuffers()};
details::ReleaseResource<DeviceContext>({temp_scope, buffers_holder},
stream);
}
};
......
......@@ -130,8 +130,9 @@ TEST(CinnLaunchOpTest, TestElementwiseAddPass) {
scope.Var(test_out_name)->GetMutable<LoDTensor>();
scope.Var(expected_out_name)->GetMutable<LoDTensor>();
cinn_launch_op->Run(scope, place);
elementwise_add_op->Run(scope, place);
platform::Place run_place(place);
cinn_launch_op->Run(scope, run_place);
elementwise_add_op->Run(scope, run_place);
LoDTensor test_out, expected_out;
TensorCopySync(scope.Var(test_out_name)->Get<LoDTensor>(),
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/complex_view_op.h"
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
class AsComplexOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "as_complex");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "as_complex");
auto in_dims = ctx->GetInputDim("X");
const int input_rank = in_dims.size();
PADDLE_ENFORCE_GE(
input_rank, 1,
platform::errors::InvalidArgument(
"The rank of input(X) is less than 1. "
"Expected the rank of input(X) to be equal to or greater than 1."
"But received rank of input(X) = %d",
input_rank));
const int last_dim_size = in_dims[input_rank - 1];
PADDLE_ENFORCE_EQ(
last_dim_size, 2,
platform::errors::InvalidArgument(
"The size of the last dimension of input(X)"
"does not equals 2."
"Expected the size of last dimension of input(X) to be 2."
"But received %d",
last_dim_size));
const framework::DDim out_dims(in_dims.Get(), input_rank - 1);
ctx->SetOutputDim("Out", out_dims);
ctx->ShareLoD("X", /*->*/ "Out");
}
};
class AsComplexOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor), The input tensor of view_as_complex op.");
AddOutput("Out", "(Tensor), The output tensor of view_as_complex op.");
AddComment(R"DOC(
As_complex Operator.
This operator is used to return a complex tensor represented
by an old-fashioned real tensor. The size of the last dimension of
the input tensor should be 2, which corresponds to the real and
imaginary parts, respectively.
)DOC");
}
};
template <typename T>
class AsComplexGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
void Apply(GradOpPtr<T> retv) const override {
retv->SetType("as_real");
retv->SetInput("X", this->OutputGrad("Out"));
retv->SetAttrMap(this->Attrs());
retv->SetOutput("Out", this->InputGrad("X"));
}
};
class AsRealOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "as_real");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "as_real");
auto out_dims_v = framework::vectorize(ctx->GetInputDim("X"));
out_dims_v.push_back(2);
const framework::DDim out_dims = framework::make_ddim(out_dims_v);
ctx->SetOutputDim("Out", out_dims);
ctx->ShareLoD("X", /*->*/ "Out");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
auto input_data_type =
framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(framework::ToRealType(input_data_type),
ctx.GetPlace());
}
};
class AsRealOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor), The input tensor of as_real op.");
AddOutput("Out", "(Tensor), The output tensor of as_real op.");
AddComment(R"DOC(
AsReal Operator.
This operator is used to return an old-fashioned real tensor from a
complex tensor. The size of the last dimension of the output tensor is 2,
which corresponds to the real and imaginary parts, respectively.
)DOC");
}
};
template <typename T>
class AsRealGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
void Apply(GradOpPtr<T> retv) const override {
retv->SetType("as_complex");
retv->SetInput("X", this->OutputGrad("Out"));
retv->SetAttrMap(this->Attrs());
retv->SetOutput("Out", this->InputGrad("X"));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(as_complex, ops::AsComplexOp, ops::AsComplexOpMaker,
ops::AsComplexGradMaker<paddle::framework::OpDesc>,
ops::AsComplexGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(as_real, ops::AsRealOp, ops::AsRealOpMaker,
ops::AsRealGradMaker<paddle::framework::OpDesc>,
ops::AsRealGradMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
as_complex, ops::AsComplexKernel<paddle::platform::CPUDeviceContext, float>,
ops::AsComplexKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
as_real, ops::AsRealKernel<paddle::platform::CPUDeviceContext, float>,
ops::AsRealKernel<paddle::platform::CPUDeviceContext, double>);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/complex_view_op.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/enforce.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
as_complex,
ops::AsComplexKernel<paddle::platform::CUDADeviceContext, float>,
ops::AsComplexKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
as_real, ops::AsRealKernel<paddle::platform::CUDADeviceContext, float>,
ops::AsRealKernel<paddle::platform::CUDADeviceContext, double>);
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class AsComplexKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const auto* x = context.Input<framework::LoDTensor>("X");
auto* out = context.Output<framework::LoDTensor>("Out");
out->mutable_data<platform::complex<T>>(context.GetPlace());
// TensorCopy also changes output's shape & dtype
const framework::DDim out_dims_original = out->dims();
framework::TensorCopy(*x, context.GetPlace(), out);
out->Resize(out_dims_original); // restore the shape
out->mutable_data<platform::complex<T>>(
context.GetPlace()); // restore the dtype
}
};
template <typename DeviceContext, typename T>
class AsRealKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const auto* x = context.Input<framework::LoDTensor>("X");
auto* out = context.Output<framework::LoDTensor>("Out");
out->mutable_data<T>(context.GetPlace());
const framework::DDim out_dims_original = out->dims();
framework::TensorCopy(*x, context.GetPlace(), out);
out->Resize(out_dims_original); // restore the shape
out->mutable_data<T>(context.GetPlace()); // restore the dtype
}
};
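// Shape and dtype relation implied by the InferShape rules and kernels above
// (illustrative):
//   as_complex: float32 [d0, ..., dn, 2] -> complex64 [d0, ..., dn]
//   as_real:    complex64 [d0, ..., dn]  -> float32   [d0, ..., dn, 2]
// e.g. a float32 tensor of shape [3, 2] viewed as_complex yields a complex64
// tensor of shape [3].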
} // namespace operators
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/ipu_runtime_op.h"
namespace paddle {
namespace operators {
class IpuRuntimeOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::proto::VarType::Type(ctx.Attr<int>("dtype")),
ctx.device_context());
}
};
class IpuRuntimeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("FeedList", "FeedList of Graph").AsDuplicable();
AddOutput("FetchList", "FetchList of Graph").AsDuplicable();
AddAttr<int>("dtype",
"(int, default 5 (FP32)) "
"Output data type")
.SetDefault(framework::proto::VarType::FP32);
AddComment(R"DOC(
Run graph by PopART runtime.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(ipu_runtime, ops::IpuRuntimeOp, ops::IpuRuntimeOpMaker);
REGISTER_OP_IPU_KERNEL(ipu_runtime, ops::IpuRuntimeKernel<float>,
ops::IpuRuntimeKernel<double>,
ops::IpuRuntimeKernel<int>,
ops::IpuRuntimeKernel<int64_t>,
ops::IpuRuntimeKernel<bool>,
ops::IpuRuntimeKernel<int8_t>,
ops::IpuRuntimeKernel<paddle::platform::float16>);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/framework/ipu/ipu_backend.h"
#include "paddle/fluid/framework/tensor.h"
#endif
namespace paddle {
namespace operators {
template <typename T>
class IpuRuntimeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef PADDLE_WITH_IPU
auto ipu_backend = framework::ipu::IpuBackend::GetInstance();
if (!ipu_backend->DeviceIsAttached()) {
const platform::IPUDeviceContext& ipu_ctx =
reinterpret_cast<const platform::IPUDeviceContext&>(
ctx.device_context());
ipu_backend->AttachDevice(ipu_ctx.DeviceId());
}
auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
auto output_names = ctx.OutputNames("FetchList");
VLOG(4) << "IpuRuntime Kernel, begin to run graph";
ipu_backend->Run(inputs, outputs, ctx);
// post-run
// resize tensor when tensor.dims() is empty
for (size_t i = 0; i < outputs.size(); ++i) {
auto* out = outputs[i];
if (out->dims().size() == 0) {
auto tensor_dtype = out->type();
auto sizeof_dtype = framework::SizeOfType(tensor_dtype);
int64_t dim = out->memory_size() / sizeof_dtype;
out->Resize({dim});
VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
<< " dims from () to: "
<< "(" << dim << ")";
}
}
#else
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Please compile WITH_IPU option to enable ipu_runtime op"));
#endif
}
};
} // namespace operators
} // namespace paddle
......@@ -173,6 +173,13 @@ void set_constant_with_place<platform::NPUPinnedPlace>(
platform::errors::Unimplemented("NPUPinnedPlace is not supported"));
}
template <>
void set_constant_with_place<platform::IPUPlace>(
const platform::DeviceContext& context, framework::Tensor* tensor,
float value) {
PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported"));
}
template <>
void set_constant_with_place<platform::CPUPlace>(
const platform::DeviceContext& context, framework::Tensor* tensor,
......
......@@ -54,7 +54,7 @@ class PyLayerOp : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
auto data_type = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
return framework::OpKernelType(data_type, ctx.device_context());
}
......
......@@ -555,10 +555,10 @@ class Reshape2Op : public ReshapeOp {
const framework::ExecutionContext &ctx) const override {
auto multi_inputs = ctx.MultiInput<framework::Tensor>("ShapeTensor");
if (multi_inputs.size() > 0) {
return framework::KernelSignature("reshape.mulhost", {"X", "ShapeTensor"},
return framework::KernelSignature("reshape_mulhost", {"X", "ShapeTensor"},
{}, {"Out"});
} else if (ctx.HasInput("Shape")) {
return framework::KernelSignature("reshape.host", {"X", "Shape"}, {},
return framework::KernelSignature("reshape_host", {"X", "Shape"}, {},
{"Out"});
} else {
return framework::KernelSignature("reshape", {"X"}, {"shape"}, {"Out"});
......
......@@ -18,6 +18,7 @@ namespace paddle {
namespace platform {
std::unique_ptr<CUDAGraph> CUDAGraph::capturing_graph_{nullptr};
paddle::optional<std::thread::id> CUDAGraph::capturing_thread_id_{paddle::none};
void CUDAGraph::Reset() {
if (is_reset_) return;
......@@ -58,6 +59,13 @@ void CUDAGraph::BeginSegmentCapture() {
IsCapturing(), true,
errors::PermissionDenied("BeginSegmentCapture should be called when CUDA "
"Graph is capturing."));
if (IsThreadLocalCapturing()) {
PADDLE_ENFORCE_EQ(IsThisThreadCapturing(), true,
platform::errors::PermissionDenied(
"When capturing CUDA Graph in the thread local mode, "
"you cannot begin segmented capturing in the thread "
"which is not the one that starts the capturing."));
}
PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamBeginCapture(
capturing_graph_->stream_, capturing_graph_->capture_mode_));
PADDLE_ENFORCE_EQ(IsValidCapturing(), true,
......@@ -82,6 +90,11 @@ void CUDAGraph::BeginCapture(platform::CUDAPlace place, cudaStream_t stream,
capturing_graph_->place_ = place;
capturing_graph_->stream_ = stream;
capturing_graph_->capture_mode_ = mode;
if (mode == cudaStreamCaptureModeThreadLocal) {
capturing_thread_id_ = std::this_thread::get_id();
VLOG(10) << "Capturing CUDA Graph in thread local mode, thread id: "
<< capturing_thread_id_;
}
BeginSegmentCapture();
#endif
}
......@@ -115,6 +128,7 @@ void CUDAGraph::EndSegmentCapture() {
std::unique_ptr<CUDAGraph> CUDAGraph::EndCapture() {
EndSegmentCapture();
capturing_thread_id_ = paddle::none;
return std::move(capturing_graph_);
}
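// A minimal usage sketch of thread-local capture (editor's addition; the
// stream and the kernel launches are assumed to exist elsewhere):
//
//   platform::CUDAGraph::BeginCapture(platform::CUDAPlace(0), stream,
//                                     cudaStreamCaptureModeThreadLocal);
//   // ... launch kernels on `stream` from this same thread only ...
//   auto graph = platform::CUDAGraph::EndCapture();
//   graph->Replay();  // Replay() is assumed to be exposed by CUDAGraph
//
// With cudaStreamCaptureModeThreadLocal, BeginSegmentCapture enforces that
// only the thread that started the capture may add further segments.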
......
......@@ -18,6 +18,7 @@
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>
#include "cuda.h" // NOLINT
#include "cuda_runtime.h" // NOLINT
......@@ -26,6 +27,7 @@
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/utils/optional.h"
namespace paddle {
namespace platform {
......@@ -99,6 +101,25 @@ class CUDAGraph {
// supported during capturing CUDA Graph.
static bool IsValidCapturing();
static bool IsThreadLocalCapturing() {
#if CUDA_VERSION >= 10010
return IsCapturing() &&
capturing_graph_->capture_mode_ == cudaStreamCaptureModeThreadLocal;
#else
return false;
#endif
}
static bool IsThisThreadCapturing() {
if (UNLIKELY(IsCapturing())) {
return IsThreadLocalCapturing()
? capturing_thread_id_.get() == std::this_thread::get_id()
: true;
} else {
return false;
}
}
private:
static CUDAGraphID UniqueID() {
static std::atomic<CUDAGraphID> id;
......@@ -118,6 +139,7 @@ class CUDAGraph {
bool is_reset_{false};
std::mutex mtx_;
static paddle::optional<std::thread::id> capturing_thread_id_;
static std::unique_ptr<CUDAGraph> capturing_graph_;
};
......
......@@ -101,6 +101,20 @@ inline static __device__ uint32_t add_to_high_half(uint32_t val, float x) {
return (val & 0xFFFFu) | (static_cast<uint32_t>(high_half.x) << 16);
}
#if CUDA_VERSION >= 10000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
static __device__ __forceinline__ float16 CUDAFP16ToPDFP16(__half x) {
return *reinterpret_cast<float16 *>(&x);
}
static __device__ __forceinline__ __half PDFP16ToCUDAFP16(float16 x) {
return *reinterpret_cast<__half *>(&x);
}
CUDA_ATOMIC_WRAPPER(Add, float16) {
return CUDAFP16ToPDFP16(
atomicAdd(reinterpret_cast<__half *>(address), PDFP16ToCUDAFP16(val)));
}
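// Note (editor's addition): this fast path relies on paddle's float16 and
// CUDA's __half both being plain 16-bit storage types, which makes the
// reinterpret_casts above layout-safe; the native __half atomicAdd intrinsic
// only exists for CUDA >= 10.0 on devices of compute capability 7.0 or
// higher, hence the guard.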
#else
CUDA_ATOMIC_WRAPPER(Add, float16) {
  // The packed float16 value may exist in the lower or the higher 16 bits
  // of the 32-bit aligned address.
......@@ -133,6 +147,7 @@ CUDA_ATOMIC_WRAPPER(Add, float16) {
}
}
#endif
#endif
CUDA_ATOMIC_WRAPPER(Add, complex<float>) {
float *real = reinterpret_cast<float *>(address);
......
# IPU
IF(WITH_IPU)
FILE(GLOB POPART_CANONICALIZATION_SRC ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc)
cc_library(ipu_device SRCS device.cc DEPS enforce popart)
cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace platform {
namespace ipu {
namespace {
Node *activation_op_handler(Graph *graph, Node *node, const std::string &type) {
auto new_node = CreateBaseOp(graph, node, type, {GetInputVarNode("X", node)},
node->outputs);
return new_node;
}
Node *relu_handler(Graph *graph, Node *node) {
return activation_op_handler(graph, node, "popart_relu");
}
Node *tanh_handler(Graph *graph, Node *node) {
return activation_op_handler(graph, node, "popart_tanh");
}
Node *log_handler(Graph *graph, Node *node) {
return activation_op_handler(graph, node, "popart_log");
}
Node *sigmoid_handler(Graph *graph, Node *node) {
return activation_op_handler(graph, node, "popart_sigmoid");
}
Node *sqrt_handler(Graph *graph, Node *node) {
return activation_op_handler(graph, node, "popart_sqrt");
}
Node *gelu_handler(Graph *graph, Node *node) {
return activation_op_handler(graph, node, "popart_gelu_v2");
}
Node *log_softmax_handler(Graph *graph, Node *node) {
auto axis = BOOST_GET_CONST(int, node->Op()->GetAttr("axis"));
auto new_softmax = CreateSoftmaxOpset11(graph, node, node->inputs, {}, axis);
return CreateBaseOp(graph, node, "popart_log", new_softmax->outputs,
node->outputs);
}
REGISTER_HANDLER(relu, relu_handler);
REGISTER_HANDLER(tanh, tanh_handler);
REGISTER_HANDLER(log, log_handler);
REGISTER_HANDLER(sigmoid, sigmoid_handler);
REGISTER_HANDLER(sqrt, sqrt_handler);
REGISTER_HANDLER(gelu, gelu_handler);
REGISTER_HANDLER(log_softmax, log_softmax_handler);
} // namespace
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
namespace paddle {
namespace platform {
namespace ipu {
// This avoids the static initialisation order fiasco: the handler map lives
// in a function-local static and is constructed on first use.
std::unordered_map<std::string, SymbolHandler> &SymbolHandlers() {
static std::unordered_map<std::string, SymbolHandler> symbol_handlers;
return symbol_handlers;
}
bool RegisterHandler(const std::string &symbol, const SymbolHandler &handler) {
if (SymbolHandlers().count(symbol) != 0) {
LOG(WARNING) << "Trying to register popart handler twice for operator: "
<< symbol;
return false;
}
bool new_handler = SymbolHandlers().emplace(symbol, handler).second;
return new_handler;
}
// Return the handler registered for this kind of node, or an empty
// std::function otherwise.
SymbolHandler GetHandler(const std::string &kind) {
auto it = SymbolHandlers().find(kind);
if (it != SymbolHandlers().end()) {
return it->second;
}
return {};
}
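// Registration sketch (editor's addition, hypothetical op type): because the
// map is built lazily, REGISTER_HANDLER is safe from any translation unit.
//
//   Node *my_op_handler(Graph *graph, Node *node) {
//     return CreateBaseOp(graph, node, "popart_identity", node->inputs,
//                         node->outputs);
//   }
//   REGISTER_HANDLER(my_op, my_op_handler);
//
//   // Lookup side:
//   if (auto handler = GetHandler("my_op")) {
//     handler(graph, node);
//   }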
void ConnectNodes(Node *first_node, Node *next_node) {
first_node->outputs.push_back(next_node);
next_node->inputs.push_back(first_node);
}
void DisConnectNodes(Node *first_node, Node *next_node) {
auto rm_by_value = [&](std::vector<Node *> &vec, Node *n) {
vec.erase(std::remove(vec.begin(), vec.end(), n), vec.end());
};
rm_by_value(first_node->outputs, next_node);
rm_by_value(next_node->inputs, first_node);
rm_by_value(first_node->inputs, next_node);
rm_by_value(next_node->outputs, first_node);
}
void ClearNode(Node *node) {
auto rm_by_value = [&](std::vector<Node *> &vec, Node *n) {
vec.erase(std::remove(vec.begin(), vec.end(), n), vec.end());
};
for (auto *node_in : node->inputs) {
rm_by_value(node_in->outputs, node);
}
for (auto *node_out : node->outputs) {
rm_by_value(node_out->inputs, node);
}
}
void CopyOpAttr(const std::string &attr_name, OpDesc *op, OpDesc *new_op,
bool override) {
if (new_op->HasAttr(attr_name) && !override) {
return;
}
if (op->HasAttr(attr_name)) {
VLOG(10) << "Copying attr: " << attr_name << " from " << op->Type()
<< " to " << new_op->Type();
new_op->SetAttr(attr_name, op->GetAttr(attr_name));
new_op->Flush();
}
}
const int VarType2OnnxDtype(const int type) {
auto dtype = static_cast<framework::proto::VarType::Type>(type);
switch (dtype) {
case framework::proto::VarType::BOOL:
return static_cast<int>(ONNXDataType::BOOL);
case framework::proto::VarType::INT16:
return static_cast<int>(ONNXDataType::INT16);
case framework::proto::VarType::INT32:
return static_cast<int>(ONNXDataType::INT32);
case framework::proto::VarType::INT64:
return static_cast<int>(ONNXDataType::INT64);
case framework::proto::VarType::FP16:
return static_cast<int>(ONNXDataType::FLOAT16);
case framework::proto::VarType::FP32:
return static_cast<int>(ONNXDataType::FLOAT);
case framework::proto::VarType::FP64:
return static_cast<int>(ONNXDataType::DOUBLE);
case framework::proto::VarType::UINT8:
return static_cast<int>(ONNXDataType::UINT8);
case framework::proto::VarType::INT8:
return static_cast<int>(ONNXDataType::INT8);
case framework::proto::VarType::BF16:
return static_cast<int>(ONNXDataType::BFLOAT16);
case framework::proto::VarType::COMPLEX64:
return static_cast<int>(ONNXDataType::COMPLEX64);
case framework::proto::VarType::COMPLEX128:
return static_cast<int>(ONNXDataType::COMPLEX128);
default:
PADDLE_THROW(
platform::errors::Unimplemented("Unsupported data type: %d.", dtype));
}
}
const std::string VarType2PopStr(const int type) {
auto dtype = static_cast<framework::proto::VarType::Type>(type);
switch (dtype) {
case framework::proto::VarType::UINT8:
return "UINT8";
case framework::proto::VarType::INT8:
return "INT8";
case framework::proto::VarType::INT16:
return "INT16";
case framework::proto::VarType::INT32:
return "INT32";
case framework::proto::VarType::INT64:
return "INT64";
case framework::proto::VarType::BOOL:
return "BOOL";
case framework::proto::VarType::FP64:
return "DOUBLE";
case framework::proto::VarType::FP32:
return "FLOAT";
case framework::proto::VarType::FP16:
return "FLOAT16";
default:
PADDLE_THROW(
paddle::platform::errors::Unavailable("Unsupported data type."));
}
}
Node *GetInputVarNode(const std::string &input_name, const Node *op_node,
const int id) {
auto var_name = op_node->Op()->Input(input_name).at(id);
return GetInputVarNodeByVarName(var_name, op_node);
}
Node *GetOutputVarNode(const std::string &output_name, const Node *op_node,
const int id) {
auto var_name = op_node->Op()->Output(output_name).at(id);
return GetOutputVarNodeByVarName(var_name, op_node);
}
Node *GetInputVarNodeByVarName(const std::string &var_name,
const Node *op_node) {
for (auto *var : op_node->inputs) {
if (var->Name() == var_name) {
return var;
}
}
return nullptr;
}
Node *GetOutputVarNodeByVarName(const std::string &var_name,
const Node *op_node) {
for (auto *var : op_node->outputs) {
if (var->Name() == var_name) {
return var;
}
}
return nullptr;
}
const bool is_float_equal(float a, float b, float eps) {
return std::fabs(a - b) <= eps;
}
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
namespace paddle {
namespace platform {
namespace ipu {
using framework::ir::Graph;
using framework::ir::Node;
using framework::OpDesc;
#define REGISTER_HANDLER(name, func) \
static bool __UNUSED_##name = \
paddle::platform::ipu::RegisterHandler(#name, func)
using SymbolHandler = std::function<Node *(Graph *, Node *)>;
std::unordered_map<std::string, SymbolHandler> &SymbolHandlers();
bool RegisterHandler(const std::string &, const SymbolHandler &);
SymbolHandler GetHandler(const std::string &);
void ConnectNodes(Node *first_node, Node *next_node);
void DisConnectNodes(Node *first_node, Node *next_node);
void ClearNode(Node *node);
void CopyOpAttr(const std::string &attr_name, OpDesc *op, OpDesc *new_op,
bool override = false);
const int VarType2OnnxDtype(const int type);
const std::string VarType2PopStr(const int type);
Node *GetInputVarNode(const std::string &input_name, const Node *op_node,
const int id = 0);
Node *GetOutputVarNode(const std::string &output_name, const Node *op_node,
const int id = 0);
Node *GetInputVarNodeByVarName(const std::string &var_name,
const Node *op_node);
Node *GetOutputVarNodeByVarName(const std::string &var_name,
const Node *op_node);
const bool is_float_equal(float a, float b, float eps = 1e-8);
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace platform {
namespace ipu {
namespace {
Node *elementwise_op_handler(Graph *graph, Node *node,
const std::string &type) {
auto *op = node->Op();
auto x_shape = GetInputVarNode("X", node)->Var()->GetShape();
int64_t x_rank = x_shape.size();
auto y_shape = GetInputVarNode("Y", node)->Var()->GetShape();
int64_t y_rank = y_shape.size();
auto axis = BOOST_GET_CONST(int, op->GetAttr("axis"));
if (axis == -1 || axis == x_rank - 1 || x_rank == y_rank) {
auto new_node =
CreateBaseOp(graph, node, type,
{GetInputVarNode("X", node), GetInputVarNode("Y", node)},
node->outputs);
return new_node;
} else {
auto y_new_shape = std::vector<int64_t>(x_rank, 1);
for (int i = axis; i < axis + y_rank; ++i) {
y_new_shape[i] = y_shape[i - axis];
}
auto attrs = AttributeMap{
{"value", y_new_shape},
{"dims", std::vector<int64_t>{x_rank}},
{"dtype", ONNXDataType::INT64},
};
// constant
auto new_node_const = CreateConst(graph, node, {}, {}, attrs);
// reshape
auto new_node_reshape = CreateBaseOp(
graph, node, "popart_reshape",
{GetInputVarNode("Y", node), new_node_const->outputs[0]}, {});
// elementwise_op
auto new_node =
CreateBaseOp(graph, node, type,
{GetInputVarNode("X", node), new_node_reshape->outputs[0]},
node->outputs);
return new_node;
}
}
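// Worked example (editor's note): X with shape [2, 3, 4, 5], Y with shape
// [3, 4] and axis = 1 takes the else-branch; y_new_shape starts as
// [1, 1, 1, 1] and becomes [1, 3, 4, 1], so Y is reshaped to broadcast
// against X before the binary popart op is created.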
Node *elementwise_add_handler(Graph *graph, Node *node) {
return elementwise_op_handler(graph, node, "popart_add");
}
Node *elementwise_sub_handler(Graph *graph, Node *node) {
return elementwise_op_handler(graph, node, "popart_sub");
}
Node *elementwise_div_handler(Graph *graph, Node *node) {
return elementwise_op_handler(graph, node, "popart_div");
}
Node *elementwise_mul_handler(Graph *graph, Node *node) {
return elementwise_op_handler(graph, node, "popart_mul");
}
Node *elementwise_min_handler(Graph *graph, Node *node) {
return elementwise_op_handler(graph, node, "popart_min");
}
Node *elementwise_max_handler(Graph *graph, Node *node) {
return elementwise_op_handler(graph, node, "popart_max");
}
Node *elementwise_pow_handler(Graph *graph, Node *node) {
return elementwise_op_handler(graph, node, "popart_pow");
}
Node *elementwise_mod_handler(Graph *graph, Node *node) {
return elementwise_op_handler(graph, node, "popart_mod");
}
REGISTER_HANDLER(elementwise_add, elementwise_add_handler);
REGISTER_HANDLER(elementwise_sub, elementwise_sub_handler);
REGISTER_HANDLER(elementwise_div, elementwise_div_handler);
REGISTER_HANDLER(elementwise_mul, elementwise_mul_handler);
REGISTER_HANDLER(elementwise_min, elementwise_min_handler);
REGISTER_HANDLER(elementwise_max, elementwise_max_handler);
REGISTER_HANDLER(elementwise_pow, elementwise_pow_handler);
REGISTER_HANDLER(elementwise_mod, elementwise_mod_handler);
} // namespace
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace platform {
namespace ipu {
namespace {
Node *equal_handler(Graph *graph, Node *node) {
auto new_node = CreateBaseOp(
graph, node, "popart_equal",
{GetInputVarNode("X", node), GetInputVarNode("Y", node)}, node->outputs);
return new_node;
}
REGISTER_HANDLER(equal, equal_handler);
} // namespace
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace platform {
namespace ipu {
namespace {
Node *mean_handler(Graph *graph, Node *node) {
return CreateBaseOp(graph, node, "popart_reducemean",
{GetInputVarNode("X", node)},
{GetOutputVarNode("Out", node)},
{
{"keepdims", int64_t{0}},
});
}
Node *pow_handler(Graph *graph, Node *node) {
auto *op = node->Op();
if (op->HasInput("FactorTensor") && !op->Input("FactorTensor").empty()) {
return CreateBaseOp(
graph, node, "popart_pow",
{GetInputVarNode("X", node), GetInputVarNode("FactorTensor", node)},
node->outputs);
} else {
// Op(pow) -> Op(Constant)->Var(const_out)->Op(Pow)
auto value_ = BOOST_GET_CONST(float, op->GetAttr("factor"));
auto attrs =
MakeConstAttrMapFromValue<float>(value_, {1}, ONNXDataType::FLOAT);
auto new_node_const = CreateConst(graph, node, {}, {}, attrs);
return CreateBaseOp(graph, node, "popart_pow", {GetInputVarNode("X", node),
new_node_const->outputs[0]},
node->outputs);
}
}
Node *mul_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto x_num_col_dims = BOOST_GET_CONST(int, op->GetAttr("x_num_col_dims"));
auto y_num_col_dims = BOOST_GET_CONST(int, op->GetAttr("y_num_col_dims"));
auto x_shape_ = GetInputVarNode("X", node)->Var()->GetShape();
auto y_shape_ = GetInputVarNode("Y", node)->Var()->GetShape();
// build the shape for reshape
std::vector<int64_t> reshape_shape_{};
for (int left = 0; left < x_num_col_dims; left++) {
reshape_shape_.push_back(int64_t(x_shape_[left]));
}
for (int right = y_num_col_dims; right < y_shape_.size(); right++) {
reshape_shape_.push_back(int64_t(y_shape_[right]));
}
auto x_flatten =
CreateBaseOp(graph, node, "popart_flatten", {GetInputVarNode("X", node)},
{}, {{"axis", int64_t(x_num_col_dims)}});
auto y_flatten =
CreateBaseOp(graph, node, "popart_flatten", {GetInputVarNode("Y", node)},
{}, {{"axis", int64_t(y_num_col_dims)}});
auto matmul =
CreateBaseOp(graph, node, "popart_matmul",
{x_flatten->outputs[0], y_flatten->outputs[0]}, {}, {});
auto reshape_const = CreateConst(
graph, node, {}, {},
{{"value", reshape_shape_},
{"dims", std::vector<int64_t>{int64_t(reshape_shape_.size())}},
{"dtype", ONNXDataType::INT64}});
return CreateBaseOp(graph, node, "popart_reshape",
{matmul->outputs[0], reshape_const->outputs[0]},
node->outputs, {});
}
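// Worked example (editor's note): X with shape [2, 3, 4], Y with shape
// [12, 5], x_num_col_dims = 1 and y_num_col_dims = 1. X is flattened to
// [2, 12], Y to [12, 5], the matmul yields [2, 5], and reshape_shape_
// ([2] from X's leading dims plus [5] from Y's trailing dims) restores the
// expected output shape.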
Node *matmul_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto transpose_x = BOOST_GET_CONST(bool, op->GetAttr("transpose_X"));
auto transpose_y = BOOST_GET_CONST(bool, op->GetAttr("transpose_Y"));
auto alpha = BOOST_GET_CONST(float, op->GetAttr("alpha"));
auto x_shape = GetInputVarNode("X", node)->Var()->GetShape();
auto y_shape = GetInputVarNode("Y", node)->Var()->GetShape();
int x_rank = x_shape.size();
std::vector<int64_t> perm;
if (x_rank == 1) {
perm = std::vector<int64_t>{0};
} else if (x_rank == 2) {
return CreateGemm(graph, node,
{GetInputVarNode("X", node), GetInputVarNode("Y", node)},
node->outputs, transpose_x, transpose_y, alpha);
} else if (x_rank == 3) {
perm = std::vector<int64_t>{0, 2, 1};
} else if (x_rank == 4) {
perm = std::vector<int64_t>{0, 1, 3, 2};
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"op matmul with input rank == %d", x_rank));
}
Node *x_node = GetInputVarNode("X", node);
Node *y_node = GetInputVarNode("Y", node);
if (transpose_x) {
x_node = CreateBaseOp(graph, node, "popart_transpose",
{GetInputVarNode("X", node)}, {}, {{"perm", perm}});
x_node = x_node->outputs[0];
}
if (transpose_y) {
y_node = CreateBaseOp(graph, node, "popart_transpose",
{GetInputVarNode("Y", node)}, {}, {{"perm", perm}});
y_node = y_node->outputs[0];
}
  if (is_float_equal(alpha, 1.0)) {
    return CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node},
                        node->outputs);
  } else {
    // alpha != 1.0: scale the matmul result by a constant.
    auto o_node =
        CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node}, {});
    auto attr = MakeConstAttrMapFromValue(alpha, {1}, ONNXDataType::FLOAT);
    auto const_node = CreateConst(graph, node, {}, {}, attr);
    return CreateBaseOp(graph, node, "popart_mul",
                        {o_node->outputs[0], const_node->outputs[0]},
                        node->outputs);
  }
}
Node *sum_handler(Graph *graph, Node *node) {
return CreateBaseOp(graph, node, "popart_sum", node->inputs, node->outputs);
}
Node *softmax_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto axis = BOOST_GET_CONST(int, op->GetAttr("axis"));
return CreateSoftmaxOpset11(graph, node, node->inputs, node->outputs, axis);
}
Node *scale_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto scale_ = BOOST_GET_CONST(float, op->GetAttr("scale"));
auto bias_ = BOOST_GET_CONST(float, op->GetAttr("bias"));
auto bias_after_scale_ =
BOOST_GET_CONST(bool, op->GetAttr("bias_after_scale"));
auto data_type_ = GetInputVarNode("X", node)->Var()->GetDataType();
auto new_node_bias_var =
CreateConst(graph, node, {}, {}, {{"value", std::vector<float>{bias_}},
{"dims", std::vector<int64_t>{1}},
{"dtype", ONNXDataType::FLOAT}});
new_node_bias_var = new_node_bias_var->outputs[0];
Node *new_node_scale_var = nullptr;
if (op->HasInput("ScaleTensor") && !op->Input("ScaleTensor").empty()) {
new_node_scale_var = GetInputVarNode("ScaleTensor", node);
} else {
new_node_scale_var =
CreateConst(graph, node, {}, {}, {{"value", std::vector<float>{scale_}},
{"dims", std::vector<int64_t>{1}},
{"dtype", ONNXDataType::FLOAT}});
new_node_scale_var = new_node_scale_var->outputs[0];
}
// convert to float32
auto new_node_cast =
CreateCast(graph, node, {GetInputVarNode("X", node)}, {},
static_cast<int>(framework::proto::VarType::FP32));
Node *result = nullptr;
if (bias_after_scale_) {
auto new_node_mul =
CreateBaseOp(graph, node, "popart_mul",
{new_node_cast->outputs[0], new_node_scale_var}, {}, {});
result =
CreateBaseOp(graph, node, "popart_add",
{new_node_mul->outputs[0], new_node_bias_var}, {}, {});
} else {
auto new_node_add =
CreateBaseOp(graph, node, "popart_add",
{new_node_cast->outputs[0], new_node_bias_var}, {}, {});
result =
CreateBaseOp(graph, node, "popart_mul",
{new_node_add->outputs[0], new_node_scale_var}, {}, {});
}
auto result_after_cast =
CreateCast(graph, node, result->outputs, node->outputs,
static_cast<int>(data_type_));
return result_after_cast;
}
Node *cross_entropy2_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto ignoreIndex = BOOST_GET_CONST(int, op->GetAttr("ignore_index"));
auto new_cast = CreateCast(graph, node, {GetInputVarNode("Label", node)}, {},
framework::proto::VarType::INT32);
auto label_shape_ = GetInputVarNode("Label", node)->Var()->GetShape();
if (label_shape_.size() == 1) {
return CreateBaseOp(graph, node, "popart_nllloss",
{GetInputVarNode("X", node), new_cast->outputs[0]},
{GetOutputVarNode("Y", node)},
{
{"ignoreIndex", ignoreIndex},
});
} else {
std::vector<int64_t> new_shape_{label_shape_[0]};
auto const_before_loss = CreateBaseOp(
graph, node, "popart_constant", {}, {},
{{"value", new_shape_},
{"dims",
std::vector<int64_t>{static_cast<int64_t>(new_shape_.size())}},
{"dtype", ONNXDataType::INT64}});
auto reshape_before_loss = CreateBaseOp(
graph, node, "popart_reshape",
{new_cast->outputs[0], const_before_loss->outputs[0]}, {}, {});
auto nllloss = CreateBaseOp(
graph, node, "popart_nllloss",
{GetInputVarNode("X", node), reshape_before_loss->outputs[0]}, {},
{
{"ignoreIndex", ignoreIndex},
});
auto const_after_loss = CreateBaseOp(
graph, node, "popart_constant", {}, {},
{{"value", label_shape_},
{"dims",
std::vector<int64_t>{static_cast<int64_t>(label_shape_.size())}},
{"dtype", ONNXDataType::INT64}});
auto reshape_after_loss =
CreateBaseOp(graph, node, "popart_reshape",
{nllloss->outputs[0], const_after_loss->outputs[0]},
{GetOutputVarNode("Y", node)}, {});
return reshape_after_loss;
}
}
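// Shape walk-through (editor's note): for Label of shape [N, 1] the handler
// casts it to INT32, reshapes it to [N] (popart_nllloss expects a 1-D
// target), applies nllloss, then reshapes the per-sample loss back to
// [N, 1] so the output matches the Paddle op's layout.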
REGISTER_HANDLER(mean, mean_handler);
REGISTER_HANDLER(pow, pow_handler);
REGISTER_HANDLER(mul, mul_handler);
REGISTER_HANDLER(matmul, matmul_handler);
REGISTER_HANDLER(sum, sum_handler);
REGISTER_HANDLER(softmax, softmax_handler);
REGISTER_HANDLER(scale, scale_handler);
REGISTER_HANDLER(cross_entropy2, cross_entropy2_handler);
} // namespace
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace platform {
namespace ipu {
namespace {
Node *conv2d_handler(Graph *graph, Node *node) {
OpDesc *op = node->Op();
auto dilations_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("dilations"));
auto dilations = std::vector<int64_t>{dilations_.begin(), dilations_.end()};
auto group_ = BOOST_GET_CONST(int, op->GetAttr("groups"));
auto pads_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("paddings"));
if (pads_.size() == 2) {
pads_.push_back(pads_[0]);
pads_.push_back(pads_[1]);
}
auto pads = std::vector<int64_t>{pads_.begin(), pads_.end()};
auto stride_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("strides"));
auto stride = std::vector<int64_t>{stride_.begin(), stride_.end()};
if (op->HasInput("Bias") && !op->Input("Bias").empty()) {
return CreateConv(
graph, node,
{
GetInputVarNode("Input", node), GetInputVarNode("Filter", node),
GetInputVarNode("Bias", node),
},
node->outputs, dilations, group_, {}, pads, stride);
} else {
return CreateConv(
graph, node,
{
GetInputVarNode("Input", node), GetInputVarNode("Filter", node),
},
node->outputs, dilations, group_, {}, pads, stride);
}
}
Node *batch_norm_handler(Graph *graph, Node *node) {
auto *op = node->Op();
std::vector<Node *> inputs;
inputs.push_back(GetInputVarNode("X", node));
inputs.push_back(GetInputVarNode("Scale", node));
inputs.push_back(GetInputVarNode("Bias", node));
inputs.push_back(GetInputVarNode("Mean", node));
inputs.push_back(GetInputVarNode("Variance", node));
int64_t num_outputs = 1;
std::vector<Node *> outputs;
auto is_test_type = op->GetAttrType("is_test");
bool is_test;
if (is_test_type == 0) {
// int
is_test = BOOST_GET_CONST(int, op->GetAttr("is_test"));
} else {
// bool
is_test = BOOST_GET_CONST(bool, op->GetAttr("is_test"));
}
outputs.push_back(GetOutputVarNode("Y", node));
if (!is_test) {
outputs.push_back(GetOutputVarNode("MeanOut", node));
outputs.push_back(GetOutputVarNode("VarianceOut", node));
outputs.push_back(GetOutputVarNode("SavedMean", node));
outputs.push_back(GetOutputVarNode("SavedVariance", node));
num_outputs = 5;
}
// outputs.push_back(GetOutputVarNode("ReserveSpace", node));
auto momentum = BOOST_GET_CONST(float, op->GetAttr("momentum"));
auto epsilon = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
// data_layout
return CreateBaseOp(graph, node, "popart_batchnormalization", inputs, outputs,
{
{"momentum", momentum},
{"epsilon", epsilon},
{"num_outputs", num_outputs},
});
}
Node *pool2d_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto pooling_type = BOOST_GET_CONST(std::string, op->GetAttr("pooling_type"));
auto global_pooling = BOOST_GET_CONST(bool, op->GetAttr("global_pooling"));
if (global_pooling) {
if (pooling_type == "max") {
return CreateBaseOp(graph, node, "popart_globalmaxpool", node->inputs,
node->outputs);
} else if (pooling_type == "avg") {
return CreateBaseOp(graph, node, "popart_globalaveragepool", node->inputs,
node->outputs);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"op pool2d with unkonwn pooling_type: %s", pooling_type));
}
}
if (op->HasAttr("padding_algorithm")) {
auto padding_algorithm =
BOOST_GET_CONST(std::string, op->GetAttr("padding_algorithm"));
if (padding_algorithm != "EXPLICIT") {
PADDLE_THROW(platform::errors::InvalidArgument(
"op pool2d with unkonwn padding_algorithm: %s", padding_algorithm));
}
}
auto ksize = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ksize"));
auto kernel_shape = std::vector<int64_t>{ksize.begin(), ksize.end()};
auto ceil_mode_ = BOOST_GET_CONST(bool, op->GetAttr("ceil_mode"));
auto ceil_mode = int64_t(ceil_mode_ ? 1 : 0);
auto paddings = BOOST_GET_CONST(std::vector<int>, op->GetAttr("paddings"));
auto pads = std::vector<int64_t>{paddings.begin(), paddings.end()};
if (pads.size() == 2) {
pads.push_back(paddings[0]);
pads.push_back(paddings[1]);
}
auto strides_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("strides"));
auto strides = std::vector<int64_t>{strides_.begin(), strides_.end()};
if (pooling_type == "max") {
int64_t num_outputs = 1;
auto dilations = std::vector<int64_t>{};
int64_t storage_order = 0;
return CreateBaseOp(graph, node, "popart_maxpool", node->inputs,
node->outputs, {
{"num_outputs", num_outputs},
{"kernel_shape", kernel_shape},
{"ceil_mode", ceil_mode},
{"dilations", dilations},
{"pads", pads},
{"storage_order", storage_order},
{"strides", strides},
});
} else if (pooling_type == "avg") {
int64_t count_include_pad = 0;
return CreateBaseOp(graph, node, "popart_averagepool", node->inputs,
node->outputs,
{
{"kernel_shape", kernel_shape},
{"ceil_mode", ceil_mode},
{"count_include_pad", count_include_pad},
{"pads", pads},
{"strides", strides},
});
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"op pool2d with unkonwn pooling_type: %s", pooling_type));
}
}
Node *group_norm_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto epsilon_ = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
auto groups_ = BOOST_GET_CONST(int, op->GetAttr("groups"));
auto groups = int64_t{groups_};
auto attrs_ = AttributeMap{{"epsilon", epsilon_}, {"num_groups", groups}};
std::vector<Node *> inputs_ = {GetInputVarNode("X", node),
GetInputVarNode("Scale", node),
GetInputVarNode("Bias", node)};
std::vector<Node *> outputs_ = {GetOutputVarNode("Y", node),
GetOutputVarNode("Mean", node),
GetOutputVarNode("Variance", node)};
return CreateBaseOp(graph, node, "popart_groupnormalization_v2", inputs_,
outputs_, attrs_);
}
Node *instance_norm_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto epsilon_ = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
auto attrs_ = AttributeMap{{"epsilon", epsilon_}};
std::vector<Node *> inputs_ = {GetInputVarNode("X", node),
GetInputVarNode("Scale", node),
GetInputVarNode("Bias", node)};
std::vector<Node *> outputs_ = {GetOutputVarNode("Y", node)};
return CreateBaseOp(graph, node, "popart_instancenormalization", inputs_,
outputs_, attrs_);
}
Node *layer_norm_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto begin_norm_axis_ = BOOST_GET_CONST(int, op->GetAttr("begin_norm_axis"));
auto input_shape_ = GetInputVarNode("X", node)->Var()->GetShape();
std::vector<int64_t> norm_shape_{1, 1};
for (int i = 0; i < input_shape_.size(); i++) {
if (i < begin_norm_axis_) {
norm_shape_[0] *= input_shape_[i];
} else {
norm_shape_[1] *= input_shape_[i];
}
}
auto attrs1 = AttributeMap{
{"value", norm_shape_},
{"dims", std::vector<int64_t>{static_cast<int64_t>(norm_shape_.size())}},
{"dtype", ONNXDataType::INT64}};
auto reshape1_const =
CreateBaseOp(graph, node, "popart_constant", {}, {}, attrs1);
auto new_node_reshape1 = CreateBaseOp(
graph, node, "popart_reshape",
{GetInputVarNode("X", node), reshape1_const->outputs[0]}, {}, {});
auto epsilon_ = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
int64_t groups_ = 1;
auto groupnorm_attrs_ =
AttributeMap{{"epsilon", epsilon_}, {"num_groups", groups_}};
auto out_Y_ = MakeVarNode(graph, node);
CreateBaseOp(graph, node, "popart_groupnormalization_v2",
{new_node_reshape1->outputs[0], GetInputVarNode("Scale", node),
GetInputVarNode("Bias", node)},
{out_Y_, GetOutputVarNode("Mean", node),
GetOutputVarNode("Variance", node)},
groupnorm_attrs_);
auto attrs2 = AttributeMap{
{"value", input_shape_},
{"dims", std::vector<int64_t>{static_cast<int64_t>(input_shape_.size())}},
{"dtype", ONNXDataType::INT64}};
auto reshape2_const =
CreateBaseOp(graph, node, "popart_constant", {}, {}, attrs2);
auto new_node_reshape2 = CreateBaseOp(graph, node, "popart_reshape",
{out_Y_, reshape2_const->outputs[0]},
{GetOutputVarNode("Y", node)}, {});
return new_node_reshape2;
}
Node *dropout_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto dropout_prob_ = BOOST_GET_CONST(float, op->GetAttr("dropout_prob"));
auto dropout_implementation_ =
BOOST_GET_CONST(std::string, op->GetAttr("dropout_implementation"));
auto is_test_type_ = op->GetAttrType("is_test");
bool is_test_;
if (is_test_type_ == 0) {
// int
is_test_ = BOOST_GET_CONST(int, op->GetAttr("is_test"));
} else {
// bool
is_test_ = BOOST_GET_CONST(bool, op->GetAttr("is_test"));
}
if (is_test_) {
if (dropout_implementation_ == "upscale_in_train") {
return CreateBaseOp(graph, node, "popart_identity",
{GetInputVarNode("X", node)},
{GetOutputVarNode("Out", node)}, {});
} else if (dropout_implementation_ == "downgrade_in_infer") {
auto scale =
CreateConst(graph, node, {}, {},
{{"value", std::vector<float>{1 - dropout_prob_}},
{"dims", std::vector<int64_t>{1}},
{"dtype", ONNXDataType::FLOAT}});
return CreateBaseOp(graph, node, "popart_mul",
{GetInputVarNode("X", node), scale->outputs[0]},
{GetOutputVarNode("Out", node)}, {});
} else {
PADDLE_THROW(
platform::errors::InvalidArgument("Invalid dropout_implementation"));
}
} else {
if (dropout_implementation_ == "upscale_in_train") {
auto attrs_ =
AttributeMap{{"num_outputs", (int64_t)1}, {"ratio", dropout_prob_}};
return CreateBaseOp(graph, node, "popart_dropout",
{GetInputVarNode("X", node)},
{GetOutputVarNode("Out", node)}, attrs_);
} else if (dropout_implementation_ == "downgrade_in_infer") {
PADDLE_THROW(platform::errors::InvalidArgument(
"Do not support downgrade_in_infer with training"));
} else {
PADDLE_THROW(
platform::errors::InvalidArgument("Invalid dropout_implementation"));
}
}
}
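// Semantics being matched (editor's note), with dropout probability p:
//   upscale_in_train:    train y = x * mask / (1 - p),  infer y = x
//   downgrade_in_infer:  train y = x * mask,            infer y = x * (1 - p)
// which is why inference maps to popart_identity or a constant multiply,
// and training with downgrade_in_infer is rejected above.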
REGISTER_HANDLER(pool2d, pool2d_handler);
REGISTER_HANDLER(batch_norm, batch_norm_handler);
REGISTER_HANDLER(group_norm, group_norm_handler);
REGISTER_HANDLER(instance_norm, instance_norm_handler);
REGISTER_HANDLER(layer_norm, layer_norm_handler);
REGISTER_HANDLER(conv2d, conv2d_handler);
REGISTER_HANDLER(dropout, dropout_handler);
} // namespace
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
namespace paddle {
namespace platform {
namespace ipu {
// File-local counters used to generate unique var/op names.
static int var_count = 0;
static int op_count = 0;
const std::string GenerateVarName() {
return std::string("_gen_var_") + std::to_string(var_count++);
}
const std::string GenerateOpName() {
return std::string("_gen_op_") + std::to_string(op_count++);
}
const std::string CreateOpIdentifyId(Node *node) {
// format: op_type|out_var0|out_var1|...|_gen_*
// this name will be used as op name when exporting onnx model from popart
auto op_type = node->Name();
std::string op_out = "";
for (auto *out_node : node->outputs) {
op_out += "|";
op_out += out_node->Name();
}
return {op_type + op_out + "|" + GenerateOpName()};
}
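// Example (editor's note, hypothetical names): a "relu" node whose only
// output var is "fc_0.tmp_1" yields an identify id like
// "relu|fc_0.tmp_1|_gen_op_7", where the trailing counter comes from
// GenerateOpName().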
Node *MakeVarNode(Graph *graph, Node *node) {
auto var_name = GenerateVarName();
auto var_desc = std::make_unique<framework::VarDesc>(var_name);
auto var = graph->CreateVarNode(var_desc.get());
return var;
}
Node *MakeOpNode(Graph *graph, Node *node, const std::string &type,
const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs) {
auto op_desc = std::make_unique<framework::OpDesc>();
op_desc->SetType(type);
auto op = graph->CreateOpNode(op_desc.get());
for (auto *in : inputs) {
ConnectNodes(in, op);
}
if (outputs.empty()) {
auto var = MakeVarNode(graph, node);
ConnectNodes(op, var);
} else {
for (auto *out : outputs) {
ConnectNodes(op, out);
}
}
// i/o
std::vector<std::string> input_names;
for (auto node : op->inputs) {
input_names.push_back(node->Name());
}
op->Op()->SetInput("__inputs__", input_names);
std::vector<std::string> output_names;
for (auto node : op->outputs) {
output_names.push_back(node->Name());
}
op->Op()->SetOutput("__outputs__", output_names);
op->Op()->Flush();
return op;
}
Node *CreateBaseOp(Graph *graph, Node *node, const std::string &type,
const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs,
const AttributeMap &attrs) {
auto new_node = MakeOpNode(graph, node, type, inputs, outputs);
if (!attrs.empty()) {
new_node->Op()->SetAttrMap(attrs);
}
// deal special attr
if (!new_node->Op()->HasAttr(sIpuIndexAttr)) {
CopyOpAttr(sIpuIndexAttr, node->Op(), new_node->Op());
}
if (!new_node->Op()->HasAttr(sIpuStageAttr)) {
CopyOpAttr(sIpuStageAttr, node->Op(), new_node->Op());
}
{
new_node->Op()->SetAttr(sOpIdentifyIdAttr, CreateOpIdentifyId(node));
new_node->Op()->Flush();
}
return new_node;
}
Node *CreateConst(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs,
const AttributeMap &attrs) {
return CreateBaseOp(graph, node, "popart_constant", inputs, outputs, attrs);
}
Node *CreateCast(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs, const int otype) {
auto to = VarType2PopStr(otype);
return CreateBaseOp(graph, node, "popart_cast", inputs, outputs,
{{"to", to}});
}
Node *CreateGemm(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs, int64_t transA,
int64_t transB, float alpha, float beta) {
return CreateBaseOp(graph, node, "popart_gemm", inputs, outputs,
{
{"alpha", alpha},
{"beta", beta},
{"transA", transA},
{"transB", transB},
});
}
Node *CreateReshape(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs,
const std::vector<int64_t> &oshape) {
auto attr = AttributeMap{
{"value", oshape},
{"dims", std::vector<int64_t>{static_cast<int64_t>(oshape.size())}},
{"dtype", ONNXDataType::INT64}};
auto new_node_const =
CreateBaseOp(graph, node, "popart_constant", {}, {}, attr);
auto new_node_reshape =
CreateBaseOp(graph, node, "popart_reshape",
{inputs[0], new_node_const->outputs[0]}, outputs);
return new_node_reshape;
}
Node *CreateConv(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs,
const std::vector<int64_t> &dilations, int64_t group,
const std::vector<int64_t> &kernel_shape,
const std::vector<int64_t> &pads,
const std::vector<int64_t> &strides) {
auto attrs = AttributeMap{
{"dilations", dilations}, {"group", group},
{"kernel_shape", kernel_shape}, {"pads", pads},
{"strides", strides},
};
return CreateBaseOp(graph, node, "popart_conv", inputs, outputs, attrs);
}
Node *CreateSoftmaxOpset11(Graph *graph, Node *node,
const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs, int64_t axis) {
PADDLE_ENFORCE_EQ(inputs.size(), 1, platform::errors::InvalidArgument(
"Softmax op only support one input"));
auto x_shape = inputs[0]->Var()->GetShape();
int x_rank = x_shape.size();
if (axis < 0) {
axis = axis + x_rank;
}
if (axis == x_rank - 1) {
return CreateBaseOp(graph, node, "popart_softmax", inputs, outputs,
{{"axis", int64_t{-1}}});
} else {
auto perm = std::vector<int64_t>(x_rank);
std::iota(perm.begin(), perm.end(), 0);
perm[x_rank - 1] = axis;
perm[axis] = x_rank - 1;
auto new_transpose_pre = CreateBaseOp(graph, node, "popart_transpose",
inputs, {}, {{"perm", perm}});
auto new_softmax =
CreateBaseOp(graph, node, "popart_softmax", new_transpose_pre->outputs,
{}, {{"axis", int64_t{-1}}});
return CreateBaseOp(graph, node, "popart_transpose", new_softmax->outputs,
outputs, {{"perm", perm}});
}
}
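// Worked example (editor's note): for x_rank = 4 and a normalized axis = 1,
// perm is {0, 3, 2, 1}, i.e. a single swap of dims 1 and 3. Applying the
// same perm twice is the identity, which is why one attribute map serves
// both the pre- and post-softmax transposes.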
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/platform/device/ipu/common.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
namespace paddle {
namespace platform {
namespace ipu {
using paddle::framework::AttributeMap;
template <typename T>
AttributeMap MakeConstAttrMap(std::vector<T> value, std::vector<int64_t> dims,
int dtype) {
return AttributeMap{{"value", value}, {"dims", dims}, {"dtype", dtype}};
}
template <typename T>
AttributeMap MakeConstAttrMapFromValue(T v, std::vector<int64_t> dims,
int dtype) {
size_t size = 1;
for (auto &dim : dims) {
size *= dim;
}
return MakeConstAttrMap<T>(std::vector<T>(size, v), dims, dtype);
}
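// Usage sketch (editor's note): MakeConstAttrMapFromValue<float>(2.0f, {2, 2},
// ONNXDataType::FLOAT) expands the scalar into value = {2, 2, 2, 2} with
// dims = {2, 2}, ready to feed CreateConst.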
const std::string GenerateVarName();
const std::string CreateOpIdentifyId(Node *node);
Node *MakeVarNode(Graph *graph, Node *node);
Node *MakeOpNode(Graph *graph, Node *node, const std::string &type,
const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs);
Node *CreateBaseOp(Graph *graph, Node *node, const std::string &type,
const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs,
const AttributeMap &attrs = {});
Node *CreateConst(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs,
const AttributeMap &attrs);
// otype is proto::VarType::Type
Node *CreateCast(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs, const int otype);
Node *CreateGemm(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs, int64_t transA = 0,
int64_t transB = 0, float alpha = 1.0f, float beta = 1.0f);
Node *CreateReshape(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs,
const std::vector<int64_t> &oshape);
Node *CreateConv(Graph *graph, Node *node, const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs,
const std::vector<int64_t> &dilations = {1, 1},
int64_t group = 1,
const std::vector<int64_t> &kernel_shape = {},
const std::vector<int64_t> &pads = {0, 0, 0, 0},
const std::vector<int64_t> &strides = {1, 1});
Node *CreateSoftmaxOpset11(Graph *graph, Node *node,
const std::vector<Node *> &inputs,
const std::vector<Node *> &outputs, int64_t axis);
} // namespace ipu
} // namespace platform
} // namespace paddle
......@@ -195,3 +195,5 @@ OP_DECL(popart_sqrt, aiOnnxOpset.sqrt, NONE) // NOLINT
OP_DECL(popart_tanh, aiOnnxOpset.tanh, NONE) // NOLINT
OP_DECL(popart_tile, aiOnnxOpset.tile, NONE) // NOLINT
OP_DECL(popart_transpose, aiOnnxOpset.transpose, ARG(INT_VEC,perm) ) // NOLINT
// clang-format on
......@@ -16,6 +16,9 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#endif
#include "glog/logging.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -96,8 +99,9 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
if (it == device_contexts_.end()) {
PADDLE_THROW(platform::errors::Unimplemented(
"Place %s is not supported. Please check that your paddle compiles "
"with WITH_GPU, WITH_XPU or WITH_ASCEND_CL option or check that "
"your train process set the correct device id if you use Executor.",
"with WITH_GPU, WITH_XPU, WITH_IPU or WITH_ASCEND_CL option or check "
"that your train process set the correct device id if you use "
"Executor.",
place));
}
return it->second.get().get();
......@@ -158,6 +162,14 @@ DeviceContextPool::DeviceContextPool(
PADDLE_THROW(
platform::errors::Unimplemented("XPUPlace is not supported. Please "
"re-compile with WITH_XPU option."));
#endif
} else if (platform::is_ipu_place(p)) {
#ifdef PADDLE_WITH_IPU
EmplaceDeviceContext<IPUDeviceContext, IPUPlace>(&device_contexts_, p);
#else
PADDLE_THROW(
platform::errors::Unimplemented("IPUPlace is not supported. Please "
"re-compile with WITH_IPU option."));
#endif
} else if (platform::is_npu_place(p)) {
#ifdef PADDLE_WITH_ASCEND_CL
......@@ -195,6 +207,22 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place CPUDeviceContext::GetPlace() const { return place_; }
#ifdef PADDLE_WITH_IPU
IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) {
int id = place.GetDeviceId();
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
device_ = ipu_backend->GetDevice(id);
}
Place IPUDeviceContext::GetPlace() const { return place_; }
void IPUDeviceContext::Wait() const {
/*! \brief Wait for all operations completion in the stream. */
}
IPUDeviceContext::~IPUDeviceContext() {}
#endif
#ifdef PADDLE_WITH_XPU
XPUDeviceContext::XPUDeviceContext() {
context_ = xpu::create_context();
......
......@@ -36,6 +36,7 @@ class PlacePrinter : public boost::static_visitor<> {
void operator()(const XPUPlace &p) { os_ << "XPUPlace(" << p.device << ")"; }
void operator()(const NPUPlace &p) { os_ << "NPUPlace(" << p.device << ")"; }
void operator()(const NPUPinnedPlace &p) { os_ << "NPUPinnedPlace"; }
void operator()(const IPUPlace &p) { os_ << "IPUPlace(" << p.device << ")"; }
void operator()(const CUDAPinnedPlace &p) { os_ << "CUDAPinnedPlace"; }
private:
......@@ -56,6 +57,10 @@ bool is_npu_place(const Place &p) {
return boost::apply_visitor(IsNPUPlace(), p);
}
bool is_ipu_place(const Place &p) {
return boost::apply_visitor(IsIPUPlace(), p);
}
bool is_cpu_place(const Place &p) {
return boost::apply_visitor(IsCPUPlace(), p);
}
......@@ -80,6 +85,8 @@ bool is_same_place(const Place &p1, const Place &p2) {
return BOOST_GET_CONST(XPUPlace, p1) == BOOST_GET_CONST(XPUPlace, p2);
} else if (is_npu_place(p1)) {
return BOOST_GET_CONST(NPUPlace, p1) == BOOST_GET_CONST(NPUPlace, p2);
} else if (is_ipu_place(p1)) {
return BOOST_GET_CONST(IPUPlace, p1) == BOOST_GET_CONST(IPUPlace, p2);
} else {
return BOOST_GET_CONST(CUDAPlace, p1) == BOOST_GET_CONST(CUDAPlace, p2);
}
......
......@@ -95,12 +95,25 @@ struct NPUPinnedPlace {
inline bool operator!=(const NPUPinnedPlace &) const { return false; }
inline bool operator<(const NPUPinnedPlace &) const { return false; }
};
struct IPUPlace {
IPUPlace() : IPUPlace(0) {}
explicit IPUPlace(int d) : device(d) {}
inline int GetDeviceId() const { return device; }
// needed for variant equality comparison
inline bool operator==(const IPUPlace &o) const { return device == o.device; }
inline bool operator!=(const IPUPlace &o) const { return !(*this == o); }
inline bool operator<(const IPUPlace &o) const { return device < o.device; }
int device;
};
struct IsCUDAPlace : public boost::static_visitor<bool> {
bool operator()(const CPUPlace &) const { return false; }
bool operator()(const XPUPlace &) const { return false; }
bool operator()(const NPUPlace &) const { return false; }
bool operator()(const NPUPinnedPlace &) const { return false; }
bool operator()(const IPUPlace &) const { return false; }
bool operator()(const CUDAPlace &) const { return true; }
bool operator()(const CUDAPinnedPlace &) const { return false; }
};
......@@ -110,6 +123,7 @@ struct IsCPUPlace : public boost::static_visitor<bool> {
bool operator()(const XPUPlace &) const { return false; }
bool operator()(const NPUPlace &) const { return false; }
bool operator()(const NPUPinnedPlace &) const { return false; }
bool operator()(const IPUPlace &) const { return false; }
bool operator()(const CUDAPlace &) const { return false; }
bool operator()(const CUDAPinnedPlace &) const { return false; }
};
......@@ -119,6 +133,7 @@ struct IsCUDAPinnedPlace : public boost::static_visitor<bool> {
bool operator()(const XPUPlace &) const { return false; }
bool operator()(const NPUPlace &) const { return false; }
bool operator()(const NPUPinnedPlace &) const { return false; }
bool operator()(const IPUPlace &) const { return false; }
bool operator()(const CUDAPlace &) const { return false; }
bool operator()(const CUDAPinnedPlace &cuda_pinned) const { return true; }
};
......@@ -128,6 +143,7 @@ struct IsXPUPlace : public boost::static_visitor<bool> {
bool operator()(const XPUPlace &) const { return true; }
bool operator()(const NPUPlace &) const { return false; }
bool operator()(const NPUPinnedPlace &) const { return false; }
bool operator()(const IPUPlace &) const { return false; }
bool operator()(const CUDAPlace &) const { return false; }
bool operator()(const CUDAPinnedPlace &) const { return false; }
};
......@@ -137,6 +153,7 @@ struct IsNPUPlace : public boost::static_visitor<bool> {
bool operator()(const XPUPlace &) const { return false; }
bool operator()(const NPUPlace &) const { return true; }
bool operator()(const NPUPinnedPlace &) const { return false; }
bool operator()(const IPUPlace &) const { return false; }
bool operator()(const CUDAPlace &) const { return false; }
bool operator()(const CUDAPinnedPlace &) const { return false; }
};
......@@ -145,22 +162,33 @@ struct IsNPUPinnedPlace : public boost::static_visitor<bool> {
bool operator()(const CPUPlace &) const { return false; }
bool operator()(const XPUPlace &) const { return false; }
bool operator()(const NPUPlace &) const { return false; }
bool operator()(const IPUPlace &) const { return false; }
bool operator()(const CUDAPlace &) const { return false; }
bool operator()(const CUDAPinnedPlace &) const { return false; }
bool operator()(const NPUPinnedPlace &) const { return true; }
};
struct IsIPUPlace : public boost::static_visitor<bool> {
bool operator()(const CPUPlace &) const { return false; }
bool operator()(const XPUPlace &) const { return false; }
bool operator()(const NPUPlace &) const { return false; }
bool operator()(const IPUPlace &) const { return true; }
bool operator()(const CUDAPlace &) const { return false; }
bool operator()(const CUDAPinnedPlace &) const { return false; }
bool operator()(const NPUPinnedPlace &) const { return false; }
};
class Place : public boost::variant<CUDAPlace, XPUPlace, NPUPlace, CPUPlace,
CUDAPinnedPlace, NPUPinnedPlace> {
CUDAPinnedPlace, NPUPinnedPlace, IPUPlace> {
private:
using PlaceBase = boost::variant<CUDAPlace, XPUPlace, NPUPlace, CPUPlace,
CUDAPinnedPlace, NPUPinnedPlace>;
CUDAPinnedPlace, NPUPinnedPlace, IPUPlace>;
public:
Place() = default;
Place(const CPUPlace &cpu_place) : PlaceBase(cpu_place) {} // NOLINT
Place(const XPUPlace &xpu_place) : PlaceBase(xpu_place) {} // NOLINT
Place(const NPUPlace &npu_place) : PlaceBase(npu_place) {} // NOLINT
Place(const IPUPlace &ipu_place) : PlaceBase(ipu_place) {} // NOLINT
Place(const CUDAPlace &cuda_place) : PlaceBase(cuda_place) {} // NOLINT
Place(const CUDAPinnedPlace &cuda_pinned_place) // NOLINT
: PlaceBase(cuda_pinned_place) {}
......@@ -180,6 +208,7 @@ using PlaceList = std::vector<Place>;
bool is_gpu_place(const Place &);
bool is_xpu_place(const Place &);
bool is_npu_place(const Place &);
bool is_ipu_place(const Place &);
bool is_cpu_place(const Place &);
bool is_cuda_pinned_place(const Place &);
bool is_npu_pinned_place(const Place &);
......@@ -228,6 +257,15 @@ struct PlaceVisitorWrapper
return typename Visitor::result_type();
#endif
}
typename Visitor::result_type operator()(const IPUPlace &ipu) const {
#ifdef PADDLE_WITH_IPU
return visitor_(ipu);
#else
PADDLE_THROW(platform::errors::Unavailable(
"Paddle is not compiled with IPU. Cannot visit ipu device"));
return typename Visitor::result_type();
#endif
}
typename Visitor::result_type operator()(const CUDAPlace &cuda) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
......
pybind.h
op_function_impl.h
\ No newline at end of file
op_function_impl.h
eager_op_function_impl.h
......@@ -37,6 +37,9 @@ PADDLE_DEFINE_EXPORTED_bool(
"If set true, the queue.pop will only get data from queue but not "
"remove the data from queue for speed testing");
// disable auto conversion to list in Python
PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray);
namespace paddle {
namespace pybind {
......
......@@ -313,6 +313,21 @@ void SetTensorFromPyArrayT(
PADDLE_THROW(platform::errors::PermissionDenied(
"Cannot use XPUPlace in CPU/GPU version, "
"Please recompile or reinstall Paddle with XPU support."));
#endif
} else if (paddle::platform::is_ipu_place(place)) {
#ifdef PADDLE_WITH_IPU
if (zero_copy) {
auto holder = std::make_shared<details::NumpyAllocation<T>>(array);
auto type = framework::ToDataType(std::type_index(typeid(T)));
self->ResetHolderWithType(holder, type);
} else {
auto dst = self->mutable_data<T>(place);
std::memcpy(dst, array.data(), array.nbytes());
}
#else
PADDLE_THROW(platform::errors::PermissionDenied(
"Cannot use IPUPlace in CPU/GPU/XPU/NPU version, "
"Please recompile or reinstall Paddle with IPU support."));
#endif
} else if (paddle::platform::is_npu_place(place)) {
#ifdef PADDLE_WITH_ASCEND_CL
......
......@@ -22,6 +22,10 @@ set(api_source_file ${CMAKE_SOURCE_DIR}/paddle/pten/api/lib/api.cc)
set(api_header_file_tmp ${api_header_file}.tmp)
set(api_source_file_tmp ${api_source_file}.tmp)
if (NOT PYTHON_EXECUTABLE)
find_package(PythonInterp REQUIRED)
endif()
add_custom_command(
OUTPUT ${api_header_file} ${api_source_file}
COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/core/kernel_registry.h"
// TODO(chenweihang): After the kernels are split into single files, the
// kernel declaration statements will be generated automatically from the
// kernel file names, and this header file will be removed.
PT_DECLARE_KERNEL(full_like, CPU);
PT_DECLARE_KERNEL(dot, CPU);
PT_DECLARE_KERNEL(flatten, CPU);
PT_DECLARE_KERNEL(sign, CPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(full_like, CUDA);
PT_DECLARE_KERNEL(dot, CUDA);
PT_DECLARE_KERNEL(flatten, CUDA);
PT_DECLARE_KERNEL(sign, CUDA);
#endif
#ifdef PADDLE_WITH_XPU
PT_DECLARE_KERNEL(flatten, XPU);
#endif
......@@ -25,10 +25,14 @@ limitations under the License. */
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
PT_DECLARE_MODULE(UtilsCPU);
PT_DECLARE_KERNEL(copy, CPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_MODULE(UtilsCUDA);
PT_DECLARE_KERNEL(copy, CUDA);
#endif
#ifdef PADDLE_WITH_XPU
PT_DECLARE_KERNEL(copy, XPU);
#endif
namespace paddle {
......
......@@ -27,13 +27,13 @@ const std::unordered_map<std::string, std::string> kernel_alias_name_map = {
{"fill_any_like", "full_like"},
{"fill_constant", "full"},
{"flatten_contiguous_range", "flatten"},
// {"matmul_v2", "matmul"},
{"matmul_v2", "matmul"},
{"reduce_mean", "mean"},
{"reduce_sum", "sum"},
{"reshape2", "reshape"},
// fluid kernel "mean/reshape/matmul/flatten/sum" should be deprecated
{"flatten", "deprecated"},
// {"matmul", "deprecated"},
{"matmul", "deprecated"},
{"mean", "deprecated"},
{"reshape", "deprecated"},
{"sum", "deprecated"}};
......
......@@ -61,9 +61,7 @@ void FillConstant(const CPUContext& dev_ctx,
} // namespace pten
PT_REGISTER_MODULE(CreationCPU);
PT_REGISTER_KERNEL("full_like",
PT_REGISTER_KERNEL(full_like,
CPU,
ANY,
pten::FillAnyLike,
......@@ -74,7 +72,7 @@ PT_REGISTER_KERNEL("full_like",
bool,
paddle::platform::float16) {}
PT_REGISTER_KERNEL("full",
PT_REGISTER_KERNEL(full,
CPU,
ANY,
pten::FillConstant,
......