Commit 5cedad40 authored by D DesmonDay

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_dlpack_for

@@ -61,7 +61,7 @@ repos:
     - id: black
       files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
   - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+    rev: 5.11.5
     hooks:
     - id: isort
   - repo: https://github.com/PyCQA/flake8
......
@@ -40,7 +40,10 @@ set(CINN_OPTIONAL_ARGS
     -DWITH_MKL_CBLAS=${WITH_MKL}
     -DWITH_MKLDNN=${WITH_MKL}
     -DPUBLISH_LIBS=ON
-    -DWITH_TESTING=ON)
+    -DWITH_TESTING=ON
+    -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}
+    -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR}
+    -DPYTHON_LIBRARIES=${PYTHON_LIBRARIES})
 set(CINN_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target cinnapi -j)
 set(CINN_BINARY_DIR ${CINN_PREFIX_DIR}/src/external_cinn-build)
 set(CINN_LIB_NAME "libcinnapi.so")
......
@@ -411,6 +411,17 @@ function(op_library TARGET)
     set(pybind_flag 1)
   endif()

+  # pybind USE_OP_ITSELF
+  set(op_name "")
+  # Add PHI Kernel Registry Message
+  find_register(${cc_src} "REGISTER_ACTIVATION_OP" op_name)
+  if(NOT ${op_name} EQUAL "")
+    file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
+    # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
+    set(TARGET ${op_name})
+    set(pybind_flag 1)
+  endif()
+
   set(op_name "")
   find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name)
   if(NOT ${op_name} EQUAL "")
......
@@ -50,14 +50,17 @@ void ComputeInterceptor::IncreaseReady(int64_t up_id) {
   auto max_ready_size = it->second.first;
   auto ready_size = it->second.second;
   ready_size += 1;
-  PADDLE_ENFORCE_LE(ready_size,
-                    max_ready_size,
-                    platform::errors::OutOfRange(
-                        "upstream=%lld ready_size must <= max_ready_size, but "
-                        "now ready_size=%lld, max_ready_size=%lld",
-                        up_id,
-                        ready_size,
-                        max_ready_size));
+  if (max_ready_size != INFINITE_BUFFER_SIZE) {
+    PADDLE_ENFORCE_LE(
+        ready_size,
+        max_ready_size,
+        platform::errors::OutOfRange(
+            "upstream=%lld ready_size must <= max_ready_size, but "
+            "now ready_size=%lld, max_ready_size=%lld",
+            up_id,
+            ready_size,
+            max_ready_size));
+  }
   it->second.second = ready_size;
 }
@@ -96,6 +99,9 @@ bool ComputeInterceptor::CanWriteOutput() {
   for (auto& outs : out_buffs_) {
     auto max_buffer_size = outs.second.first;
     auto used_size = outs.second.second;
+    if (max_buffer_size == INFINITE_BUFFER_SIZE) {
+      continue;
+    }
     // full, return false
     if (used_size == max_buffer_size) {
       VLOG(3) << "Interceptor " << GetInterceptorId()
@@ -112,15 +118,17 @@ void ComputeInterceptor::SendDataReadyToDownStream() {
     auto max_buff_size = outs.second.first;
     auto used_size = outs.second.second;
     used_size += 1;
-    PADDLE_ENFORCE_LE(
-        used_size,
-        max_buff_size,
-        platform::errors::OutOfRange("downstream=%lld used buff size must <= "
-                                     "max_buff_size, but now used_size=%lld, "
-                                     "max_buff_size=%lld",
-                                     down_id,
-                                     used_size,
-                                     max_buff_size));
+    if (max_buff_size != INFINITE_BUFFER_SIZE) {
+      PADDLE_ENFORCE_LE(
+          used_size,
+          max_buff_size,
+          platform::errors::OutOfRange("downstream=%lld used buff size must <= "
+                                       "max_buff_size, but now used_size=%lld, "
+                                       "max_buff_size=%lld",
+                                       down_id,
+                                       used_size,
+                                       max_buff_size));
+    }
     outs.second.second = used_size;

     InterceptorMessage ready_msg;
......
@@ -22,6 +22,8 @@
 namespace paddle {
 namespace distributed {

+const int64_t INFINITE_BUFFER_SIZE = -1;
+
 class ComputeInterceptor : public Interceptor {
  public:
   ComputeInterceptor(int64_t interceptor_id, TaskNode* node);
......
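Taken together, the hunks above make buffer capacity checks opt-out: a queue registered with INFINITE_BUFFER_SIZE (-1) is never range-checked and is never reported as full. A minimal standalone sketch of the gating logic, with illustrative names rather than Paddle's actual members:

#include <cassert>
#include <cstdint>

const int64_t INFINITE_BUFFER_SIZE = -1;

// Mirrors ComputeInterceptor::IncreaseReady: the upper bound is only
// enforced for finite buffers.
void IncreaseReady(int64_t max_ready_size, int64_t* ready_size) {
  *ready_size += 1;
  if (max_ready_size != INFINITE_BUFFER_SIZE) {
    assert(*ready_size <= max_ready_size);
  }
}

// Mirrors CanWriteOutput: an infinite buffer never counts as full.
bool IsFull(int64_t max_buffer_size, int64_t used_size) {
  if (max_buffer_size == INFINITE_BUFFER_SIZE) return false;
  return used_size == max_buffer_size;
}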
@@ -111,21 +111,22 @@ void FleetExecutor::Init(
     task_node->SetUnusedVars(unused_vars);
     if (task_node->type() == "Cond") {
       std::vector<std::string> while_block_vars;
-      std::vector<std::string> vars_in_parent;
-      std::vector<std::string> vars_in_sub;
-      for (auto& var : program_desc.Block(0).AllVars()) {
-        vars_in_parent.emplace_back(var->Name());
-      }
+      VLOG(3) << "Vars in while sub block:";
       for (auto& var : program_desc.Block(1).AllVars()) {
-        vars_in_sub.emplace_back(var->Name());
+        VLOG(3) << var->Name();
+        while_block_vars.emplace_back(var->Name());
+      }
+      for (const auto& pair : unused_vars) {
+        if (pair.first->Type() == "while") {
+          for (const auto& var_name : pair.second) {
+            while_block_vars.emplace_back(var_name);
+          }
+        }
+      }
+      VLOG(3) << "Vars below will be removed after while:";
+      for (const auto& name : while_block_vars) {
+        VLOG(3) << name;
       }
-      std::sort(vars_in_parent.begin(), vars_in_parent.end());
-      std::sort(vars_in_sub.begin(), vars_in_sub.end());
-      std::set_difference(vars_in_sub.begin(),
-                          vars_in_sub.end(),
-                          vars_in_parent.begin(),
-                          vars_in_parent.end(),
-                          std::back_inserter(while_block_vars));
       task_node->SetWhileBlockVars(while_block_vars);
     }
     int64_t interceptor_id = task_node->task_id();
......
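For contrast with the removed lines: the old code kept only the sub-block variables that were absent from the parent block (a set difference), while the new code keeps every variable of sub block 1 plus the variables recorded as unused by the `while` op. A standalone sketch of the removed computation:

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

// Sketch of the removed logic: sub-block vars minus parent-block vars,
// e.g. parent = {"a", "b"}, sub = {"b", "c"} yields {"c"}.
std::vector<std::string> OldWhileBlockVars(std::vector<std::string> parent,
                                           std::vector<std::string> sub) {
  std::sort(parent.begin(), parent.end());
  std::sort(sub.begin(), sub.end());
  std::vector<std::string> while_block_vars;
  std::set_difference(sub.begin(), sub.end(), parent.begin(), parent.end(),
                      std::back_inserter(while_block_vars));
  return while_block_vars;
}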
@@ -330,7 +330,7 @@ NODE_CC_FILE_TEMPLATE = """
 #include "paddle/fluid/eager/nan_inf_utils.h"
 #include "paddle/phi/api/include/sparse_api.h"
 #include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
-#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h"
+#include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
 #include "paddle/fluid/prim/api/all.h"
 #include "paddle/fluid/prim/utils/utils.h"
 DECLARE_bool(check_nan_inf);
......
@@ -26,6 +26,11 @@ namespace framework {
 using FeedType =
     paddle::variant<phi::DenseTensor, Strings, phi::SparseCooTensor>;

+template <>
+struct PhiVectorType<FeedType> {
+  const char *type_name = "PhiVectorFeedType";
+};
+
 using FeedList = paddle::framework::PhiVector<FeedType>;

 using FetchType = paddle::variant<phi::DenseTensor,
......
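The specialization above follows the usual compile-time type-name trait pattern: generic code looks up a printable name for an element type through PhiVectorType<T>. A self-contained sketch of the pattern; the lookup helper is an illustration, not Paddle's PhiVector implementation:

#include <iostream>

template <typename T>
struct PhiVectorType;  // primary template: no name unless specialized

struct FeedType {};  // stand-in for the paddle::variant in the diff

template <>
struct PhiVectorType<FeedType> {
  const char *type_name = "PhiVectorFeedType";
};

// Generic code can report the element type name through the trait.
template <typename T>
const char *ElementTypeName() {
  return PhiVectorType<T>{}.type_name;
}

int main() {
  std::cout << ElementTypeName<FeedType>() << "\n";  // PhiVectorFeedType
}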
@@ -144,6 +144,7 @@ if(WITH_TENSORRT)
   pass_library(trt_support_nhwc_pass inference)
   pass_library(elementwise_groupnorm_act_pass inference)
   pass_library(preln_elementwise_groupnorm_act_pass inference)
+  pass_library(groupnorm_act_pass inference)
   pass_library(trt_embedding_eltwise_layernorm_fuse_pass inference)
   pass_library(preln_embedding_eltwise_layernorm_fuse_pass inference)
 endif()
@@ -386,22 +387,10 @@ if(WITH_MKLDNN)
     test_depthwise_conv_mkldnn_pass
     SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc
     DEPS depthwise_conv_mkldnn_pass)
-  cc_test(
-    test_conv_bias_mkldnn_fuse_pass_cc
-    SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc
-    DEPS conv_bias_mkldnn_fuse_pass naive_executor)
   cc_test(
     test_conv_activation_mkldnn_fuse_pass
     SRCS mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc
     DEPS conv_activation_mkldnn_fuse_pass)
-  cc_test(
-    test_conv_concat_relu_mkldnn_fuse_pass
-    SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc
-    DEPS conv_activation_mkldnn_fuse_pass)
-  cc_test_old(
-    test_conv_elementwise_add_mkldnn_fuse_pass SRCS
-    mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS
-    conv_elementwise_add_mkldnn_fuse_pass pass_test_util)
   cc_test_old(
     test_int8_scale_calculation_mkldnn_pass SRCS
     mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc DEPS
......
@@ -16,6 +16,7 @@

 #include <memory>
 #include <string>
+#include <unordered_map>

 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
@@ -28,7 +29,7 @@ namespace patterns {

 // Declare patterns for multi head attention.
 // Can detect:
-// 1. Pre layer norm, post layer norm or sandwich layer norm.
+// 1. Pre layer norm or post layer norm.
 // 2. Add attn mask for qk product before the softmax or not.
 // 3. Do attn dropout or not.
 // 4. Add residual to the out linear result or not.
@@ -37,11 +38,10 @@ struct FusedAttentionPattern : public PatternBase {
       : PatternBase(pattern, name_scope, "fused_attention_pattern") {}

   PDNode* operator()(PDNode* x,
                      bool pre_layer_norm,   // do pre ln or not
-                     bool post_layer_norm,  // do post ln or not
                      bool has_attn_mask,    // add attn mask to qk or not
                      bool do_dropout,       // dropout the softmax(qk) or not
                      bool add_residual);    // add residual to out linear or not

   // pre layer norm
   PATTERN_DECL_NODE(pre_layer_norm_op);
@@ -134,11 +134,10 @@ struct FusedAttentionGradPattern : public PatternBase {
       : PatternBase(pattern, name_scope, "fused_attention_pattern") {}

   PDNode* operator()(PDNode* x,
                      bool pre_layer_norm,   // pre ln
-                     bool post_layer_norm,  // post ln
                      bool has_attn_mask,    // add attn mask to qk or not
                      bool do_dropout,       // dropout the softmax(qk) or not
                      bool add_residual);    // add residual to out linear or not

   // post layer norm grad
   PATTERN_DECL_NODE(post_layer_norm_grad_op);
@@ -254,6 +253,31 @@ struct FusedAttentionGradPattern : public PatternBase {

 }  // namespace patterns

+class FusedAttentionPassCache {
+ public:
+  ir::Node* GetNodeFromCache(const std::string name) {
+    if (var_name_to_ir_node_cache_.count(name)) {
+      return var_name_to_ir_node_cache_.find(name)->second;
+    }
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "The key (%s) of FusedAttentionCache does not exist.", name));
+  }
+
+  void InsertIntoCache(const std::string name, ir::Node* node) {
+    if (!var_name_to_ir_node_cache_.count(name)) {
+      var_name_to_ir_node_cache_.insert({name, node});
+    } else {
+      PADDLE_THROW(platform::errors::AlreadyExists(
+          "The key (%s) of FusedAttentionCache already exists.", name));
+    }
+  }
+
+  void ResetCache() { var_name_to_ir_node_cache_.clear(); }
+
+ private:
+  std::unordered_map<std::string, ir::Node*> var_name_to_ir_node_cache_;
+};
+
 class FusedAttentionsPass : public FusePassBase {
  public:
   virtual ~FusedAttentionsPass() {}
@@ -275,9 +299,17 @@ class FusedAttentionsPass : public FusePassBase {
   // If true, the function name will have an abbreviation part.
   // If false, the function name won't contain an abbreviation for it.

-  ir::Graph* PreMaskDropResPostFwd(Graph* graph) const;
+  ir::Graph* PreMaskDropResFwd(Graph* graph,
+                               FusedAttentionPassCache* cache) const;

-  ir::Graph* PreMaskDropResPostBwd(Graph* graph) const;
+  ir::Graph* PreMaskDropResBwd(Graph* graph,
+                               FusedAttentionPassCache* cache) const;
+
+  const std::string GenerateCacheKey(const std::string anchor,
+                                     const std::string var_name,
+                                     int block_id) const {
+    return anchor + "_" + std::to_string(block_id) + "_" + var_name;
+  }
 };

 }  // namespace ir
......
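The cache key scheme above concatenates anchor, block id, and variable name, so nodes stored by the forward rewrite stay addressable from the backward rewrite without name collisions across blocks. A standalone sketch with a hypothetical anchor and variable name:

#include <iostream>
#include <string>

// Same scheme as FusedAttentionsPass::GenerateCacheKey in the diff.
std::string GenerateCacheKey(const std::string &anchor,
                             const std::string &var_name, int block_id) {
  return anchor + "_" + std::to_string(block_id) + "_" + var_name;
}

int main() {
  // Hypothetical inputs; prints "attn_fwd_0_qkv_out".
  std::cout << GenerateCacheKey("attn_fwd", "qkv_out", 0) << "\n";
}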
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/groupnorm_act_pass.h"
#include <string>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
namespace ir {
class Node;
} // namespace ir
} // namespace framework
} // namespace paddle
namespace paddle {
namespace framework {
namespace ir {
namespace patterns {
struct GroupNormAct : public PatternBase {
GroupNormAct(PDPattern *pattern, const std::string &name_scope)
: PatternBase(pattern, name_scope, "groupnorm_act") {}
void operator()(PDNode *x);
// declare operator node's name
PATTERN_DECL_NODE(group_norm);
// declare variable node's name
PATTERN_DECL_NODE(elementwise_out);
PATTERN_DECL_NODE(group_norm_bias);
PATTERN_DECL_NODE(group_norm_scale);
PATTERN_DECL_NODE(group_norm_out);
PATTERN_DECL_NODE(act);
PATTERN_DECL_NODE(act_out);
};
void GroupNormAct::operator()(PDNode *x) {
// Create nodes for group_norm op.
auto *group_norm =
pattern->NewNode(group_norm_repr())->assert_is_op("group_norm");
auto *group_norm_bias_var = pattern->NewNode(group_norm_bias_repr())
->AsInput()
->assert_is_persistable_var()
->assert_is_op_input("group_norm", "Bias");
auto *group_norm_scale_var = pattern->NewNode(group_norm_scale_repr())
->AsInput()
->assert_is_persistable_var()
->assert_is_op_input("group_norm", "Scale");
auto *group_norm_out_var = pattern->NewNode(group_norm_out_repr())
->AsOutput()
->assert_is_op_output("group_norm", "Y")
->assert_is_op_input("silu", "X");
// Add links for group_norm op.
group_norm->LinksFrom({x, group_norm_bias_var, group_norm_scale_var})
.LinksTo({group_norm_out_var});
auto *act = pattern->NewNode(act_repr())->assert_is_op("silu");
auto *act_out = pattern->NewNode(act_out_repr())
->AsOutput()
->assert_is_op_output("silu", "Out");
act->LinksFrom({group_norm_out_var}).LinksTo({act_out});
}
} // namespace patterns
int GroupNormActFusePass::ApplyGNSiluPattern(ir::Graph *graph) const {
PADDLE_ENFORCE_NOT_NULL(
graph, platform::errors::PreconditionNotMet("graph should not be null."));
FusePassBase::Init("groupnorm_silu_fuse", graph);
int found_subgraph_count = 0;
GraphPatternDetector gpd;
PDNode *x = nullptr;
x = gpd.mutable_pattern()
->NewNode("groupnorm_act_fuse/x")
->AsInput()
->assert_var_not_persistable()
->assert_is_op_input("group_norm", "X");
patterns::GroupNormAct fused_pattern(gpd.mutable_pattern(),
"groupnorm_act_fuse");
fused_pattern(x);
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *graph) {
if (subgraph.count(x) <= 0) {
LOG(WARNING) << "The subgraph is empty.";
return;
}
VLOG(4) << "handle groupnorm act fuse";
GET_IR_NODE_FROM_SUBGRAPH(group_norm, group_norm, fused_pattern);
GET_IR_NODE_FROM_SUBGRAPH(group_norm_bias, group_norm_bias, fused_pattern);
GET_IR_NODE_FROM_SUBGRAPH(
group_norm_scale, group_norm_scale, fused_pattern);
GET_IR_NODE_FROM_SUBGRAPH(group_norm_out, group_norm_out, fused_pattern);
GET_IR_NODE_FROM_SUBGRAPH(act, act, fused_pattern);
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, fused_pattern);
if (!IsCompat(subgraph, graph)) {
LOG(WARNING) << "groupnorm act pass in op compat failed.";
return;
}
std::unordered_set<const Node *> del_node_set;
// Create a fused group_norm op node with the silu activation folded in
OpDesc new_desc(*group_norm->Op());
new_desc.SetAttr("with_silu", true);
new_desc.SetOutput("Y", {act_out->Name()});
new_desc.Flush();
auto fused_node = graph->CreateOpNode(&new_desc); // OpDesc will be copied.
del_node_set.insert(group_norm);
del_node_set.insert(group_norm_out);
del_node_set.insert(act);
GraphSafeRemoveNodes(graph, del_node_set);
IR_NODE_LINK_TO(subgraph.at(x), fused_node);
IR_NODE_LINK_TO(group_norm_scale, fused_node);
IR_NODE_LINK_TO(group_norm_bias, fused_node);
IR_NODE_LINK_TO(fused_node, act_out);
found_subgraph_count++;
};
gpd(graph, handler);
return found_subgraph_count;
}
void GroupNormActFusePass::ApplyImpl(ir::Graph *graph) const {
FusePassBase::Init("groupnorm_act_fuse_pass", graph);
int found_subgraph_count = ApplyGNSiluPattern(graph);
AddStatis(found_subgraph_count);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(groupnorm_act_pass, paddle::framework::ir::GroupNormActFusePass);
REGISTER_PASS_CAPABILITY(groupnorm_act_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination()
.EQ("silu", 0)
.EQ("group_norm", 0));
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {
//
//      |                    |
//  group_norm          group_norm
//      |          ->        |
//     silu
//      |
class Graph;
class GroupNormActFusePass : public FusePassBase {
public:
GroupNormActFusePass() {
AddOpCompat(OpCompat("group_norm"))
.AddInput("X")
.IsTensor()
.End()
.AddInput("Scale")
.IsTensor()
.End()
.AddInput("Bias")
.IsTensor()
.End()
.AddOutput("Y")
.IsTensor()
.End()
.AddOutput("Mean")
.IsTensor()
.End()
.AddOutput("Variance")
.IsTensor()
.End()
.AddAttr("epsilon")
.IsNumGE(0.0f)
.IsNumLE(1.0f)
.End()
.AddAttr("groups")
.IsNumGE(1)
.End()
.AddAttr("data_layout")
.IsStringIn({"NCHW"})
.End();
AddOpCompat(OpCompat("silu"))
.AddInput("X")
.IsTensor()
.End()
.AddOutput("Out")
.IsTensor()
.End();
}
virtual ~GroupNormActFusePass() {}
protected:
void ApplyImpl(ir::Graph* graph) const override;
int ApplyGNSiluPattern(ir::Graph* graph) const;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/phi/common/place.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog,
const std::string& type,
const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
if (type == "conv2d") {
const std::vector<int> strides({1, 1});
const std::vector<int> paddings({0, 0});
const std::vector<int> dilations({1, 1});
op->SetAttr("use_mkldnn", true);
op->SetAttr("name", name);
op->SetAttr("strides", strides);
op->SetAttr("groups", 1);
op->SetAttr("paddings", paddings);
op->SetAttr("padding_algorithm", std::string("EXPLICIT"));
op->SetAttr("dilations", dilations);
op->SetAttr("data_format", std::string("NCHW"));
op->SetOutput("Output", outputs);
op->SetInput("Input", {inputs[0]});
op->SetInput("Filter", {inputs[1]});
if (inputs.size() > 2)
op->SetInput("Bias", {inputs[2]});
else
op->SetInput("Bias", {});
} else if (type == "elementwise_add") {
op->SetAttr("use_mkldnn", true);
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0]});
op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", outputs);
}
op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
static_cast<int>(OpRole::kForward));
}
// (c, weights)->conv->f
// (f)->elementwise_add->g
ProgramDesc BuildProgramDesc(bool convWithExistingBias) {
ProgramDesc prog;
std::vector<std::string> nodes{"c", "weights", "f", "eltwise_bias", "g"};
if (convWithExistingBias) nodes.push_back("conv_bias");
for (auto& v : nodes) {
auto* var = prog.MutableBlock(0)->Var(v);
var->SetType(proto::VarType::LOD_TENSOR);
if (v == "weights" || v == "conv_bias" || v == "eltwise_bias") {
var->SetPersistable(true);
}
}
// conv+bias, both with MKL-DNN
if (convWithExistingBias) {
SetOp(&prog,
"conv2d",
"conv",
std::vector<std::string>({"c", "weights", "conv_bias"}),
std::vector<std::string>({"f"}));
} else {
SetOp(&prog,
"conv2d",
"conv",
std::vector<std::string>({"c", "weights"}),
std::vector<std::string>({"f"}));
}
SetOp(&prog,
"elementwise_add",
"eltwise",
std::vector<std::string>({"f", "eltwise_bias"}),
std::vector<std::string>({"g"}));
return prog;
}
void InitTensorHolder(Scope* scope,
const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<phi::DenseTensor>();
tensor->mutable_data(
place, framework::TransToPhiDataType(proto::VarType::FP32), 1);
}
void MainTest(bool convWithExistingBias) {
auto prog = BuildProgramDesc(convWithExistingBias);
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto place = phi::CPUPlace();
NaiveExecutor exe{place};
Scope scope;
// Init scope, as it is used in pass
exe.CreateVariables(prog, 0, true, &scope);
if (convWithExistingBias) {
InitTensorHolder(&scope, place, "conv_bias");
InitTensorHolder(&scope, place, "eltwise_bias");
}
graph->SetNotOwned(kParamScopeAttr, &scope);
auto pass = PassRegistry::Instance().Get("conv_bias_mkldnn_fuse_pass");
int original_nodes_num = graph->Nodes().size();
graph.reset(pass->Apply(graph.release()));
int current_nodes_num = graph->Nodes().size();
// Remove 3 Nodes: Conv, Bias, conv_out
// Add 1 Node: ConvBias
EXPECT_EQ(original_nodes_num - 2, current_nodes_num);
// Assert conv_bias op in newly generated graph
int conv_bias_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp() && (node->Op()->Type() == "conv2d" ||
node->Op()->Type() == "fused_conv2d")) {
auto* op = node->Op();
ASSERT_TRUE(op->HasAttr("use_mkldnn"));
EXPECT_TRUE(PADDLE_GET_CONST(bool, op->GetAttr("use_mkldnn")));
// check if "conv" convolution is fused
auto op_name = PADDLE_GET_CONST(std::string, op->GetAttr("name"));
if (op_name == "conv") {
auto input_names = op->InputNames();
ASSERT_TRUE(std::find(input_names.begin(), input_names.end(), "Bias") !=
input_names.end());
auto bias = op->Input("Bias");
if (bias.size()) {
++conv_bias_count;
}
}
}
}
EXPECT_EQ(conv_bias_count, 1);
}
TEST(ConvBiasFusePass, bias_free_conv) { MainTest(false); }
TEST(ConvBiasFusePass, conv_with_existing_bias) { MainTest(true); }
TEST(ConvBiasFusePass, conv3d) {
Conv3DBiasFusePass pass;
ASSERT_EQ(pass.type(), std::string("conv3d"));
}
TEST(ConvBiasFusePass, conv2d_transpose) {
Conv2DTransposeBiasFusePass pass;
ASSERT_EQ(pass.type(), std::string("conv2d_transpose"));
}
TEST(ConvBiasFusePass, pass_op_version_check) {
ASSERT_TRUE(
paddle::framework::compatible::PassVersionCheckerRegistrar::GetInstance()
.IsPassCompatible("conv_bias_mkldnn_fuse_pass"));
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(conv_bias_mkldnn_fuse_pass);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/op_proto_maker.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog,
const std::string& type,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
bool use_mkldnn = true) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
if (type == "conv2d") {
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("fuse_activation", std::string(""));
op->SetInput("Input", {inputs[0]});
op->SetInput("Filter", {inputs[1]});
if (inputs.size() > 2) {
op->SetInput("Bias", {inputs[2]});
}
op->SetOutput("Output", outputs);
} else if (type == "relu") {
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
} else if (type == "pool2d") {
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
} else if (type == "concat") {
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("axis", 0);
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
}
op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
static_cast<int>(OpRole::kForward));
}
// (a1,w1)->conv1->c1
// (a2,w2,b2)->conv2->c2
// if put_only_convs_before_concat=true
// (a3,w3)->conv3->c3
// else
// a3->pool1->c3
//
// (c1,c2,c3)->concat1->d
// d->relu1->e
ProgramDesc BuildProgramDesc(bool put_only_convs_before_concat,
bool all_convs_use_mkldnn) {
ProgramDesc prog;
for (auto& v : std::initializer_list<std::string>({"a1",
"w1",
"c1",
"a2",
"w2",
"b2",
"c2",
"a3",
"w3",
"c3",
"d",
"e"})) {
auto* var = prog.MutableBlock(0)->Var(v);
var->SetType(proto::VarType::SELECTED_ROWS);
if (v.find("w") == 0 || v.find("b") == 0) {
var->SetPersistable(true);
}
}
SetOp(&prog, "conv2d", {"a1", "w1", "b1"}, {"c1"}, all_convs_use_mkldnn);
SetOp(&prog, "conv2d", {"a2", "w2", "b2"}, {"c2"});
if (put_only_convs_before_concat) {
SetOp(&prog, "conv2d", {"a3", "w3", "b3"}, {"c3"});
} else {
SetOp(&prog, "pool2d", {"a3"}, {"c3"});
}
SetOp(&prog, "concat", {"c1", "c2", "c3"}, {"d"});
SetOp(&prog, "relu", {"d"}, {"e"});
return prog;
}
void MainTest(const ProgramDesc& prog, bool fuse_relu) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num = graph->Nodes().size();
auto pass = PassRegistry::Instance().Get("conv_activation_mkldnn_fuse_pass");
graph.reset(pass->Apply(graph.release()));
int current_nodes_num = graph->Nodes().size();
if (fuse_relu) {
// Remove 2 nodes: concat_out, relu
EXPECT_EQ(original_nodes_num - 2, current_nodes_num);
} else {
EXPECT_EQ(original_nodes_num, current_nodes_num);
}
int relu_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "conv2d") {
ASSERT_TRUE(op->HasAttr("fuse_activation"));
bool fuse_relu_attr =
(PADDLE_GET_CONST(std::string, op->GetAttr("fuse_activation")) ==
"relu");
EXPECT_EQ(fuse_relu, fuse_relu_attr);
} else if (op->Type() == "relu") {
relu_count++;
}
}
}
EXPECT_EQ(relu_count, fuse_relu ? 0 : 1);
}
TEST(ConvConcatReLUFusePass, only_convs_before_concat) {
bool all_convs_use_mkldnn = true;
bool put_only_convs_before_concat = true;
auto prog =
BuildProgramDesc(put_only_convs_before_concat, all_convs_use_mkldnn);
bool expect_relu_fuse = true;
MainTest(prog, expect_relu_fuse);
}
TEST(ConvConcatReLUFusePass, only_convs_before_concat_but_one_non_mkldnn) {
bool all_convs_use_mkldnn = false;
bool put_only_convs_before_concat = true;
auto prog =
BuildProgramDesc(put_only_convs_before_concat, all_convs_use_mkldnn);
bool expect_relu_fuse = false;
MainTest(prog, expect_relu_fuse);
}
TEST(ConvConcatReLUFusePass, convs_and_pool_before_concat) {
bool all_convs_use_mkldnn = true;
bool put_only_convs_before_concat = false;
auto prog =
BuildProgramDesc(put_only_convs_before_concat, all_convs_use_mkldnn);
bool expect_relu_fuse = false;
MainTest(prog, expect_relu_fuse);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(conv_activation_mkldnn_fuse_pass);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/ir/pass_test_util.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
namespace ir {
constexpr int nodes_removed = 3;
constexpr int nodes_added = 1;
OpDesc* Create_Op_con2d(ProgramDesc* prog,
const std::string& op_type_name,
const std::vector<test::InOutVarNamePair>& inputs,
const std::vector<test::InOutVarNamePair>& outputs,
const bool use_mkldnn = true) {
auto* op = prog->MutableBlock(0)->AppendOp();
const std::vector<int> strides({1, 1});
const std::vector<int> paddings({0, 0});
const std::vector<int> dilations({1, 1});
op->SetType(op_type_name);
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("strides", strides);
op->SetAttr("groups", 1);
op->SetAttr("paddings", paddings);
op->SetAttr("padding_algorithm", std::string("EXPLICIT"));
op->SetAttr("dilations", dilations);
op->SetAttr("data_format", std::string("NCHW"));
for (const auto& input : inputs) {
op->SetInput(input.first, {input.second});
}
for (const auto& output : outputs) {
op->SetOutput(output.first, {output.second});
}
op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
static_cast<int>(OpRole::kForward));
return op;
}
OpDesc* Create_Op_elemntwise_add(
ProgramDesc* prog,
const std::string& op_type_name,
const std::vector<test::InOutVarNamePair>& inputs,
const std::vector<test::InOutVarNamePair>& outputs,
bool use_mkldnn = true) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(op_type_name);
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("axis", -1);
for (const auto& input : inputs) {
op->SetInput(input.first, {input.second});
}
for (const auto& output : outputs) {
op->SetOutput(output.first, {output.second});
}
op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
static_cast<int>(OpRole::kForward));
return op;
}
TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionAsYWithElementwiseAddRelu) {
auto prog =
test::BuildProgramDesc({"a", "b", "c", "d", "e"}, {"bias", "weights"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
Create_Op_con2d(&prog,
"conv2d",
{{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
{{"Output", "c"}});
Create_Op_elemntwise_add(
&prog, "elementwise_add", {{"X", "a"}, {"Y", "c"}}, {{"Out", "d"}});
test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"conv_elementwise_add_mkldnn_fuse_pass",
"a",
"relu",
nodes_removed,
nodes_added));
EXPECT_TRUE(test::AssertOpsCount(
graph, {{"fused_conv2d", 1}, {"elementwise_add", 0}}));
}
TEST(ConvElementwiseAddMKLDNNFusePass,
ConvolutionProjectionAsYWithElementwiseAddRelu) {
auto prog = test::BuildProgramDesc({"a", "b", "c", "d", "e", "f"},
{"bias", "weights", "bias2", "weights2"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
// right branch
Create_Op_con2d(&prog,
"conv2d",
{{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
{{"Output", "c"}});
// left branch
Create_Op_con2d(&prog,
"conv2d",
{{"Input", "a"}, {"Bias", "bias2"}, {"Filter", "weights2"}},
{{"Output", "f"}});
Create_Op_elemntwise_add(
&prog, "elementwise_add", {{"X", "f"}, {"Y", "c"}}, {{"Out", "d"}});
test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"conv_elementwise_add_mkldnn_fuse_pass",
"a",
"relu",
nodes_removed,
nodes_added));
EXPECT_TRUE(test::AssertOpsCount(
graph, {{"conv2d", 1}, {"fused_conv2d", 1}, {"elementwise_add", 0}}));
}
TEST(ConvElementwiseAddMKLDNNFusePass,
ConvolutionAsYWithElementwiseAddReluNoBias) {
auto prog = test::BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
Create_Op_con2d(&prog,
"conv2d",
{{"Input", "b"}, {"Filter", "weights"}},
{{"Output", "c"}});
Create_Op_elemntwise_add(
&prog, "elementwise_add", {{"X", "a"}, {"Y", "c"}}, {{"Out", "d"}});
test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"conv_elementwise_add_mkldnn_fuse_pass",
"a",
"relu",
nodes_removed,
nodes_added));
EXPECT_TRUE(test::AssertOpsCount(
graph, {{"fused_conv2d", 1}, {"elementwise_add", 0}}));
}
TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionAsXWithElementwiseAddRelu) {
auto prog =
test::BuildProgramDesc({"a", "b", "c", "d", "e"}, {"bias", "weights"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
Create_Op_con2d(&prog,
"conv2d",
{{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
{{"Output", "c"}});
Create_Op_elemntwise_add(
&prog, "elementwise_add", {{"X", "c"}, {"Y", "a"}}, {{"Out", "d"}});
test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"conv_elementwise_add_mkldnn_fuse_pass",
"a",
"relu",
nodes_removed,
nodes_added));
EXPECT_TRUE(test::AssertOpsCount(
graph, {{"fused_conv2d", 1}, {"elementwise_add", 0}}));
}
TEST(ConvElementwiseAddMKLDNNFusePass,
ConvolutionAsXWithElementwiseAddReluNoBias) {
auto prog = test::BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
Create_Op_con2d(&prog,
"conv2d",
{{"Input", "b"}, {"Filter", "weights"}},
{{"Output", "c"}});
Create_Op_elemntwise_add(
&prog, "elementwise_add", {{"X", "c"}, {"Y", "a"}}, {{"Out", "d"}});
test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"conv_elementwise_add_mkldnn_fuse_pass",
"a",
"relu",
nodes_removed,
nodes_added));
EXPECT_TRUE(test::AssertOpsCount(
graph, {{"fused_conv2d", 1}, {"elementwise_add", 0}}));
}
TEST(ConvElementwiseAddMKLDNNFusePass, NoFusion) {
auto prog =
test::BuildProgramDesc({"a", "b", "c", "d", "e", "f", "g"}, {"weights"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
Create_Op_con2d(&prog,
"conv2d",
{{"Input", "b"}, {"Filter", "weights"}},
{{"Output", "c"}});
Create_Op_con2d(&prog,
"conv2d",
{{"Input", "d"}, {"Filter", "weights"}},
{{"Output", "e"}});
Create_Op_elemntwise_add(
&prog, "elementwise_add", {{"X", "c"}, {"Y", "e"}}, {{"Out", "f"}});
test::CreateOp(&prog, "relu", {{"X", "f"}}, {{"Out", "g"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(
&graph, "conv_elementwise_add_mkldnn_fuse_pass", "a", "g", 0, 0));
EXPECT_TRUE(
test::AssertOpsCount(graph, {{"conv2d", 2}, {"elementwise_add", 1}}));
}
TEST(ConvElementwiseAddMKLDNNFusePass, pass_op_version_check) {
ASSERT_TRUE(
paddle::framework::compatible::PassVersionCheckerRegistrar::GetInstance()
.IsPassCompatible("conv_elementwise_add_mkldnn_fuse_pass"));
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(conv_elementwise_add_mkldnn_fuse_pass);
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -26,7 +26,7 @@ using string::PrettyLogDetail;

 void MatmulActivationMkldnnFusePass::ApplyImpl(Graph* graph) const {
   auto act_types = GetSupportedActivations();
-  auto matmul_types = {"matmul", "matmul_v2"};
+  auto matmul_types = {"fused_matmul", "matmul", "matmul_v2"};

   for (const auto& matmul_type : matmul_types)
     for (auto& act_type : act_types) {
@@ -61,8 +61,17 @@ void MatmulActivationMkldnnFusePass::FuseMatmulAct(
     GET_IR_NODE_FROM_SUBGRAPH(
         activation_out, activation_out, matmul_act_pattern);

-    SetActivationAttrs(matmul->Op(), activation->Op(), act_type);
-    matmul->Op()->SetOutput("Out", {activation_out->Name()});
+    OpDesc* matmul_op = matmul->Op();
+    matmul_op->SetType("fused_matmul");
+    if (matmul_type == "matmul") {
+      matmul_op->SetAttr("trans_x", matmul_op->GetAttr("transpose_X"));
+      matmul_op->SetAttr("trans_y", matmul_op->GetAttr("transpose_Y"));
+      matmul_op->SetAttr("matmul_alpha", matmul_op->GetAttr("alpha"));
+    }
+    SetActivationAttrs(matmul_op, activation->Op(), act_type);
+    matmul_op->SetOutput("Out", {activation_out->Name()});

     IR_NODE_LINK_TO(matmul, activation_out);
     GraphSafeRemoveNodes(graph, {activation, matmul_out});
@@ -88,11 +97,6 @@ MatmulActivationMkldnnFusePass::MatmulActivationMkldnnFusePass() {
       .AddInput("Y")
       .IsTensor()
       .End()
-      .AddInput(
-          "ResidualData")  // Extra tensor used in matmul+elementwise_add fuse
-      .IsTensor()
-      .IsOptional()
-      .End()
       .AddOutput("Out")
       .IsTensor()
       .End()
@@ -113,8 +117,24 @@ MatmulActivationMkldnnFusePass::MatmulActivationMkldnnFusePass() {
       .AddInput("Y")
       .IsTensor()
      .End()
-      .AddInput(
-          "ResidualData")  // Extra tensor used in matmul+elementwise_add fuse
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("trans_x")
+      .IsType<bool>()
+      .End()
+      .AddAttr("trans_y")
+      .IsType<bool>()
+      .End();
+
+  AddOpCompat(OpCompat("fused_matmul"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddInput("Y")
+      .IsTensor()
+      .End()
+      .AddInput("ResidualData")
       .IsTensor()
       .IsOptional()
       .End()
@@ -126,6 +146,50 @@ MatmulActivationMkldnnFusePass::MatmulActivationMkldnnFusePass() {
       .End()
       .AddAttr("trans_y")
       .IsType<bool>()
+      .End()
+      .AddAttr("matmul_alpha")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_activation")
+      .IsType<std::string>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_alpha")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_beta")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_output_scale")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_X")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_X")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_Y")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_Y")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_Out")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_Out")
+      .IsType<std::vector<int>>()
+      .IsOptional()
       .End();

   AddOpCompat(OpCompat("abs"))
@@ -279,6 +343,7 @@ REGISTER_PASS(matmul_activation_mkldnn_fuse_pass,
 REGISTER_PASS_CAPABILITY(matmul_activation_mkldnn_fuse_pass)
     .AddCombination(
         paddle::framework::compatible::OpVersionComparatorCombination()
+            .EQ("fused_matmul", 0)
            .LE("matmul", 1)
            .EQ("matmul_v2", 0)
            .EQ("abs", 0)
......
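The same matmul to fused_matmul rewrite recurs in the next several oneDNN passes, always with the same renaming of the legacy attributes. A small reference sketch of that mapping:

#include <map>
#include <string>

// Legacy "matmul" attribute names and their "fused_matmul" equivalents,
// as applied by the SetAttr calls in these passes.
std::map<std::string, std::string> LegacyMatmulAttrMap() {
  return {{"transpose_X", "trans_x"},
          {"transpose_Y", "trans_y"},
          {"alpha", "matmul_alpha"}};
}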
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -25,7 +25,7 @@ namespace ir {

 using string::PrettyLogDetail;

 void MatmulElementwiseAddMKLDNNFusePass::ApplyImpl(Graph* graph) const {
-  auto matmul_types = {"matmul", "matmul_v2"};
+  auto matmul_types = {"fused_matmul", "matmul", "matmul_v2"};
   auto matmul_as_x = {true, false};

   for (const auto& matmul_type : matmul_types)
@@ -65,6 +65,12 @@ void MatmulElementwiseAddMKLDNNFusePass::FuseMatmulElementwiseAdd(
       return;
     }

+    matmul->Op()->SetType("fused_matmul");
+    if (matmul_type == "matmul") {
+      matmul->Op()->SetAttr("trans_x", matmul->Op()->GetAttr("transpose_X"));
+      matmul->Op()->SetAttr("trans_y", matmul->Op()->GetAttr("transpose_Y"));
+      matmul->Op()->SetAttr("matmul_alpha", matmul->Op()->GetAttr("alpha"));
+    }
     matmul->Op()->SetInput("ResidualData", {elementwise_addend->Name()});
     matmul->Op()->SetOutput("Out", {elementwise_add_out->Name()});
@@ -125,6 +131,71 @@ MatmulElementwiseAddMKLDNNFusePass::MatmulElementwiseAddMKLDNNFusePass() {
       .IsType<bool>()
       .End();

+  AddOpCompat(OpCompat("fused_matmul"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddInput("Y")
+      .IsTensor()
+      .End()
+      .AddInput("ResidualData")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("trans_x")
+      .IsType<bool>()
+      .End()
+      .AddAttr("trans_y")
+      .IsType<bool>()
+      .End()
+      .AddAttr("matmul_alpha")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_activation")
+      .IsType<std::string>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_alpha")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_beta")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_output_scale")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_X")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_X")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_Y")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_Y")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_Out")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_Out")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End();
+
 AddOpCompat(OpCompat("elementwise_add"))
      .AddInput("X")
      .IsTensor()
@@ -149,6 +220,7 @@ REGISTER_PASS(matmul_elementwise_add_mkldnn_fuse_pass,
 REGISTER_PASS_CAPABILITY(matmul_elementwise_add_mkldnn_fuse_pass)
     .AddCombination(
         paddle::framework::compatible::OpVersionComparatorCombination()
+            .EQ("fused_matmul", 0)
            .LE("matmul", 1)
            .EQ("matmul_v2", 0)
            .LE("elementwise_add", 1));
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ namespace ir {

 using string::PrettyLogDetail;

 void MatmulTransposeReshapeMKLDNNPass::ApplyImpl(Graph *graph) const {
-  auto matmul_types = {"matmul", "matmul_v2"};
+  auto matmul_types = {"fused_matmul", "matmul", "matmul_v2"};

   for (const auto &matmul_type : matmul_types) {
     Fuse(graph, matmul_type);
@@ -84,6 +84,12 @@ void MatmulTransposeReshapeMKLDNNPass::Fuse(
     }

     OpDesc *matmul_desc = matmul_op->Op();
+    matmul_desc->SetType("fused_matmul");
+    if (matmul_type == "matmul") {
+      matmul_desc->SetAttr("trans_x", matmul_desc->GetAttr("transpose_X"));
+      matmul_desc->SetAttr("trans_y", matmul_desc->GetAttr("transpose_Y"));
+      matmul_desc->SetAttr("matmul_alpha", matmul_desc->GetAttr("alpha"));
+    }
     matmul_desc->SetOutput("Out", {reshape_out->Name()});
     matmul_desc->SetAttr("fused_reshape_Out", reshape_shape);
     matmul_desc->SetAttr("fused_transpose_Out", transpose_axis);
@@ -149,6 +155,71 @@ MatmulTransposeReshapeMKLDNNPass::MatmulTransposeReshapeMKLDNNPass() {
       .IsType<bool>()
       .End();

+  AddOpCompat(OpCompat("fused_matmul"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddInput("Y")
+      .IsTensor()
+      .End()
+      .AddInput("ResidualData")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("trans_x")
+      .IsType<bool>()
+      .End()
+      .AddAttr("trans_y")
+      .IsType<bool>()
+      .End()
+      .AddAttr("matmul_alpha")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_activation")
+      .IsType<std::string>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_alpha")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fuse_beta")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_output_scale")
+      .IsType<float>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_X")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_X")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_Y")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_Y")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_reshape_Out")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("fused_transpose_Out")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End();
+
 AddOpCompat(OpCompat("transpose2"))
      .AddInput("X")
      .IsTensor()
@@ -189,6 +260,7 @@ REGISTER_PASS(matmul_transpose_reshape_mkldnn_fuse_pass,
 REGISTER_PASS_CAPABILITY(matmul_transpose_reshape_mkldnn_fuse_pass)
     .AddCombination(
         paddle::framework::compatible::OpVersionComparatorCombination()
+            .EQ("fused_matmul", 0)
            .LE("matmul", 1)
            .EQ("matmul_v2", 0)
            .EQ("transpose2", 0)
......
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -27,6 +27,7 @@ using string::PrettyLogDetail;

 void FuseOperatorScaleOneDNNPass::ApplyImpl(Graph *graph) const {
   const std::vector<std::string> fusable_ops{
       "fc",
+      "fused_matmul",
       "matmul",
       "matmul_v2",
       "elementwise_add",
@@ -85,6 +86,19 @@ void FuseOperatorScaleOneDNNPass::FuseScale(Graph *graph,
       scale = *(scale_tensor->data<float>());
     }

+    if (op_type == "matmul") {
+      operator_op->Op()->SetType("fused_matmul");
+      operator_op->Op()->SetAttr("trans_x",
+                                 operator_op->Op()->GetAttr("transpose_X"));
+      operator_op->Op()->SetAttr("trans_y",
+                                 operator_op->Op()->GetAttr("transpose_Y"));
+      operator_op->Op()->SetAttr("matmul_alpha",
+                                 operator_op->Op()->GetAttr("alpha"));
+    }
+    if (op_type == "matmul_v2") {
+      operator_op->Op()->SetType("fused_matmul");
+    }
+
     operator_op->Op()->SetAttr("fused_output_scale", scale);
     operator_op->Op()->SetOutput("Out", {scale_out->Name()});
@@ -111,6 +125,7 @@ REGISTER_PASS_CAPABILITY(operator_scale_onednn_fuse_pass)
     .AddCombination(
         paddle::framework::compatible::OpVersionComparatorCombination()
            .EQ("fc", 0)
+            .EQ("fused_matmul", 0)
            .LE("matmul", 1)
            .EQ("matmul_v2", 0)
            .LE("elementwise_add", 1)
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -22,7 +22,7 @@ namespace framework { ...@@ -22,7 +22,7 @@ namespace framework {
namespace ir { namespace ir {
void ReshapeTransposeMatmulMkldnnFusePass::ApplyImpl(Graph *graph) const { void ReshapeTransposeMatmulMkldnnFusePass::ApplyImpl(Graph *graph) const {
auto matmul_types = {"matmul", "matmul_v2"}; auto matmul_types = {"matmul", "matmul_v2", "fused_matmul"};
for (const auto &matmul_type : matmul_types) { for (const auto &matmul_type : matmul_types) {
Fuse(graph, Fuse(graph,
...@@ -102,6 +102,25 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse( ...@@ -102,6 +102,25 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse(
matmul_type + " encountered."); matmul_type + " encountered.");
} }
// Return if input of fused_matmul is already fused
if (matmul_type == "fused_matmul") {
auto is_already_fused_X =
matmul_desc->HasAttr("fused_reshape_X")
? !(PADDLE_GET_CONST(std::vector<int>,
matmul_desc->GetAttr("fused_reshape_X"))
.empty())
: false;
if (is_already_fused_X && matmul_input_name == "X") return;
auto is_already_fused_Y =
matmul_desc->HasAttr("fused_reshape_Y")
? !(PADDLE_GET_CONST(std::vector<int>,
matmul_desc->GetAttr("fused_reshape_Y"))
.empty())
: false;
if (is_already_fused_Y && matmul_input_name == "Y") return;
}
auto reshape_shape = auto reshape_shape =
paddle::get<std::vector<int>>(reshape_op->Op()->GetAttr("shape")); paddle::get<std::vector<int>>(reshape_op->Op()->GetAttr("shape"));
auto transpose_axis = auto transpose_axis =
...@@ -123,6 +142,12 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse( ...@@ -123,6 +142,12 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse(
return; return;
} }
matmul_desc->SetType("fused_matmul");
if (matmul_type == "matmul") {
matmul_desc->SetAttr("trans_x", matmul_desc->GetAttr("transpose_X"));
matmul_desc->SetAttr("trans_y", matmul_desc->GetAttr("transpose_Y"));
matmul_desc->SetAttr("matmul_alpha", matmul_desc->GetAttr("alpha"));
}
matmul_desc->SetInput(matmul_input_name, {(reshape_in)->Name()}); matmul_desc->SetInput(matmul_input_name, {(reshape_in)->Name()});
matmul_desc->SetAttr("fused_reshape_" + matmul_input_name, reshape_shape); matmul_desc->SetAttr("fused_reshape_" + matmul_input_name, reshape_shape);
matmul_desc->SetAttr("fused_transpose_" + matmul_input_name, matmul_desc->SetAttr("fused_transpose_" + matmul_input_name,
...@@ -220,6 +245,71 @@ ReshapeTransposeMatmulMkldnnFusePass::ReshapeTransposeMatmulMkldnnFusePass() { ...@@ -220,6 +245,71 @@ ReshapeTransposeMatmulMkldnnFusePass::ReshapeTransposeMatmulMkldnnFusePass() {
.AddAttr("trans_y") .AddAttr("trans_y")
.IsType<bool>() .IsType<bool>()
.End(); .End();
AddOpCompat(OpCompat("fused_matmul"))
.AddInput("X")
.IsTensor()
.End()
.AddInput("Y")
.IsTensor()
.End()
.AddInput("ResidualData")
.IsTensor()
.IsOptional()
.End()
.AddOutput("Out")
.IsTensor()
.End()
.AddAttr("trans_x")
.IsType<bool>()
.End()
.AddAttr("trans_y")
.IsType<bool>()
.End()
.AddAttr("matmul_alpha")
.IsType<float>()
.IsOptional()
.End()
.AddAttr("fuse_activation")
.IsType<std::string>()
.IsOptional()
.End()
.AddAttr("fuse_alpha")
.IsType<float>()
.IsOptional()
.End()
.AddAttr("fuse_beta")
.IsType<float>()
.IsOptional()
.End()
.AddAttr("fused_output_scale")
.IsType<float>()
.IsOptional()
.End()
.AddAttr("fused_reshape_X")
.IsType<std::vector<int>>()
.IsOptional()
.End()
.AddAttr("fused_transpose_X")
.IsType<std::vector<int>>()
.IsOptional()
.End()
.AddAttr("fused_reshape_Y")
.IsType<std::vector<int>>()
.IsOptional()
.End()
.AddAttr("fused_transpose_Y")
.IsType<std::vector<int>>()
.IsOptional()
.End()
.AddAttr("fused_reshape_Out")
.IsType<std::vector<int>>()
.IsOptional()
.End()
.AddAttr("fused_transpose_Out")
.IsType<std::vector<int>>()
.IsOptional()
.End();
} }
} // namespace ir } // namespace ir
...@@ -234,5 +324,6 @@ REGISTER_PASS_CAPABILITY(reshape_transpose_matmul_mkldnn_fuse_pass) ...@@ -234,5 +324,6 @@ REGISTER_PASS_CAPABILITY(reshape_transpose_matmul_mkldnn_fuse_pass)
paddle::framework::compatible::OpVersionComparatorCombination() paddle::framework::compatible::OpVersionComparatorCombination()
.EQ("reshape2", 0) .EQ("reshape2", 0)
.EQ("transpose2", 0) .EQ("transpose2", 0)
.EQ("fused_matmul", 0)
.EQ("matmul", 1) .EQ("matmul", 1)
.EQ("matmul_v2", 0)); .EQ("matmul_v2", 0));
...@@ -129,6 +129,24 @@ void PrelnResidualBias::operator()(PDNode *x, PDNode *y) { ...@@ -129,6 +129,24 @@ void PrelnResidualBias::operator()(PDNode *x, PDNode *y) {
} // namespace patterns } // namespace patterns
void setIntermediateOut(OpDesc *desc,
const std::string &out_name,
const std::string &scope_name) {
std::string new_name = scope_name + "/at." + out_name + ".new";
desc->SetOutput(out_name, {new_name});
}
void addIntermediateOut(Node *op_node,
const std::string &out_name,
const std::string &scope_name,
Graph *graph) {
std::string new_name = scope_name + "/at." + out_name + ".new";
VarDesc out_var(new_name);
out_var.SetPersistable(false);
auto *node_var = graph->CreateVarNode(&out_var);
IR_NODE_LINK_TO(op_node, node_var);
}
int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
bool with_bias) const { bool with_bias) const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
...@@ -207,7 +225,7 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, ...@@ -207,7 +225,7 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
// on each other, so we make the check below to ensure only one // on each other, so we make the check below to ensure only one
// PrelnResidualBias pattern is dealt with. // PrelnResidualBias pattern is dealt with.
for (auto op : elementwise1_out->inputs) { for (auto op : elementwise1_out->inputs) {
if (op->Name() == "preln_residual_bias") return; if (op->Name() == "fused_bias_dropout_residual_layer_norm") return;
} }
if (!IsCompat(subgraph, graph)) { if (!IsCompat(subgraph, graph)) {
...@@ -218,31 +236,37 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, ...@@ -218,31 +236,37 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
std::unordered_set<const Node *> del_node_set; std::unordered_set<const Node *> del_node_set;
// Create an PrelnResidualBias op node // Create an PrelnResidualBias op node
OpDesc new_desc; OpDesc new_desc;
new_desc.SetType("preln_residual_bias"); new_desc.SetType("fused_bias_dropout_residual_layer_norm");
// inputs // inputs
new_desc.SetInput("X", {subgraph.at(x)->Name()}); new_desc.SetInput("X", {subgraph.at(x)->Name()});
new_desc.SetInput("Y", {subgraph.at(y)->Name()}); new_desc.SetInput("Residual", {subgraph.at(y)->Name()});
new_desc.SetInput("Scale", {layer_norm_scale->Name()}); new_desc.SetInput("LnScale", {layer_norm_scale->Name()});
new_desc.SetInput("Bias", {layer_norm_bias->Name()}); new_desc.SetInput("LnBias", {layer_norm_bias->Name()});
if (with_bias) { if (with_bias) {
new_desc.SetInput("EleBias", {elementwise_bias->Name()}); new_desc.SetInput("Bias", {elementwise_bias->Name()});
} }
// outputs // outputs
new_desc.SetOutput("Out_0", {layer_norm_out->Name()}); new_desc.SetOutput("Y", {layer_norm_out->Name()});
new_desc.SetOutput("Out_1", {elementwise1_out->Name()}); new_desc.SetOutput("BiasDropoutResidualOut", {elementwise1_out->Name()});
new_desc.SetOutput("LnMean", {layer_norm_mean->Name()});
new_desc.SetOutput("LnVariance", {layer_norm_variance->Name()});
setIntermediateOut(&new_desc, "DropoutMaskOut", "preln_residual_bias_fuse");
// attrs // attrs
new_desc.SetAttr("epsilon", layer_norm->Op()->GetAttr("epsilon")); new_desc.SetAttr("ln_epsilon", layer_norm->Op()->GetAttr("epsilon"));
new_desc.SetAttr("dropout_rate", 0.0f);
new_desc.SetAttr("is_test", true);
new_desc.SetAttr("begin_norm_axis", new_desc.SetAttr("begin_norm_axis",
layer_norm->Op()->GetAttr("begin_norm_axis")); layer_norm->Op()->GetAttr("begin_norm_axis"));
auto fused_node = graph->CreateOpNode(&new_desc); // OpDesc will be copied. auto fused_node = graph->CreateOpNode(&new_desc); // OpDesc will be copied.
addIntermediateOut(
fused_node, "DropoutMaskOut", "preln_residual_bias_fuse", graph);
if (with_bias) { if (with_bias) {
del_node_set.insert(elementwise0); del_node_set.insert(elementwise0);
del_node_set.insert(elementwise0_out); del_node_set.insert(elementwise0_out);
} }
del_node_set.insert(elementwise1); del_node_set.insert(elementwise1);
del_node_set.insert(layer_norm); del_node_set.insert(layer_norm);
del_node_set.insert(layer_norm_mean);
del_node_set.insert(layer_norm_variance);
GraphSafeRemoveNodes(graph, del_node_set); GraphSafeRemoveNodes(graph, del_node_set);
IR_NODE_LINK_TO(subgraph.at(x), fused_node); IR_NODE_LINK_TO(subgraph.at(x), fused_node);
IR_NODE_LINK_TO(subgraph.at(y), fused_node); IR_NODE_LINK_TO(subgraph.at(y), fused_node);
...@@ -253,6 +277,9 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, ...@@ -253,6 +277,9 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
IR_NODE_LINK_TO(layer_norm_bias, fused_node); IR_NODE_LINK_TO(layer_norm_bias, fused_node);
IR_NODE_LINK_TO(fused_node, layer_norm_out); IR_NODE_LINK_TO(fused_node, layer_norm_out);
IR_NODE_LINK_TO(fused_node, elementwise1_out); IR_NODE_LINK_TO(fused_node, elementwise1_out);
IR_NODE_LINK_TO(fused_node, layer_norm_mean);
IR_NODE_LINK_TO(fused_node, layer_norm_variance);
found_subgraph_count++; found_subgraph_count++;
}; };
...@@ -261,6 +288,8 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, ...@@ -261,6 +288,8 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
} }
void PrelnResidualBiasFusePass::ApplyImpl(ir::Graph *graph) const { void PrelnResidualBiasFusePass::ApplyImpl(ir::Graph *graph) const {
VLOG(1) << "Fuse PrelnResidualBias into "
"fused_bias_dropout_residual_layer_norm op with dropout rate = 0";
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
graph, platform::errors::PreconditionNotMet("graph should not be null.")); graph, platform::errors::PreconditionNotMet("graph should not be null."));
FusePassBase::Init("preln_residual_bias_fuse", graph); FusePassBase::Init("preln_residual_bias_fuse", graph);
......
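Numerically, the substitution made by this pass is sound because with dropout_rate = 0 the fused op degenerates to plain layer normalization of x + bias + residual, which is exactly the PrelnResidualBias pattern. A standalone sketch of that reduction (a simplification for illustration: scalar bias, a single normalized row, float32; not the Paddle kernel):

#include <cmath>
#include <cstdio>
#include <vector>

// With dropout disabled, Y = LayerNorm(x + bias + residual) and
// BiasDropoutResidualOut is simply the pre-normalization sum.
std::vector<float> FusedBiasResidualLayerNorm(const std::vector<float>& x,
                                              const std::vector<float>& residual,
                                              float bias,
                                              float ln_scale,
                                              float ln_bias,
                                              float epsilon) {
  std::vector<float> sum(x.size());
  float mean = 0.f;
  for (size_t i = 0; i < x.size(); ++i) {
    sum[i] = x[i] + bias + residual[i];  // dropout with rate 0 is the identity
    mean += sum[i];
  }
  mean /= static_cast<float>(sum.size());
  float var = 0.f;
  for (float v : sum) var += (v - mean) * (v - mean);
  var /= static_cast<float>(sum.size());
  std::vector<float> y(sum.size());
  for (size_t i = 0; i < sum.size(); ++i) {
    y[i] = ln_scale * (sum[i] - mean) / std::sqrt(var + epsilon) + ln_bias;
  }
  return y;
}

int main() {
  auto y = FusedBiasResidualLayerNorm({1.f, 2.f, 3.f}, {0.5f, 0.5f, 0.5f},
                                      0.1f, 1.f, 0.f, 1e-5f);
  for (float v : y) std::printf("%f\n", v);
}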
...@@ -170,7 +170,7 @@ void TrtSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { ...@@ -170,7 +170,7 @@ void TrtSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const {
// attrs // attrs
new_desc.SetAttr("epsilon", layer_norm->Op()->GetAttr("epsilon")); new_desc.SetAttr("epsilon", layer_norm->Op()->GetAttr("epsilon"));
if (new_desc.HasAttr("begin_norm_axis")) { if (layer_norm->Op()->HasAttr("begin_norm_axis")) {
int32_t begin_norm_axis = PADDLE_GET_CONST( int32_t begin_norm_axis = PADDLE_GET_CONST(
int32_t, layer_norm->Op()->GetAttr("begin_norm_axis")); int32_t, layer_norm->Op()->GetAttr("begin_norm_axis"));
int32_t input_rank = int32_t input_rank =
......
...@@ -69,9 +69,15 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build( ...@@ -69,9 +69,15 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
instructions_ = &instructions; instructions_ = &instructions;
op_num_ = instructions_->size(); op_num_ = instructions_->size();
ops_before_.assign(op_num_, {});
ops_behind_.assign(op_num_, {});
op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));
BuildDownstreamMap(); BuildDownstreamMap();
BuildOpHappensBefore(); VLOG(6) << "Finish BuildDownstreamMap";
ShrinkDownstreamMap(); ShrinkDownstreamMap();
VLOG(6) << "Finish ShrinkDownstreamMap";
if (FLAGS_new_executor_sequential_run) { if (FLAGS_new_executor_sequential_run) {
AddDependencyForSequentialRun(); AddDependencyForSequentialRun();
...@@ -81,18 +87,22 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build( ...@@ -81,18 +87,22 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
if (FLAGS_add_dependency_for_communication_op) { if (FLAGS_add_dependency_for_communication_op) {
AddDependencyForCommunicationOp(); AddDependencyForCommunicationOp();
VLOG(6) << "Finish AddDependencyForSequentialRun";
} }
AddDependencyForRandomOp(); AddDependencyForRandomOp();
AddDependencyForReadOp(); VLOG(6) << "Finish AddDependencyForRandomOp";
is_build_ = true; AddDependencyForReadOp();
VLOG(6) << "Finish AddDependencyForReadOp";
VLOG(8) << "Finish build dependency"; VLOG(6) << "Finish build dependency";
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_); VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
VLOG(8) << "downstream_map: " << std::endl VLOG(8) << "downstream_map: " << std::endl
<< StringizeDownstreamMap(op_downstream_map_); << StringizeDownstreamMap(op_downstream_map_);
is_build_ = true;
return op_downstream_map_; return op_downstream_map_;
} }
...@@ -106,15 +116,6 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap() ...@@ -106,15 +116,6 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap()
return op_downstream_map_; return op_downstream_map_;
} }
bool DependencyBuilder::OpHappensBefore(size_t prior_op_idx,
size_t posterior_op_idx) const {
PADDLE_ENFORCE_GE(
op_happens_before_.size(),
0,
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
}
void DependencyBuilder::AddDependencyForCoalesceTensorOp() { void DependencyBuilder::AddDependencyForCoalesceTensorOp() {
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
if (instructions_->at(op_idx).OpBase()->Type() == kCoalesceTensor) { if (instructions_->at(op_idx).OpBase()->Type() == kCoalesceTensor) {
...@@ -287,7 +288,7 @@ void DependencyBuilder::AddDependencyForReadOp() { ...@@ -287,7 +288,7 @@ void DependencyBuilder::AddDependencyForReadOp() {
for (size_t read_op_idx : read_ops) { for (size_t read_op_idx : read_ops) {
for (size_t downstream_op_idx : startup_ops) { for (size_t downstream_op_idx : startup_ops) {
if (read_op_idx != downstream_op_idx && if (read_op_idx != downstream_op_idx &&
!op_happens_before_[downstream_op_idx][read_op_idx]) { !OpHappensBefore(downstream_op_idx, read_op_idx)) {
AddDownstreamOp(read_op_idx, downstream_op_idx); AddDownstreamOp(read_op_idx, downstream_op_idx);
} }
} }
...@@ -308,42 +309,56 @@ void DependencyBuilder::AddDependencyForSequentialRun() { ...@@ -308,42 +309,56 @@ void DependencyBuilder::AddDependencyForSequentialRun() {
void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx, void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
size_t posterior_op_idx) { size_t posterior_op_idx) {
std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx]; PADDLE_ENFORCE_EQ(
OpHappensBefore(posterior_op_idx, prior_op_idx),
false,
phi::errors::Unavailable(
"Can not add dependency %d->%d because %d is run before %d",
prior_op_idx,
posterior_op_idx,
posterior_op_idx,
prior_op_idx));
if (op_happens_before_.size() != 0) { std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
PADDLE_ENFORCE_EQ( // NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
op_happens_before_[posterior_op_idx][prior_op_idx], // ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
false, // a->c will not be shrinked in the following case: AddDownstreamOp(a, b) ->
phi::errors::Unavailable( // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrinked by
"Can not add dependency %d->%d because %d is run before %d", // ShrinkDownstreamMap.
prior_op_idx, for (size_t op_idx : downstream_ops) {
posterior_op_idx, if (OpHappensBefore(op_idx, posterior_op_idx)) {
posterior_op_idx, VLOG(7) << "Find dependencies " << prior_op_idx << "->" << op_idx << "->"
prior_op_idx)); << posterior_op_idx << ", skip adding " << prior_op_idx << "->"
<< posterior_op_idx;
for (size_t op_idx : downstream_ops) { return;
if (op_happens_before_[op_idx][posterior_op_idx]) {
VLOG(7) << "Find dependencies " << prior_op_idx << "->" << op_idx
<< "->" << posterior_op_idx << ", skip adding " << prior_op_idx
<< "->" << posterior_op_idx;
return;
}
} }
} }
downstream_ops.insert(posterior_op_idx); downstream_ops.insert(posterior_op_idx);
if (op_happens_before_.size() != 0) { std::vector<size_t> prior_of_prior = ops_before_[prior_op_idx];
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { std::vector<size_t> posterior_of_posterior = ops_behind_[posterior_op_idx];
if (op_happens_before_[op_idx][prior_op_idx]) {
op_happens_before_[op_idx][posterior_op_idx] = true;
}
if (op_happens_before_[posterior_op_idx][op_idx]) { auto update_op_happen_before = [this](size_t prior_op_idx,
op_happens_before_[prior_op_idx][op_idx] = true; size_t posterior_op_idx) {
} if (!op_happens_before_[prior_op_idx][posterior_op_idx]) {
op_happens_before_[prior_op_idx][posterior_op_idx] = true;
ops_before_[posterior_op_idx].push_back(prior_op_idx);
ops_behind_[prior_op_idx].push_back(posterior_op_idx);
} }
};
update_op_happen_before(prior_op_idx, posterior_op_idx);
// All ops before prior-op are also before posterior-op
for (size_t op_idx : prior_of_prior) {
update_op_happen_before(op_idx, posterior_op_idx);
}
// All ops after posterior-op are also after prior-op
for (size_t op_idx : posterior_of_posterior) {
update_op_happen_before(prior_op_idx, op_idx);
} }
VLOG(8) << prior_op_idx << "->" << posterior_op_idx; VLOG(8) << prior_op_idx << "->" << posterior_op_idx;
VLOG(8) << "Add dependency from " VLOG(8) << "Add dependency from "
<< instructions_->at(prior_op_idx).OpBase()->Type() << "(" << instructions_->at(prior_op_idx).OpBase()->Type() << "("
...@@ -468,46 +483,6 @@ void DependencyBuilder::BuildDownstreamMap() { ...@@ -468,46 +483,6 @@ void DependencyBuilder::BuildDownstreamMap() {
} }
} }
void DependencyBuilder::BuildOpHappensBefore() {
// happens_before[i][j] means i should be executed before j
op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));
// bfs to get all next ops
auto bfs = [&](size_t op_idx) {
std::queue<size_t> q;
std::vector<bool> visited(op_num_, false);
q.push(op_idx);
while (!q.empty()) {
size_t op = q.front();
q.pop();
visited[op] = true;
if (!op_downstream_map_.count(op)) {
continue;
}
for (auto next : op_downstream_map_.at(op)) {
if (!visited[next]) {
PADDLE_ENFORCE_EQ(op_happens_before_[next][op_idx],
false,
paddle::platform::errors::AlreadyExists(
"There exists circle in graph, expected "
"%d->%d, but already got %d->%d",
op_idx,
next,
next,
op_idx));
op_happens_before_[op_idx][next] = true;
VLOG(10) << "happens before: " << op_idx << " " << next;
q.push(next);
}
}
}
};
for (size_t i = 0; i < op_num_; ++i) {
bfs(i);
}
}
void DependencyBuilder::ShrinkDownstreamMap() { void DependencyBuilder::ShrinkDownstreamMap() {
// remove unnecessary downstream ops // remove unnecessary downstream ops
// for example, a->b->c // for example, a->b->c
...@@ -529,7 +504,7 @@ void DependencyBuilder::ShrinkDownstreamMap() { ...@@ -529,7 +504,7 @@ void DependencyBuilder::ShrinkDownstreamMap() {
bool not_after_any = true; bool not_after_any = true;
// find the op that is not executed after any // find the op that is not executed after any
for (size_t other_item : op_downstream_map_.at(i)) { for (size_t other_item : op_downstream_map_.at(i)) {
if (op_happens_before_[other_item][item]) { if (OpHappensBefore(other_item, item)) {
VLOG(8) << "happens_before: " << other_item << "->" << item VLOG(8) << "happens_before: " << other_item << "->" << item
<< ", so skip " << item; << ", so skip " << item;
not_after_any = false; not_after_any = false;
...@@ -541,6 +516,8 @@ void DependencyBuilder::ShrinkDownstreamMap() { ...@@ -541,6 +516,8 @@ void DependencyBuilder::ShrinkDownstreamMap() {
minumum_nexts.insert(item); minumum_nexts.insert(item);
} }
} }
// NOTE(Ruibiao): op_happens_before_ will not be changed when shrinking the
// downstream map
op_downstream_map_.at(i) = minumum_nexts; op_downstream_map_.at(i) = minumum_nexts;
} }
VLOG(8) << "Finish shrink downstream map"; VLOG(8) << "Finish shrink downstream map";
......
...@@ -40,7 +40,13 @@ class DependencyBuilder { ...@@ -40,7 +40,13 @@ class DependencyBuilder {
const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const; const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const;
bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const; bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const {
PADDLE_ENFORCE_GE(
op_happens_before_.size(),
0,
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
}
private: private:
void AddDependencyForCoalesceTensorOp(); void AddDependencyForCoalesceTensorOp();
...@@ -53,21 +59,27 @@ class DependencyBuilder { ...@@ -53,21 +59,27 @@ class DependencyBuilder {
void BuildDownstreamMap(); void BuildDownstreamMap();
void BuildOpHappensBefore();
void ShrinkDownstreamMap(); void ShrinkDownstreamMap();
bool is_build_; bool is_build_;
const std::vector<Instruction>* instructions_; // not_own const std::vector<Instruction>* instructions_; // not_own
size_t op_num_; size_t op_num_;
// op_happens_before_[i][j] == true means op[i] happens before op[j] // ops_behind_ is the adjacency list from an op to its posterior ops, that is to
std::vector<std::vector<bool>> op_happens_before_; // say, ops_behind_[i] == {a, b, c} means op[a], op[b] and op[c] depend on
// op[i] directly or indirectly. ops_before_ is the reversed adjacency list of
// ops_behind_.
std::vector<std::vector<size_t>> ops_before_;
std::vector<std::vector<size_t>> ops_behind_;
// op_downstream_map_ is the mapping from op to its downstream-op set, that is // op_downstream_map_ is the mapping from op to its downstream-op set, that is
// to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c] // to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c]
// should be dispatched after op[i] // depend on op[i] directly.
std::map<size_t, std::set<size_t>> op_downstream_map_; std::map<size_t, std::set<size_t>> op_downstream_map_;
// op_happens_before_ is a matrix form of ops_before_ and ops_behind_; it is
// used to speed up the query.
std::vector<std::vector<bool>> op_happens_before_;
}; };
} // namespace interpreter } // namespace interpreter
......
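The replacement for the removed BuildOpHappensBefore is incremental: instead of re-running a BFS over the whole downstream map, AddDownstreamOp updates the happens-before relation on the fly, using the snapshots prior_of_prior and posterior_of_posterior taken before the lists are mutated. A minimal standalone sketch of that update rule (stand-in types only; the real code additionally rejects cycles and shrinks the downstream map):

#include <cstdio>
#include <vector>

struct HappensBefore {
  std::vector<std::vector<bool>> before;        // before[i][j]: op i runs before op j
  std::vector<std::vector<size_t>> ops_before;  // ops known to run before op i
  std::vector<std::vector<size_t>> ops_behind;  // ops known to run after op i

  explicit HappensBefore(size_t n)
      : before(n, std::vector<bool>(n, false)), ops_before(n), ops_behind(n) {}

  // Record prior -> posterior once, keeping all three views consistent.
  void Mark(size_t prior, size_t posterior) {
    if (!before[prior][posterior]) {
      before[prior][posterior] = true;
      ops_before[posterior].push_back(prior);
      ops_behind[prior].push_back(posterior);
    }
  }

  void AddEdge(size_t prior, size_t posterior) {
    // Snapshot first: Mark() appends to the lists we are about to iterate.
    const auto prior_of_prior = ops_before[prior];
    const auto posterior_of_posterior = ops_behind[posterior];
    Mark(prior, posterior);
    // Everything before prior is also before posterior, and everything
    // behind posterior is also behind prior.
    for (size_t op : prior_of_prior) Mark(op, posterior);
    for (size_t op : posterior_of_posterior) Mark(prior, op);
  }
};

int main() {
  HappensBefore hb(3);
  hb.AddEdge(0, 1);
  hb.AddEdge(1, 2);
  std::printf("%d\n", hb.before[0][2] ? 1 : 0);  // prints 1: 0 -> 1 -> 2 is implied
}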
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include "cinn/common/target.h"
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_cache_key.h" #include "paddle/fluid/framework/paddle2cinn/cinn_cache_key.h"
...@@ -75,8 +76,8 @@ class CinnCompiler { ...@@ -75,8 +76,8 @@ class CinnCompiler {
const CinnCompiledObject& Compile( const CinnCompiledObject& Compile(
const ir::Graph& graph, const ir::Graph& graph,
const std::map<std::string, const phi::DenseTensor*>& input_tensors, const std::map<std::string, const phi::DenseTensor*>& input_tensors = {},
const ::cinn::common::Target& target, const ::cinn::common::Target& target = ::cinn::common::DefaultTarget(),
void* stream = nullptr); void* stream = nullptr);
const CinnCompiledObject& Compile( const CinnCompiledObject& Compile(
......
...@@ -102,6 +102,14 @@ class Vocab : public phi::ExtendedTensor, ...@@ -102,6 +102,14 @@ class Vocab : public phi::ExtendedTensor,
// Kernel. It can be used when you define a non-tensor type that needs to be // Kernel. It can be used when you define a non-tensor type that needs to be
// stored in a vector as PHI kernel argument. // stored in a vector as PHI kernel argument.
template <typename T>
struct PhiVectorType;
template <>
struct PhiVectorType<std::string> {
const char* type_name = "PhiVectorString";
};
template <typename T> template <typename T>
class PhiVector : public phi::ExtendedTensor, class PhiVector : public phi::ExtendedTensor,
public phi::TypeInfoTraits<phi::TensorBase, PhiVector<T>> { public phi::TypeInfoTraits<phi::TensorBase, PhiVector<T>> {
...@@ -129,9 +137,7 @@ class PhiVector : public phi::ExtendedTensor, ...@@ -129,9 +137,7 @@ class PhiVector : public phi::ExtendedTensor,
public: public:
/// \brief Returns the name of the class for type traits. /// \brief Returns the name of the class for type traits.
/// \return The name of the class. /// \return The name of the class.
static const char* name() { static const char* name() { return PhiVectorType<T>().type_name; }
return (std::string("PhiVector_") + std::string(typeid(T).name())).c_str();
}
size_t size() const { return data_.size(); } size_t size() const { return data_.size(); }
......
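The PhiVector::name() change is a lifetime fix rather than a rename: the old body returned c_str() of a temporary std::string, a pointer that dangles as soon as the full expression ends, while the PhiVectorType trait hands back a string literal with static storage duration. A standalone sketch of the hazard and the fix (the names below are stand-ins, not the real phi classes):

#include <string>
#include <typeinfo>

// Old pattern: the concatenated std::string is a temporary, so the pointer
// returned by c_str() is dangling by the time the caller reads it.
const char* bad_name() {
  return (std::string("PhiVector_") + typeid(int).name()).c_str();  // dangling!
}

// New pattern: a per-type trait whose type_name points at a string literal,
// which lives for the whole program.
template <typename T>
struct PhiVectorType;

template <>
struct PhiVectorType<std::string> {
  const char* type_name = "PhiVectorString";
};

template <typename T>
const char* good_name() {
  return PhiVectorType<T>().type_name;
}

int main() { return good_name<std::string>() != nullptr ? 0 : 1; }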
...@@ -1086,6 +1086,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs, ...@@ -1086,6 +1086,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
} }
void AnalysisPredictor::PrepareArgument() { void AnalysisPredictor::PrepareArgument() {
VLOG(3) << "AnalysisPredictor::PrepareArgument";
// Init std::unique_ptr argument_. // Init std::unique_ptr argument_.
argument_.reset(new Argument); argument_.reset(new Argument);
argument_->SetUseGPU(config_.use_gpu()); argument_->SetUseGPU(config_.use_gpu());
...@@ -2246,10 +2247,12 @@ AnalysisPredictor::~AnalysisPredictor() { ...@@ -2246,10 +2247,12 @@ AnalysisPredictor::~AnalysisPredictor() {
} }
std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) { std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
VLOG(3) << "AnalysisPredictor::Clone";
std::lock_guard<std::mutex> lk(clone_mutex_); std::lock_guard<std::mutex> lk(clone_mutex_);
auto *x = new AnalysisPredictor(config_); auto *x = new AnalysisPredictor(config_);
x->status_is_cloned_ = true; x->status_is_cloned_ = true;
x->root_predictor_id_ = this->root_predictor_id_; x->root_predictor_id_ = this->root_predictor_id_;
x->config_.apply_optim_ = false;
if (config_.use_external_stream_ && stream == nullptr) { if (config_.use_external_stream_ && stream == nullptr) {
PADDLE_THROW(platform::errors::InvalidArgument( PADDLE_THROW(platform::errors::InvalidArgument(
"config has been configured to use external stream, but the Clone " "config has been configured to use external stream, but the Clone "
...@@ -2461,7 +2464,7 @@ USE_TRT_CONVERTER(rsqrt); ...@@ -2461,7 +2464,7 @@ USE_TRT_CONVERTER(rsqrt);
USE_TRT_CONVERTER(fused_preln_embedding_eltwise_layernorm) USE_TRT_CONVERTER(fused_preln_embedding_eltwise_layernorm)
USE_TRT_CONVERTER(fused_embedding_eltwise_layernorm); USE_TRT_CONVERTER(fused_embedding_eltwise_layernorm);
USE_TRT_CONVERTER(preln_skip_layernorm) USE_TRT_CONVERTER(preln_skip_layernorm)
USE_TRT_CONVERTER(preln_residual_bias) USE_TRT_CONVERTER(fused_bias_dropout_residual_layer_norm)
USE_TRT_CONVERTER(c_allreduce_sum) USE_TRT_CONVERTER(c_allreduce_sum)
USE_TRT_CONVERTER(roll) USE_TRT_CONVERTER(roll)
USE_TRT_CONVERTER(strided_slice) USE_TRT_CONVERTER(strided_slice)
......
...@@ -136,6 +136,7 @@ const std::vector<std::string> kTRTSubgraphPasses({ ...@@ -136,6 +136,7 @@ const std::vector<std::string> kTRTSubgraphPasses({
#else #else
"elementwise_groupnorm_act_pass", // "elementwise_groupnorm_act_pass", //
"preln_elementwise_groupnorm_act_pass", // "preln_elementwise_groupnorm_act_pass", //
"groupnorm_act_pass", //
#endif #endif
"tensorrt_subgraph_pass", // "tensorrt_subgraph_pass", //
"conv_bn_fuse_pass", // "conv_bn_fuse_pass", //
......
...@@ -46,6 +46,11 @@ class GroupNormOpConverter : public OpConverter { ...@@ -46,6 +46,11 @@ class GroupNormOpConverter : public OpConverter {
std::string scale_name = op_desc.Input("Scale").front(); std::string scale_name = op_desc.Input("Scale").front();
std::string bias_name = op_desc.Input("Bias").front(); std::string bias_name = op_desc.Input("Bias").front();
bool with_silu = false;
if (op_desc.HasAttr("with_silu")) {
with_silu = PADDLE_GET_CONST(bool, op_desc.GetAttr("with_silu"));
}
// get the persistable var's data // get the persistable var's data
auto GetWeight = [&](const std::string& var_name, auto GetWeight = [&](const std::string& var_name,
framework::DDim* dims) -> TensorRTEngine::Weight { framework::DDim* dims) -> TensorRTEngine::Weight {
...@@ -77,6 +82,7 @@ class GroupNormOpConverter : public OpConverter { ...@@ -77,6 +82,7 @@ class GroupNormOpConverter : public OpConverter {
groups, groups,
mean_shape, mean_shape,
variance_shape, variance_shape,
with_silu,
with_fp16); with_fp16);
nvinfer1::ILayer* groupnorm_layer = nvinfer1::ILayer* groupnorm_layer =
engine_->AddDynamicPlugin(&input_itensor, 1, plugin); engine_->AddDynamicPlugin(&input_itensor, 1, plugin);
......
...@@ -26,16 +26,12 @@ class PrelnResidualBiasOpConverter : public OpConverter { ...@@ -26,16 +26,12 @@ class PrelnResidualBiasOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, const framework::Scope& scope,
bool test_mode) override { bool test_mode) override {
VLOG(4) << "convert fused preln_residual_bias op to tensorrt layer"; VLOG(4) << "convert fused_bias_dropout_residual_layer_norm op with "
if (!engine_->with_dynamic_shape()) { "drop_rate = 0 to preln_residual_bias tensorrt layer";
PADDLE_THROW(
platform::errors::Fatal("Unsupported static graph mode. Please set "
"dynamic shape of inputs."));
}
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]); auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
auto* input2 = engine_->GetITensor(op_desc.Input("Y")[0]); auto* input2 = engine_->GetITensor(op_desc.Input("Residual")[0]);
std::vector<nvinfer1::ITensor*> inputs; std::vector<nvinfer1::ITensor*> inputs;
inputs.push_back(input1); inputs.push_back(input1);
inputs.push_back(input2); inputs.push_back(input2);
...@@ -50,18 +46,18 @@ class PrelnResidualBiasOpConverter : public OpConverter { ...@@ -50,18 +46,18 @@ class PrelnResidualBiasOpConverter : public OpConverter {
return temp_data; return temp_data;
}; };
framework::DDim bias_dims, scale_dims, ele_bias_dims; framework::DDim bias_dims, scale_dims, ele_bias_dims;
auto* bias = get_persistable_data("Bias", &bias_dims); auto* bias = get_persistable_data("LnBias", &bias_dims);
auto* scale = get_persistable_data("Scale", &scale_dims); auto* scale = get_persistable_data("LnScale", &scale_dims);
auto const& vars = op_desc.Inputs(false); auto const& vars = op_desc.Inputs(false);
bool has_bias = vars.find("EleBias") != vars.end(); bool has_bias = vars.find("Bias") != vars.end();
float* ele_bias = float* ele_bias =
has_bias ? get_persistable_data("EleBias", &ele_bias_dims) : nullptr; has_bias ? get_persistable_data("Bias", &ele_bias_dims) : nullptr;
int bias_size = phi::product(bias_dims); int bias_size = phi::product(bias_dims);
int scale_size = phi::product(scale_dims); int scale_size = phi::product(scale_dims);
int ele_bias_size = has_bias ? phi::product(ele_bias_dims) : 0; int ele_bias_size = has_bias ? phi::product(ele_bias_dims) : 0;
float epsilon = PADDLE_GET_CONST(float, op_desc.GetAttr("epsilon")); float epsilon = PADDLE_GET_CONST(float, op_desc.GetAttr("ln_epsilon"));
bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16(); bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
if (engine_->precision() == AnalysisConfig::Precision::kInt8) { if (engine_->precision() == AnalysisConfig::Precision::kInt8) {
with_fp16 = true; with_fp16 = true;
...@@ -102,8 +98,8 @@ class PrelnResidualBiasOpConverter : public OpConverter { ...@@ -102,8 +98,8 @@ class PrelnResidualBiasOpConverter : public OpConverter {
plugin_inputs.emplace_back(input2); plugin_inputs.emplace_back(input2);
layer = engine_->AddDynamicPlugin(plugin_inputs.data(), 2, plugin); layer = engine_->AddDynamicPlugin(plugin_inputs.data(), 2, plugin);
std::vector<std::string> output_names; std::vector<std::string> output_names;
output_names.push_back(op_desc.Output("Out_0")[0]); output_names.push_back(op_desc.Output("Y")[0]);
output_names.push_back(op_desc.Output("Out_1")[0]); output_names.push_back(op_desc.Output("BiasDropoutResidualOut")[0]);
RreplenishLayerAndOutput( RreplenishLayerAndOutput(
layer, "preln_residual_bias", output_names, test_mode); layer, "preln_residual_bias", output_names, test_mode);
} }
...@@ -113,4 +109,5 @@ class PrelnResidualBiasOpConverter : public OpConverter { ...@@ -113,4 +109,5 @@ class PrelnResidualBiasOpConverter : public OpConverter {
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
REGISTER_TRT_OP_CONVERTER(preln_residual_bias, PrelnResidualBiasOpConverter); REGISTER_TRT_OP_CONVERTER(fused_bias_dropout_residual_layer_norm,
PrelnResidualBiasOpConverter);
...@@ -1495,7 +1495,21 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -1495,7 +1495,21 @@ struct SimpleOpTypeSetTeller : public Teller {
return false; return false;
} }
} }
if (op_type == "fused_bias_dropout_residual_layer_norm") {
if (!with_dynamic_shape) {
VLOG(3) << "fused_bias_dropout_residual_layer_norm should run on "
"dynamic shape mode.";
return false;
}
float dropout_rate =
PADDLE_GET_CONST(float, desc.GetAttr("dropout_rate"));
if (dropout_rate != 0.0f) {
VLOG(4) << "preln_residual_bias trt layer can not work with "
"fused_bias_dropout_residual_layer_norm op in which the "
"dropout_rate != 0, stop convert";
return false;
}
}
if (op_type == "fused_preln_embedding_eltwise_layernorm") { if (op_type == "fused_preln_embedding_eltwise_layernorm") {
if (!with_dynamic_shape) { if (!with_dynamic_shape) {
VLOG(3) << "fused_preln_embedding_eltwise_layernorm should run on " VLOG(3) << "fused_preln_embedding_eltwise_layernorm should run on "
...@@ -2594,7 +2608,7 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -2594,7 +2608,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"slice", "slice",
"strided_slice", "strided_slice",
"fused_preln_embedding_eltwise_layernorm", "fused_preln_embedding_eltwise_layernorm",
"preln_residual_bias", "fused_bias_dropout_residual_layer_norm",
"c_allreduce_sum", "c_allreduce_sum",
"c_allreduce_min", "c_allreduce_min",
"c_allreduce_max", "c_allreduce_max",
...@@ -2744,7 +2758,7 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -2744,7 +2758,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"strided_slice", "strided_slice",
"fused_preln_embedding_eltwise_layernorm", "fused_preln_embedding_eltwise_layernorm",
"preln_skip_layernorm", "preln_skip_layernorm",
"preln_residual_bias", "fused_bias_dropout_residual_layer_norm",
"c_allreduce_sum", "c_allreduce_sum",
"c_allreduce_min", "c_allreduce_min",
"c_allreduce_max", "c_allreduce_max",
......
...@@ -49,8 +49,8 @@ struct GroupNormNHWCParams { ...@@ -49,8 +49,8 @@ struct GroupNormNHWCParams {
int32_t c; int32_t c;
// The number of groups. // The number of groups.
int32_t groups; int32_t groups;
// Do we apply the Swish activation function? // Do we apply the Silu activation function?
bool withSwish; bool withSilu;
// Precomputed values and parameters to control the execution of the kernels. // Precomputed values and parameters to control the execution of the kernels.
......
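The withSwish -> withSilu rename in this struct and in the kernels below is terminological, not behavioral: the kernels compute x * sigmoid(x), which is SiLU, i.e. the beta = 1 special case of Swish:

\[
\mathrm{Swish}_{\beta}(x) = x\,\sigma(\beta x), \qquad
\mathrm{SiLU}(x) = x\,\sigma(x) = \mathrm{Swish}_{1}(x), \qquad
\sigma(x) = \frac{1}{1 + e^{-x}}.
\]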
...@@ -247,8 +247,8 @@ __global__ void groupNormNHWCScaleKernel(const GroupNormNHWCParams params) { ...@@ -247,8 +247,8 @@ __global__ void groupNormNHWCScaleKernel(const GroupNormNHWCParams params) {
f2.x = gammaF2.x * f2.x + betaF2.x; f2.x = gammaF2.x * f2.x + betaF2.x;
f2.y = gammaF2.y * f2.y + betaF2.y; f2.y = gammaF2.y * f2.y + betaF2.y;
// Apply Swish if needed. // Apply Silu if needed.
if (params.withSwish) { if (params.withSilu) {
f2.x = f2.x * sigmoid(f2.x); f2.x = f2.x * sigmoid(f2.x);
f2.y = f2.y * sigmoid(f2.y); f2.y = f2.y * sigmoid(f2.y);
} }
...@@ -457,7 +457,7 @@ bool GroupNormPluginDynamic::supportsFormatCombination( ...@@ -457,7 +457,7 @@ bool GroupNormPluginDynamic::supportsFormatCombination(
if (pos == 0) { if (pos == 0) {
if (with_fp16_) { if (with_fp16_) {
return ((in.type == nvinfer1::DataType::kHALF) && return ((in.type == nvinfer1::DataType::kHALF) &&
(in.format == nvinfer1::PluginFormat::kLINEAR || ((!with_silu_ && in.format == nvinfer1::PluginFormat::kLINEAR) ||
in.format == nvinfer1::PluginFormat::kHWC8)); in.format == nvinfer1::PluginFormat::kHWC8));
} else { } else {
return (in.type == nvinfer1::DataType::kFLOAT) && return (in.type == nvinfer1::DataType::kFLOAT) &&
...@@ -624,7 +624,7 @@ int GroupNormPluginDynamic::enqueue( ...@@ -624,7 +624,7 @@ int GroupNormPluginDynamic::enqueue(
cPerBlock = 8; cPerBlock = 8;
} }
params_.withSwish = false; params_.withSilu = with_silu_;
params_.dst = static_cast<half *>(outputs[0]); params_.dst = static_cast<half *>(outputs[0]);
params_.srcX = static_cast<half const *>(inputs[0]); params_.srcX = static_cast<half const *>(inputs[0]);
params_.gamma = scale_gpu_; params_.gamma = scale_gpu_;
......
...@@ -164,11 +164,13 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { ...@@ -164,11 +164,13 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT {
int groups, int groups,
std::vector<int64_t> mean_shape, std::vector<int64_t> mean_shape,
std::vector<int64_t> variance_shape, std::vector<int64_t> variance_shape,
bool with_silu,
bool with_fp16) bool with_fp16)
: groups_(groups), : groups_(groups),
eps_(eps), eps_(eps),
mean_shape_(mean_shape), mean_shape_(mean_shape),
variance_shape_(variance_shape), variance_shape_(variance_shape),
with_silu_(with_silu),
with_fp16_(with_fp16) { with_fp16_(with_fp16) {
scale_.resize(scale_num); scale_.resize(scale_num);
bias_.resize(bias_num); bias_.resize(bias_num);
...@@ -183,6 +185,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { ...@@ -183,6 +185,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT {
DeserializeValue(&serialData, &serialLength, &groups_); DeserializeValue(&serialData, &serialLength, &groups_);
DeserializeValue(&serialData, &serialLength, &mean_shape_); DeserializeValue(&serialData, &serialLength, &mean_shape_);
DeserializeValue(&serialData, &serialLength, &variance_shape_); DeserializeValue(&serialData, &serialLength, &variance_shape_);
DeserializeValue(&serialData, &serialLength, &with_silu_);
DeserializeValue(&serialData, &serialLength, &with_fp16_); DeserializeValue(&serialData, &serialLength, &with_fp16_);
} }
nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override {
...@@ -194,6 +197,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { ...@@ -194,6 +197,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT {
groups_, groups_,
mean_shape_, mean_shape_,
variance_shape_, variance_shape_,
with_silu_,
with_fp16_); with_fp16_);
ptr->scale_gpu_ = scale_gpu_; ptr->scale_gpu_ = scale_gpu_;
ptr->bias_gpu_ = bias_gpu_; ptr->bias_gpu_ = bias_gpu_;
...@@ -210,7 +214,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { ...@@ -210,7 +214,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT {
return SerializedSize(scale_) + SerializedSize(bias_) + return SerializedSize(scale_) + SerializedSize(bias_) +
SerializedSize(eps_) + SerializedSize(groups_) + SerializedSize(eps_) + SerializedSize(groups_) +
SerializedSize(mean_shape_) + SerializedSize(variance_shape_) + SerializedSize(mean_shape_) + SerializedSize(variance_shape_) +
SerializedSize(with_fp16_); SerializedSize(with_silu_) + SerializedSize(with_fp16_);
} }
void serialize(void* buffer) const TRT_NOEXCEPT override { void serialize(void* buffer) const TRT_NOEXCEPT override {
SerializeValue(&buffer, scale_); SerializeValue(&buffer, scale_);
...@@ -219,6 +223,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { ...@@ -219,6 +223,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT {
SerializeValue(&buffer, groups_); SerializeValue(&buffer, groups_);
SerializeValue(&buffer, mean_shape_); SerializeValue(&buffer, mean_shape_);
SerializeValue(&buffer, variance_shape_); SerializeValue(&buffer, variance_shape_);
SerializeValue(&buffer, with_silu_);
SerializeValue(&buffer, with_fp16_); SerializeValue(&buffer, with_fp16_);
} }
nvinfer1::DimsExprs getOutputDimensions( nvinfer1::DimsExprs getOutputDimensions(
...@@ -277,6 +282,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { ...@@ -277,6 +282,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT {
std::vector<int64_t> mean_shape_; std::vector<int64_t> mean_shape_;
std::vector<int64_t> variance_shape_; std::vector<int64_t> variance_shape_;
GroupNormNHWCParams params_; GroupNormNHWCParams params_;
bool with_silu_;
bool with_fp16_; bool with_fp16_;
}; };
class GroupNormPluginDynamicCreator : public TensorRTPluginCreator { class GroupNormPluginDynamicCreator : public TensorRTPluginCreator {
......
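Note how adding with_silu_ touches four members at once: getSerializationSize(), serialize(), the deserializing constructor, and clone(). All four must agree because the fields travel through a flat byte buffer that is read back in write order. A standalone sketch of that contract, with simplified stand-ins for Paddle's SerializeValue/DeserializeValue helpers:

#include <cassert>
#include <cstring>
#include <vector>

// Simplified helpers: raw memcpy of trivially copyable fields.
template <typename T>
void SerializeValue(char** buf, const T& v) {
  std::memcpy(*buf, &v, sizeof(T));
  *buf += sizeof(T);
}

template <typename T>
void DeserializeValue(const char** buf, T* v) {
  std::memcpy(v, *buf, sizeof(T));
  *buf += sizeof(T);
}

int main() {
  int groups = 32;
  bool with_silu = true, with_fp16 = false;

  // The size computation must cover every serialized field.
  std::vector<char> storage(sizeof(int) + 2 * sizeof(bool));

  char* w = storage.data();
  SerializeValue(&w, groups);     // write order ...
  SerializeValue(&w, with_silu);
  SerializeValue(&w, with_fp16);

  int g = 0;
  bool silu = false, fp16 = true;
  const char* r = storage.data();
  DeserializeValue(&r, &g);       // ... must match read order exactly.
  DeserializeValue(&r, &silu);
  DeserializeValue(&r, &fp16);
  assert(g == 32 && silu && !fp16);
}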
...@@ -330,8 +330,8 @@ __global__ void prelnGroupNormNHWCScaleKernel(GroupNormNHWCParams params) { ...@@ -330,8 +330,8 @@ __global__ void prelnGroupNormNHWCScaleKernel(GroupNormNHWCParams params) {
f2.x = gammaF2.x * f2.x + betaF2.x; f2.x = gammaF2.x * f2.x + betaF2.x;
f2.y = gammaF2.y * f2.y + betaF2.y; f2.y = gammaF2.y * f2.y + betaF2.y;
// Apply Swish if needed. // Apply Silu if needed.
if (params.withSwish) { if (params.withSilu) {
f2.x = f2.x * sigmoid(f2.x); f2.x = f2.x * sigmoid(f2.x);
f2.y = f2.y * sigmoid(f2.y); f2.y = f2.y * sigmoid(f2.y);
} }
...@@ -431,7 +431,7 @@ int PrelnGroupnormActPluginDynamic::enqueue( ...@@ -431,7 +431,7 @@ int PrelnGroupnormActPluginDynamic::enqueue(
if (cPerBlock > input_desc[0].dims.d[1]) { if (cPerBlock > input_desc[0].dims.d[1]) {
cPerBlock = 8; cPerBlock = 8;
} }
params_.withSwish = with_silu_; params_.withSilu = with_silu_;
params_.dst = static_cast<half *>(outputs[1]); params_.dst = static_cast<half *>(outputs[1]);
params_.eleOut = static_cast<half *>(outputs[0]); params_.eleOut = static_cast<half *>(outputs[0]);
params_.srcX = static_cast<half const *>(inputs[0]); params_.srcX = static_cast<half const *>(inputs[0]);
......
...@@ -340,8 +340,8 @@ __global__ void skipGroupNormNHWCScaleKernel(GroupNormNHWCParams params) { ...@@ -340,8 +340,8 @@ __global__ void skipGroupNormNHWCScaleKernel(GroupNormNHWCParams params) {
f2.x = gammaF2.x * f2.x + betaF2.x; f2.x = gammaF2.x * f2.x + betaF2.x;
f2.y = gammaF2.y * f2.y + betaF2.y; f2.y = gammaF2.y * f2.y + betaF2.y;
// Apply Swish if needed. // Apply Silu if needed.
if (params.withSwish) { if (params.withSilu) {
f2.x = f2.x * sigmoid(f2.x); f2.x = f2.x * sigmoid(f2.x);
f2.y = f2.y * sigmoid(f2.y); f2.y = f2.y * sigmoid(f2.y);
} }
...@@ -439,7 +439,7 @@ int SkipGroupnormActPluginDynamic::enqueue( ...@@ -439,7 +439,7 @@ int SkipGroupnormActPluginDynamic::enqueue(
if (cPerBlock > input_desc[0].dims.d[1]) { if (cPerBlock > input_desc[0].dims.d[1]) {
cPerBlock = 8; cPerBlock = 8;
} }
params_.withSwish = true; params_.withSilu = true;
params_.dst = static_cast<half *>(outputs[0]); params_.dst = static_cast<half *>(outputs[0]);
params_.srcX = static_cast<half const *>(inputs[0]); params_.srcX = static_cast<half const *>(inputs[0]);
params_.srcY = static_cast<half const *>(inputs[1]); params_.srcY = static_cast<half const *>(inputs[1]);
......
...@@ -38,5 +38,13 @@ void CompilationUnit::SetEngine(const std::string &name, ...@@ -38,5 +38,13 @@ void CompilationUnit::SetEngine(const std::string &name,
const jit::EngineMap &CompilationUnit::EngineMap() const { return engine_map_; } const jit::EngineMap &CompilationUnit::EngineMap() const { return engine_map_; }
std::shared_ptr<CompilationUnit> CompilationUnit::Clone(void *stream) {
auto x = std::make_shared<CompilationUnit>();
for (auto &it : engine_map_) {
x->SetEngine(it.first, std::move(it.second->Clone(stream)));
}
return x;
}
} // namespace jit } // namespace jit
} // namespace paddle } // namespace paddle
...@@ -36,6 +36,8 @@ class CompilationUnit { ...@@ -36,6 +36,8 @@ class CompilationUnit {
const jit::EngineMap &EngineMap() const; const jit::EngineMap &EngineMap() const;
std::shared_ptr<CompilationUnit> Clone(void *stream = nullptr);
private: private:
jit::EngineMap engine_map_; jit::EngineMap engine_map_;
}; };
......
...@@ -29,6 +29,8 @@ class BaseEngine { ...@@ -29,6 +29,8 @@ class BaseEngine {
virtual std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) = 0; virtual std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) = 0;
virtual std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) = 0;
virtual ~BaseEngine() {} virtual ~BaseEngine() {}
}; };
......
...@@ -25,17 +25,18 @@ ...@@ -25,17 +25,18 @@
namespace paddle { namespace paddle {
namespace jit { namespace jit {
InterpreterEngine::InterpreterEngine(const std::shared_ptr<FunctionInfo> &info, InterpreterEngine::InterpreterEngine(
const VariableMap &params_dict, const std::shared_ptr<FunctionInfo> &info,
const phi::Place &place) const std::shared_ptr<VariableMap> &params_dict,
: info_(info), place_(place) { const phi::Place &place)
: info_(info), params_dict_(params_dict), place_(place) {
info_->RemoveDescFeedFetch(); info_->RemoveDescFeedFetch();
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
static_cast<int64_t>(info_->ProgramDesc().Block(0).OpSize()), static_cast<int64_t>(info_->ProgramDesc().Block(0).OpSize()),
0, 0,
platform::errors::PreconditionNotMet( platform::errors::PreconditionNotMet(
"There is no operator in ProgramDesc.")); "There is no operator in ProgramDesc."));
utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_); utils::ShareParamsIntoScope(info_->ParamNames(), params_dict_, &scope_);
VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_); VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
CreateInterpreterCore(); CreateInterpreterCore();
} }
...@@ -98,5 +99,10 @@ const std::shared_ptr<FunctionInfo> &InterpreterEngine::Info() const { ...@@ -98,5 +99,10 @@ const std::shared_ptr<FunctionInfo> &InterpreterEngine::Info() const {
return info_; return info_;
} }
std::unique_ptr<BaseEngine> InterpreterEngine::Clone(void *stream) {
auto *x = new InterpreterEngine(info_, params_dict_, place_);
return std::unique_ptr<BaseEngine>(x);
}
} // namespace jit } // namespace jit
} // namespace paddle } // namespace paddle
...@@ -36,21 +36,25 @@ using InterpreterCore = framework::InterpreterCore; ...@@ -36,21 +36,25 @@ using InterpreterCore = framework::InterpreterCore;
class InterpreterEngine : public BaseEngine { class InterpreterEngine : public BaseEngine {
public: public:
InterpreterEngine(const std::shared_ptr<FunctionInfo> &info, InterpreterEngine(const std::shared_ptr<FunctionInfo> &info,
const VariableMap &params_dict, const std::shared_ptr<VariableMap> &params_dict,
const phi::Place &place); const phi::Place &place);
~InterpreterEngine() noexcept {} ~InterpreterEngine() noexcept {}
void CreateInterpreterCore(); void CreateInterpreterCore();
std::vector<Tensor> operator()(const std::vector<Tensor> &inputs); std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) override;
std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs); std::vector<DenseTensor> operator()(
const std::vector<DenseTensor> &inputs) override;
const std::shared_ptr<FunctionInfo> &Info() const; const std::shared_ptr<FunctionInfo> &Info() const;
std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) override;
private: private:
std::shared_ptr<FunctionInfo> info_; std::shared_ptr<FunctionInfo> info_;
std::shared_ptr<VariableMap> params_dict_;
framework::Scope scope_; framework::Scope scope_;
phi::Place place_; phi::Place place_;
std::shared_ptr<framework::InterpreterCore> inner_interpreter_; std::shared_ptr<framework::InterpreterCore> inner_interpreter_;
......
...@@ -27,11 +27,15 @@ static bool PaddleTensorToDenseTensor(const PaddleTensor &pt, ...@@ -27,11 +27,15 @@ static bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
DenseTensor *t, DenseTensor *t,
const platform::Place &place); const platform::Place &place);
PredictorEngine::PredictorEngine(const std::shared_ptr<FunctionInfo> &info, PredictorEngine::PredictorEngine(
const VariableMap &params_dict, const std::shared_ptr<FunctionInfo> &info,
const phi::Place &place) const std::shared_ptr<VariableMap> &params_dict,
: info_(info), scope_(new framework::Scope()), place_(place) { const phi::Place &place)
utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, scope_.get()); : info_(info),
params_dict_(params_dict),
scope_(new framework::Scope()),
place_(place) {
utils::ShareParamsIntoScope(info_->ParamNames(), params_dict_, scope_.get());
VLOG(6) << framework::GenScopeTreeDebugInfo(scope_.get()); VLOG(6) << framework::GenScopeTreeDebugInfo(scope_.get());
// TODO(Aurelius84): Expose AnalysisConfig to user. // TODO(Aurelius84): Expose AnalysisConfig to user.
...@@ -55,6 +59,23 @@ PredictorEngine::PredictorEngine(const std::shared_ptr<FunctionInfo> &info, ...@@ -55,6 +59,23 @@ PredictorEngine::PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
scope_, std::make_shared<framework::ProgramDesc>(info_->ProgramDesc())); scope_, std::make_shared<framework::ProgramDesc>(info_->ProgramDesc()));
} }
PredictorEngine::PredictorEngine(
const std::shared_ptr<FunctionInfo> &info,
const std::shared_ptr<framework::Scope> &scope,
const phi::Place &place,
const std::shared_ptr<PaddlePredictor> &predictor)
: info_(info),
scope_(scope),
place_(place),
predictor_(std::dynamic_pointer_cast<AnalysisPredictor, PaddlePredictor>(
predictor)) {}
std::unique_ptr<BaseEngine> PredictorEngine::Clone(void *stream) {
auto *x = new PredictorEngine(
info_, scope_, place_, std::move(predictor_->Clone(stream)));
return std::unique_ptr<BaseEngine>(x);
}
std::vector<Tensor> PredictorEngine::operator()( std::vector<Tensor> PredictorEngine::operator()(
const std::vector<Tensor> &inputs) { const std::vector<Tensor> &inputs) {
auto dense_tensors = utils::ToDenseTensors(inputs); auto dense_tensors = utils::ToDenseTensors(inputs);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
namespace paddle { namespace paddle {
class AnalysisPredictor; class AnalysisPredictor;
class PaddlePredictor;
namespace framework { namespace framework {
class Scope; class Scope;
...@@ -30,17 +31,26 @@ namespace jit { ...@@ -30,17 +31,26 @@ namespace jit {
class PredictorEngine : public BaseEngine { class PredictorEngine : public BaseEngine {
public: public:
PredictorEngine(const std::shared_ptr<FunctionInfo> &info, PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
const VariableMap &params_dict, const std::shared_ptr<VariableMap> &params_dict,
const phi::Place &place); const phi::Place &place);
PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
const std::shared_ptr<framework::Scope> &scope,
const phi::Place &place,
const std::shared_ptr<PaddlePredictor> &predictor);
~PredictorEngine() noexcept {} ~PredictorEngine() noexcept {}
std::vector<Tensor> operator()(const std::vector<Tensor> &inputs); std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) override;
std::vector<DenseTensor> operator()(
const std::vector<DenseTensor> &inputs) override;
std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs); std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) override;
private: private:
std::shared_ptr<FunctionInfo> info_; std::shared_ptr<FunctionInfo> info_;
std::shared_ptr<VariableMap> params_dict_;
std::shared_ptr<framework::Scope> scope_; std::shared_ptr<framework::Scope> scope_;
phi::Place place_; phi::Place place_;
std::shared_ptr<AnalysisPredictor> predictor_; std::shared_ptr<AnalysisPredictor> predictor_;
......
...@@ -71,18 +71,18 @@ void ShareIntoScope(const std::vector<std::string> &ordered_input_names, ...@@ -71,18 +71,18 @@ void ShareIntoScope(const std::vector<std::string> &ordered_input_names,
} }
void ShareParamsIntoScope(const std::vector<std::string> &param_names, void ShareParamsIntoScope(const std::vector<std::string> &param_names,
const VariableMap &params_dict, const std::shared_ptr<VariableMap> &params_dict,
framework::Scope *scope) { framework::Scope *scope) {
for (size_t i = 0; i < param_names.size(); ++i) { for (size_t i = 0; i < param_names.size(); ++i) {
std::string name = param_names[i]; std::string name = param_names[i];
PADDLE_ENFORCE_EQ(params_dict.count(name), PADDLE_ENFORCE_EQ(params_dict->count(name),
1, 1,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Parameter named %s is not existed in params_dict. " "Parameter named %s is not existed in params_dict. "
"Please check that your model was saved correctly", "Please check that your model was saved correctly",
name)); name));
auto &param = params_dict.find(name)->second; auto &param = params_dict->find(name)->second;
auto &dense_tensor = param->Get<DenseTensor>(); auto &dense_tensor = param->Get<DenseTensor>();
auto *var = scope->Var(name); auto *var = scope->Var(name);
auto *dst_tensor = var->GetMutable<DenseTensor>(); auto *dst_tensor = var->GetMutable<DenseTensor>();
......
...@@ -51,14 +51,14 @@ void ShareIntoScope(const std::vector<std::string> &ordered_input_names, ...@@ -51,14 +51,14 @@ void ShareIntoScope(const std::vector<std::string> &ordered_input_names,
framework::Scope *scope); framework::Scope *scope);
void ShareParamsIntoScope(const std::vector<std::string> &param_names, void ShareParamsIntoScope(const std::vector<std::string> &param_names,
const VariableMap &params_dict, const std::shared_ptr<VariableMap> &params_dict,
framework::Scope *scope); framework::Scope *scope);
void RemoveFeedFetch(framework::ProgramDesc *program_desc); void RemoveFeedFetch(framework::ProgramDesc *program_desc);
template <typename T> template <typename T>
std::shared_ptr<T> MakeEngine(const std::shared_ptr<FunctionInfo> &info, std::shared_ptr<T> MakeEngine(const std::shared_ptr<FunctionInfo> &info,
const VariableMap &params_dict, const std::shared_ptr<VariableMap> &params_dict,
const phi::Place &place) { const phi::Place &place) {
return std::make_shared<T>(info, params_dict, place); return std::make_shared<T>(info, params_dict, place);
} }
......
...@@ -26,11 +26,14 @@ ...@@ -26,11 +26,14 @@
namespace paddle { namespace paddle {
namespace jit { namespace jit {
Layer::Layer(const VariableMap& params_map, Layer::Layer(const std::shared_ptr<VariableMap>& params_map,
const VariableMap& attrs_map, const std::shared_ptr<VariableMap>& attrs_map,
const FunctionInfoMap& info_map, const FunctionInfoMap& info_map,
const phi::Place& place) const phi::Place& place)
: params_map_(params_map), attrs_map_(attrs_map), info_map_(info_map) { : params_map_(params_map),
attrs_map_(attrs_map),
info_map_(info_map),
place_(place) {
unit_.reset(new CompilationUnit()); unit_.reset(new CompilationUnit());
} }
...@@ -77,12 +80,12 @@ std::vector<std::string> Layer::FunctionNames() const { ...@@ -77,12 +80,12 @@ std::vector<std::string> Layer::FunctionNames() const {
#define PD_SPECIALZE_ATTRIBUTE_TYPE(T) \ #define PD_SPECIALZE_ATTRIBUTE_TYPE(T) \
template <> \ template <> \
T Layer::Attribute<T>(const std::string& name) const { \ T Layer::Attribute<T>(const std::string& name) const { \
if (attrs_map_.find(name) == attrs_map_.end()) { \ if (attrs_map_->find(name) == attrs_map_->end()) { \
PADDLE_THROW(phi::errors::NotFound( \ PADDLE_THROW(phi::errors::NotFound( \
"Attribute can not found %s, please check if it exists.")); \ "Attribute can not found %s, please check if it exists.")); \
return T(); \ return T(); \
} \ } \
auto var = attrs_map_.at(name); \ auto var = attrs_map_->at(name); \
T ret = var->Get<T>(); \ T ret = var->Get<T>(); \
return ret; \ return ret; \
} }
...@@ -94,5 +97,12 @@ PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<int>) ...@@ -94,5 +97,12 @@ PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<int>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<float>) PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<float>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<std::string>) PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<std::string>)
std::shared_ptr<Layer> Layer::Clone(void* stream) {
std::shared_ptr<Layer> x =
std::make_shared<Layer>(params_map_, attrs_map_, info_map_, place_);
x->unit_ = unit_->Clone(stream);
return x;
}
} // namespace jit } // namespace jit
} // namespace paddle } // namespace paddle
...@@ -43,8 +43,8 @@ using FunctionInfoMap = ...@@ -43,8 +43,8 @@ using FunctionInfoMap =
class Layer { class Layer {
public: public:
Layer(const VariableMap& params_map, Layer(const std::shared_ptr<VariableMap>& params_map,
const VariableMap& attrs_map_, const std::shared_ptr<VariableMap>& attrs_map_,
const FunctionInfoMap& info_map, const FunctionInfoMap& info_map,
const phi::Place& place); const phi::Place& place);
...@@ -67,10 +67,13 @@ class Layer { ...@@ -67,10 +67,13 @@ class Layer {
std::vector<std::string> FunctionNames() const; std::vector<std::string> FunctionNames() const;
std::shared_ptr<Layer> Clone(void* stream = nullptr);
private: private:
VariableMap params_map_; std::shared_ptr<VariableMap> params_map_;
VariableMap attrs_map_; std::shared_ptr<VariableMap> attrs_map_;
FunctionInfoMap info_map_; FunctionInfoMap info_map_;
phi::Place place_;
std::shared_ptr<CompilationUnit> unit_; std::shared_ptr<CompilationUnit> unit_;
}; };
......
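Moving params_map_ and attrs_map_ behind shared_ptr is what makes Layer::Clone cheap: clones alias the same parameter tables (no weight copy) while engines and scopes are duplicated per instance. A standalone sketch of that sharing scheme (FakeLayer and engine_id are illustrative stand-ins for jit::Layer and its engine map):

#include <cassert>
#include <map>
#include <memory>
#include <string>

using VariableMap = std::map<std::string, float>;  // stand-in for real Variables

struct FakeLayer {
  std::shared_ptr<VariableMap> params;  // shared across clones
  int engine_id;                        // per-clone state, "cloned" below

  std::shared_ptr<FakeLayer> Clone() const {
    auto x = std::make_shared<FakeLayer>();
    x->params = params;            // aliasing copy: refcount++, no deep copy
    x->engine_id = engine_id + 1;  // each clone gets its own engine
    return x;
  }
};

int main() {
  FakeLayer layer{std::make_shared<VariableMap>(VariableMap{{"fbias", 1.4f}}),
                  0};
  auto layer2 = layer.Clone();
  assert(layer.params.use_count() == 2);        // weights are shared
  assert(layer2->params->at("fbias") == 1.4f);  // same parameters visible
  return 0;
}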
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/timer.h"
#include "paddle/phi/api/include/api.h" #include "paddle/phi/api/include/api.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
...@@ -78,7 +79,11 @@ TEST(CpuLayerTest, Function) { ...@@ -78,7 +79,11 @@ TEST(CpuLayerTest, Function) {
TEST(CpuLayerTest, Construct) { TEST(CpuLayerTest, Construct) {
auto place = phi::CPUPlace(); auto place = phi::CPUPlace();
std::string path = "./multi_program_load/export"; std::string path = "./multi_program_load/export";
paddle::platform::Timer timer;
timer.Start();
auto layer = jit::Load(path, place); auto layer = jit::Load(path, place);
timer.Pause();
std::cout << "jit::Load coast" << timer.ElapsedMS() << std::endl;
float fbias = layer.Attribute<float>("fbias"); float fbias = layer.Attribute<float>("fbias");
EXPECT_FLOAT_EQ(fbias, 1.4); EXPECT_FLOAT_EQ(fbias, 1.4);
...@@ -119,6 +124,41 @@ TEST(CpuLayerTest, Construct) { ...@@ -119,6 +124,41 @@ TEST(CpuLayerTest, Construct) {
EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6); EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
} }
TEST(CpuLayerTest, Clone) {
auto place = phi::CPUPlace();
std::string path = "./multi_program_load/export";
paddle::platform::Timer timer;
timer.Start();
auto layer = jit::Load(path, place);
timer.Pause();
std::cout << "jit::Load cost " << timer.ElapsedMS() << " ms" << std::endl;
timer.Start();
auto layer2 = layer.Clone();
timer.Pause();
std::cout << "jit::Layer::Clone cost " << timer.ElapsedMS() << " ms"
<< std::endl;
float fbias = layer2->Attribute<float>("fbias");
EXPECT_FLOAT_EQ(fbias, 1.4);
auto inputs = PrepareInputs(place);
auto outs = layer2->forward(inputs);
auto out_data = outs[0].data<float>();
EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
auto func = layer2->Function("infer");
EXPECT_TRUE(func.IsValid());
outs = func(inputs);
out_data = outs[0].data<float>();
EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
auto pow_out =
paddle::experimental::pow(outs[0], paddle::experimental::Scalar(2));
out_data = pow_out.data<float>();
EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
}
#if defined(PADDLE_WITH_CUDA) #if defined(PADDLE_WITH_CUDA)
TEST(GpuLayerTest, Construct) { TEST(GpuLayerTest, Construct) {
auto place = phi::GPUPlace(); auto place = phi::GPUPlace();
...@@ -147,6 +187,22 @@ TEST(GpuLayerTest, Construct) { ...@@ -147,6 +187,22 @@ TEST(GpuLayerTest, Construct) {
out_data = cpu_tensor.data<float>(); out_data = cpu_tensor.data<float>();
EXPECT_NEAR(out_data[0], sqrt(1.41562390), 1e-6); EXPECT_NEAR(out_data[0], sqrt(1.41562390), 1e-6);
} }
TEST(GpuLayerTest, Clone) {
auto place = phi::GPUPlace();
std::string path = "./multi_program_load/export";
auto layer = jit::Load(path, place);
auto inputs = PrepareInputs(place);
auto layer2 = layer.Clone();
auto outs = layer2->forward(inputs);
auto gpu_tensor = outs[0];
auto cpu_tensor =
paddle::experimental::copy_to(gpu_tensor, phi::CPUPlace(), true);
auto out_data = cpu_tensor.data<float>();
EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
}
#endif #endif
} // namespace jit } // namespace jit
......
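The move from value-typed VariableMap members to std::shared_ptr<VariableMap> is what makes the new Clone() cheap: a clone can alias the same parameter storage instead of deep-copying every tensor. A minimal sketch of the idea, using hypothetical stand-in types (SimpleLayer and the trivial Variable below are illustrative, not Paddle's real classes):

#include <memory>
#include <string>
#include <unordered_map>

// Stand-ins for paddle::framework::Variable and jit::VariableMap.
struct Variable { float data = 0.f; };
using VariableMap = std::unordered_map<std::string, std::shared_ptr<Variable>>;

class SimpleLayer {
 public:
  explicit SimpleLayer(std::shared_ptr<VariableMap> params)
      : params_map_(std::move(params)) {}

  // Cloning copies only the shared_ptr handle, so the cost is O(1)
  // regardless of how many parameters the layer holds.
  std::shared_ptr<SimpleLayer> Clone() const {
    return std::make_shared<SimpleLayer>(params_map_);
  }

  Variable* Param(const std::string& name) { return (*params_map_)[name].get(); }

 private:
  std::shared_ptr<VariableMap> params_map_;
};

int main() {
  auto params = std::make_shared<VariableMap>();
  (*params)["fbias"] = std::make_shared<Variable>();
  SimpleLayer layer(params);
  auto layer2 = layer.Clone();
  layer2->Param("fbias")->data = 1.4f;  // visible through both layers
  return layer.Param("fbias")->data == 1.4f ? 0 : 1;
}

This also suggests why Clone(void* stream) takes a stream argument: presumably only device-side buffers need a real (possibly asynchronous) copy, while the host-side maps stay shared.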
...@@ -30,8 +30,10 @@ DECLARE_string(jit_engine_type); ...@@ -30,8 +30,10 @@ DECLARE_string(jit_engine_type);
namespace paddle { namespace paddle {
namespace jit { namespace jit {
using FunctionInfoMap = using FunctionInfoMap =
std::unordered_map<std::string, std::shared_ptr<FunctionInfo>>; std::unordered_map<std::string, std::shared_ptr<FunctionInfo>>;
Layer Deserializer::operator()(const std::string& path, Layer Deserializer::operator()(const std::string& path,
const phi::Place& place) { const phi::Place& place) {
const auto& pdmodel_paths = utils::PdmodelFilePaths(path); const auto& pdmodel_paths = utils::PdmodelFilePaths(path);
...@@ -56,12 +58,12 @@ Layer Deserializer::operator()(const std::string& path, ...@@ -56,12 +58,12 @@ Layer Deserializer::operator()(const std::string& path,
info_map[func_name]->SetProgramFilePath(it.second); info_map[func_name]->SetProgramFilePath(it.second);
} }
VariableMap params_dict; auto params_dict = std::make_shared<VariableMap>();
VariableMap attrs_dict; auto attrs_dict = std::make_shared<VariableMap>();
ReadTensorData(path + PDPARAMS_SUFFIX, param_names_set, place, &params_dict); ReadTensorData(path + PDPARAMS_SUFFIX, param_names_set, place, params_dict);
if (utils::FileExists(path + PROPERTY_SUFFIX)) { if (utils::FileExists(path + PROPERTY_SUFFIX)) {
ReadAttributeData(path + PROPERTY_SUFFIX, &attrs_dict); ReadAttributeData(path + PROPERTY_SUFFIX, attrs_dict);
VLOG(3) << "Read Property Success!"; VLOG(3) << "Read Property Success!";
} }
...@@ -88,10 +90,11 @@ Layer Deserializer::operator()(const std::string& path, ...@@ -88,10 +90,11 @@ Layer Deserializer::operator()(const std::string& path,
return layer; return layer;
} }
void Deserializer::ReadTensorData(const std::string& file_name, void Deserializer::ReadTensorData(
const std::set<std::string>& var_name, const std::string& file_name,
const phi::Place& place, const std::set<std::string>& var_name,
VariableMap* params_dict) const { const phi::Place& place,
std::shared_ptr<VariableMap> params_dict) const {
VLOG(3) << "ReadTensorData from: " << file_name; VLOG(3) << "ReadTensorData from: " << file_name;
std::ifstream fin(file_name, std::ios::binary); std::ifstream fin(file_name, std::ios::binary);
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
...@@ -106,12 +109,15 @@ void Deserializer::ReadTensorData(const std::string& file_name, ...@@ -106,12 +109,15 @@ void Deserializer::ReadTensorData(const std::string& file_name,
} }
} }
void Deserializer::ReadAttributeData(const std::string& file_path, void Deserializer::ReadAttributeData(
VariableMap* attrs_dict) const { const std::string& file_path,
std::shared_ptr<VariableMap> attrs_dict) const {
VLOG(3) << "ReadPropertyData from: " << file_path; VLOG(3) << "ReadPropertyData from: " << file_path;
Property p; Property p;
p.Deserialization(file_path); p.Deserialization(file_path);
*attrs_dict = static_cast<VariableMap>(p.Values()); for (auto& it : p.Values()) {
attrs_dict->emplace(it.first, it.second);
}
return; return;
} }
......
...@@ -55,11 +55,11 @@ class Deserializer { ...@@ -55,11 +55,11 @@ class Deserializer {
void ReadTensorData(const std::string& file_name, void ReadTensorData(const std::string& file_name,
const std::set<std::string>& var_name, const std::set<std::string>& var_name,
const phi::Place& place, const phi::Place& place,
VariableMap* params_dict) const; std::shared_ptr<VariableMap> params_dict) const;
// property pb // property pb
void ReadAttributeData(const std::string& file_path, void ReadAttributeData(const std::string& file_path,
VariableMap* attrs_dict) const; std::shared_ptr<VariableMap> attrs_dict) const;
// void ReadExtraInfo(const std::string& file_name) const; // void ReadExtraInfo(const std::string& file_name) const;
......
...@@ -177,26 +177,6 @@ $$out = \min(\max(0, x), threshold)$$ ...@@ -177,26 +177,6 @@ $$out = \min(\max(0, x), threshold)$$
} }
}; };
class PowOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "Input of Pow operator");
AddInput("FactorTensor",
"(Tensor<float>, optional). If provided, pow will use this"
"The shape of FactorTensor MUST BE [1]."
"it has higher priority than attr(factor).")
.AsDispensable();
AddOutput("Out", "Output of Pow operator");
AddAttr<float>("factor", "The exponential factor of Pow").SetDefault(1.0f);
AddComment(R"DOC(
Pow Activation Operator.
$$out = x^{factor}$$
)DOC");
}
};
class STanhOpMaker : public framework::OpProtoAndCheckerMaker { class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
...@@ -403,138 +383,6 @@ DECLARE_INPLACE_OP_INFERER(ActivationDoubleGradOpInplaceInferer, ...@@ -403,138 +383,6 @@ DECLARE_INPLACE_OP_INFERER(ActivationDoubleGradOpInplaceInferer,
DECLARE_INPLACE_OP_INFERER(ActivationTripleGradOpInplaceInferer, DECLARE_INPLACE_OP_INFERER(ActivationTripleGradOpInplaceInferer,
{"DDX", "D_DOut"}); {"DDX", "D_DOut"});
template <typename T>
class PowGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("pow_grad");
op->SetInput("X", this->Input("X"));
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework ::GradVarName("X"), this->InputGrad("X"));
op->SetInput("FactorTensor", this->Input("FactorTensor"));
op->SetAttrMap(this->Attrs());
}
};
template <typename T>
class PowDoubleGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("pow_double_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
op->SetInput("DDX", this->OutputGrad(framework ::GradVarName("X")));
op->SetOutput("DX", this->InputGrad("X"));
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
op->SetInput("FactorTensor", this->Input("FactorTensor"));
op->SetAttrMap(this->Attrs());
}
};
template <typename T>
class PowTripleGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("pow_triple_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("DOut", this->Input("DOut"));
op->SetInput("DDX", this->Input("DDX"));
op->SetInput("D_DX", this->OutputGrad("DX"));
op->SetInput("D_DDOut", this->OutputGrad("DDOut"));
op->SetOutput("D_X", this->InputGrad("X"));
op->SetOutput("D_DOut", this->InputGrad("DOut"));
op->SetOutput("D_DDX", this->InputGrad("DDX"));
op->SetInput("FactorTensor", this->Input("FactorTensor"));
op->SetAttrMap(this->Attrs());
}
};
class PowOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out");
}
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "X");
}
phi::KernelKey GetKernelTypeForVar(
const std::string& var_name,
const phi::DenseTensor& tensor,
const phi::KernelKey& expected_kernel_type) const override {
if (var_name == "FactorTensor") {
return phi::KernelKey(phi::Backend::ALL_BACKEND,
expected_kernel_type.layout(),
expected_kernel_type.dtype());
}
return phi::KernelKey(
tensor.place(), tensor.layout(), expected_kernel_type.dtype());
}
};
class PowOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
auto out_grad_name = framework::GradVarName("Out");
ctx->ShareDim(out_grad_name, framework::GradVarName("X"));
ctx->ShareLoD(out_grad_name, framework::GradVarName("X"));
}
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, framework::GradVarName("Out"));
}
phi::KernelKey GetKernelTypeForVar(
const std::string& var_name,
const phi::DenseTensor& tensor,
const phi::KernelKey& expected_kernel_type) const override {
if (var_name == "FactorTensor") {
return phi::KernelKey(phi::Backend::ALL_BACKEND,
expected_kernel_type.layout(),
expected_kernel_type.dtype());
}
return phi::KernelKey(
tensor.place(), tensor.layout(), expected_kernel_type.dtype());
}
};
class PowOpDoubleGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "X");
}
};
class PowOpTripleGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "X");
}
};
DECLARE_INPLACE_OP_INFERER(ActFwdInplaceInferer, {"X", "Out"}); DECLARE_INPLACE_OP_INFERER(ActFwdInplaceInferer, {"X", "Out"});
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -582,40 +430,6 @@ REGISTER_ACTIVATION_OP(hard_swish, ...@@ -582,40 +430,6 @@ REGISTER_ACTIVATION_OP(hard_swish,
HardSwishGradFunctor); HardSwishGradFunctor);
REGISTER_ACTIVATION_OP(swish, Swish, SwishFunctor, SwishGradFunctor); REGISTER_ACTIVATION_OP(swish, Swish, SwishFunctor, SwishGradFunctor);
/* ========================== pow register ============================ */
DECLARE_INFER_SHAPE_FUNCTOR(pow_double_grad,
PowDoubleGradInferShapeFunctor,
PD_INFER_META(phi::GeneralBinaryGradInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(pow_triple_grad,
PowTripleGradInferShapeFunctor,
PD_INFER_META(phi::GeneralTernaryGradInferMeta));
REGISTER_OPERATOR(
pow,
ops::PowOp,
ops::PowOpMaker,
ops::ActivationOpInferVarType,
ops::PowGradOpMaker<paddle::framework::OpDesc>,
ops::PowGradOpMaker<paddle::imperative::OpBase>,
std::conditional<ops::CanInplaceAct<ops::PowGradFunctor<float>>(),
ops::ActFwdInplaceInferer,
void>::type);
REGISTER_OPERATOR(pow_grad,
ops::PowOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::PowDoubleGradOpMaker<paddle::framework::OpDesc>,
ops::PowDoubleGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(pow_double_grad,
ops::PowOpDoubleGrad,
ops::ActivationDoubleGradOpInplaceInferer,
ops::PowTripleGradOpMaker<paddle::framework::OpDesc>,
ops::PowTripleGradOpMaker<paddle::imperative::OpBase>,
PowDoubleGradInferShapeFunctor);
REGISTER_OPERATOR(pow_triple_grad,
ops::PowOpTripleGrad,
PowTripleGradInferShapeFunctor);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/ /* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION(leaky_relu) REGISTER_OP_VERSION(leaky_relu)
.AddCheckpoint( .AddCheckpoint(
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/multiary.h"
namespace paddle {
namespace operators {
using framework::DDim;
class BroadcastTensorsOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
// Broadcast semantics requires all input variables to have the same
// DataType/VarType.
// This condition is also checked during VarType inference;
// here we simply copy the input type to the output.
return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"),
ctx.GetPlace());
}
};
class BroadcastTensorsOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput(
"X",
"A Varaible list. The shape and data type of the list elements"
"should be consistent. Variable can be multi-dimensional Tensor"
"or phi::DenseTensor, and data types can be: bool, float16, float32, "
"float64, int32, "
"int64.")
.AsDuplicable();
AddOutput("Out",
"the sum of input :code:`x`. its shape and data types are "
"consistent with :code:`x`.")
.AsDuplicable();
AddComment(
R"DOC(This OP is used to broadcast a vector of inputs
with phi::DenseTensor type, following broadcast semantics.)DOC");
}
};
class BroadcastTensorsOpVarTypeInference : public framework::VarTypeInference {
public:
void operator()(framework::InferVarTypeContext* ctx) const override {
// We need at least two tensors to satisfy broadcast semantics
size_t input_size = ctx->InputSize("X");
PADDLE_ENFORCE_GT(
input_size,
0,
platform::errors::InvalidArgument(
"BroadcastTensorsOp should have at least one input variables,"
"but only received %d ",
input_size));
// BroadcastTensorsOp takes a vector of variables named "X"
// Here we loop through input variables,
// and check if their DataType/VarType are the same
auto var_type = ctx->GetInputType("X", 0);
auto data_type = ctx->GetInputDataType("X", 0);
for (size_t ind = 1; ind < input_size; ind++) {
auto cur_var_type = ctx->GetInputType("X", ind);
PADDLE_ENFORCE_EQ(
var_type,
cur_var_type,
platform::errors::InvalidArgument(
"inputs to BroadcastTensorsOp should have the same variable type,"
"but detected %d v.s %d ",
framework::ToTypeName(var_type),
framework::ToTypeName(cur_var_type)));
auto cur_data_type = ctx->GetInputDataType("X", ind);
PADDLE_ENFORCE_EQ(
data_type,
cur_data_type,
platform::errors::InvalidArgument(
"inputs to BroadcastTensorsOp should have the same data type,"
"but detected %d v.s %d ",
framework::ToTypeName(var_type),
framework::ToTypeName(cur_var_type)));
}
// Outputs having the same DataType/VarType as inputs
ctx->SetOutputType("Out", var_type, framework::ALL_ELEMENTS);
ctx->SetOutputDataType("Out", data_type, framework::ALL_ELEMENTS);
}
};
/* ------ BroadcastTensorsGradOp ------ */
class BroadcastTensorsGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasOutputs(framework::GradVarName("X")),
"Output",
"X@grad",
"broadcast_tensors");
OP_INOUT_CHECK(ctx->HasInputs("X"), "Input", "X", "broadcast_tensors");
OP_INOUT_CHECK(ctx->HasInputs(framework::GradVarName("Out")),
"Input",
"Out@grad",
"broadcast_tensors");
const auto& forward_input_dims = ctx->GetInputsDim("X");
ctx->SetOutputsDim(framework::GradVarName("X"), forward_input_dims);
ctx->ShareAllLoD("X", /*->*/ framework::GradVarName("X"));
}
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Out")),
ctx.device_context().GetPlace());
}
};
template <typename T>
class BroadcastTensorsGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
void Apply(GradOpPtr<T> grad_op) const override {
grad_op->SetType("broadcast_tensors_grad");
// We need "X" only for backward shape inference
grad_op->SetInput("X", this->Input("X"));
grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
grad_op->SetOutput(framework::GradVarName("X"),
this->InputGrad("X", /* drop_empty_grad */ false));
grad_op->SetAttrMap(this->Attrs());
}
};
class BroadcastTensorsGradOpVarTypeInference
: public framework::VarTypeInference {
public:
void operator()(framework::InferVarTypeContext* ctx) const override {
auto var_type = ctx->GetInputType("X", 0);
auto data_type = ctx->GetInputDataType("X", 0);
ctx->SetOutputType(
framework::GradVarName("X"), var_type, framework::ALL_ELEMENTS);
ctx->SetOutputDataType(
framework::GradVarName("X"), data_type, framework::ALL_ELEMENTS);
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERER(BroadcastTensorsGradNoNeedBufVarsInferer,
"X");
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
DECLARE_INFER_SHAPE_FUNCTOR(broadcast_tensors,
BroadcastTensorsInferShapeFunctor,
PD_INFER_META(phi::BroadcastTensorsInferMeta));
REGISTER_OPERATOR(broadcast_tensors,
ops::BroadcastTensorsOp,
ops::BroadcastTensorsOpMaker,
ops::BroadcastTensorsGradOpMaker<paddle::framework::OpDesc>,
ops::BroadcastTensorsGradOpMaker<paddle::imperative::OpBase>,
ops::BroadcastTensorsOpVarTypeInference,
BroadcastTensorsInferShapeFunctor);
REGISTER_OPERATOR(broadcast_tensors_grad,
ops::BroadcastTensorsGradOp,
ops::BroadcastTensorsGradOpVarTypeInference,
ops::BroadcastTensorsGradNoNeedBufVarsInferer);
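broadcast_tensors follows numpy-style broadcast semantics: shapes are aligned from the trailing dimension, and each aligned pair of extents must be equal or contain a 1. The authoritative rule lives in phi::BroadcastTensorsInferMeta; the function below is a standalone illustrative restatement, not the Paddle code:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Compute the common broadcast shape of two shapes, numpy-style.
std::vector<int64_t> BroadcastShape(std::vector<int64_t> a,
                                    std::vector<int64_t> b) {
  if (a.size() < b.size()) std::swap(a, b);  // make `a` the longer shape
  std::vector<int64_t> out(a);
  const size_t offset = a.size() - b.size();
  for (size_t i = 0; i < b.size(); ++i) {
    const int64_t x = a[offset + i], y = b[i];
    assert((x == y || x == 1 || y == 1) && "shapes are not broadcastable");
    out[offset + i] = std::max(x, y);
  }
  return out;
}

int main() {
  // {2, 1, 4} and {3, 1} broadcast to {2, 3, 4}.
  const auto s = BroadcastShape({2, 1, 4}, {3, 1});
  return (s == std::vector<int64_t>{2, 3, 4}) ? 0 : 1;
}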
...@@ -119,12 +119,16 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph, ...@@ -119,12 +119,16 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph,
// collect variables name list to be skipped in GC // collect variables name list to be skipped in GC
skip_eager_vars_.reserve(input_var_names.size() + output_var_names.size()); skip_eager_vars_.reserve(input_var_names.size() + output_var_names.size());
auto add_skip_var_fn = [&outer_varinfo, this](const std::string& var_name) { auto add_skip_var_fn = [&outer_varinfo, this](const std::string& var_name) {
// Always consider Input/Output of Graph as skip_gc_vars, because
// InterpreterCore has no eager_deletion_op to deal with it.
VLOG(4) << "Append a skip_gc_var for InterpreterCore:" << var_name;
skip_gc_vars_.insert(var_name);
// if a var exists in the outer_varinfo map, that means it will be // if a var exists in the outer_varinfo map, that means it will be
// erased by the following eager_deletion_op of current cinn_launch op // erased by the following eager_deletion_op of current cinn_launch op
if (!outer_varinfo.count(var_name)) { if (!outer_varinfo.count(var_name)) {
skip_eager_vars_.emplace_back(var_name); skip_eager_vars_.emplace_back(var_name);
skip_gc_vars_.insert(var_name); VLOG(4) << "Append a skip_gc_var for PE:" << var_name;
VLOG(4) << "Append a skip_gc_var:" << var_name;
} }
}; };
std::for_each( std::for_each(
......
type: "fused_matmul"
def {
inputs {
name: "X"
}
inputs {
name: "Y"
}
inputs {
name: "ResidualData"
}
outputs {
name: "Out"
}
attrs {
name: "trans_x"
type: BOOLEAN
}
attrs {
name: "trans_y"
type: BOOLEAN
}
}
extra {
attrs {
name: "matmul_alpha"
type: FLOAT
}
attrs {
name: "fuse_activation"
type: STRING
}
attrs {
name: "fuse_alpha"
type: FLOAT
}
attrs {
name: "fuse_beta"
type: FLOAT
}
attrs {
name: "fused_output_scale"
type: FLOAT
}
attrs {
name: "fused_reshape_X"
type: INTS
}
attrs {
name: "fused_transpose_X"
type: INTS
}
attrs {
name: "fused_reshape_Y"
type: INTS
}
attrs {
name: "fused_transpose_Y"
type: INTS
}
attrs {
name: "fused_reshape_Out"
type: INTS
}
attrs {
name: "fused_transpose_Out"
type: INTS
}
attrs {
name: "mkldnn_data_type"
type: STRING
}
attrs {
name: "Scale_x"
type: FLOAT
}
attrs {
name: "Scale_y"
type: FLOAT
}
attrs {
name: "Scale_in_eltwise"
type: FLOAT
}
attrs {
name: "Scale_out"
type: FLOAT
}
attrs {
name: "force_fp32_output"
type: BOOLEAN
}
}
...@@ -39,28 +39,4 @@ extra { ...@@ -39,28 +39,4 @@ extra {
name: "op_device" name: "op_device"
type: STRING type: STRING
} }
attrs {
name: "fused_reshape_X"
type: INTS
}
attrs {
name: "fused_reshape_Y"
type: INTS
}
attrs {
name: "fused_transpose_X"
type: INTS
}
attrs {
name: "fused_transpose_Y"
type: INTS
}
attrs {
name: "fused_reshape_Out"
type: INTS
}
attrs {
name: "fused_transpose_Out"
type: INTS
}
} }
...@@ -267,44 +267,6 @@ PD_REGISTER_GENERAL_KERNEL( ...@@ -267,44 +267,6 @@ PD_REGISTER_GENERAL_KERNEL(
ALL_LAYOUT, ALL_LAYOUT,
paddle::operators::FeedStringsKernel<phi::XPUContext>, paddle::operators::FeedStringsKernel<phi::XPUContext>,
ALL_DTYPE) {} ALL_DTYPE) {}
#elif defined(PADDLE_WITH_ASCEND_CL)
PD_REGISTER_GENERAL_KERNEL(
feed_dense_tensor,
npu,
ALL_LAYOUT,
paddle::operators::FeedDenseTensorKernel<phi::CustomContext>,
ALL_DTYPE) {}
PD_REGISTER_GENERAL_KERNEL(
feed_sparse_coo_tensor,
npu,
ALL_LAYOUT,
paddle::operators::FeedSparseCooTensorKernel<phi::CustomContext>,
ALL_DTYPE) {}
PD_REGISTER_GENERAL_KERNEL(
feed_strings,
npu,
ALL_LAYOUT,
paddle::operators::FeedStringsKernel<phi::CustomContext>,
ALL_DTYPE) {}
#elif defined(PADDLE_WITH_MLU)
PD_REGISTER_GENERAL_KERNEL(
feed_dense_tensor,
CustomMLU,
ALL_LAYOUT,
paddle::operators::FeedDenseTensorKernel<phi::CustomContext>,
ALL_DTYPE) {}
PD_REGISTER_GENERAL_KERNEL(
feed_sparse_coo_tensor,
CustomMLU,
ALL_LAYOUT,
paddle::operators::FeedSparseCooTensorKernel<phi::CustomContext>,
ALL_DTYPE) {}
PD_REGISTER_GENERAL_KERNEL(
feed_strings,
CustomMLU,
ALL_LAYOUT,
paddle::operators::FeedStringsKernel<phi::CustomContext>,
ALL_DTYPE) {}
#endif #endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE #ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace paddle { namespace paddle {
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include <string> #include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h" #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h" #include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h"
namespace paddle { namespace paddle {
...@@ -67,7 +67,7 @@ class ElementwiseAddCompositeGradOpMaker ...@@ -67,7 +67,7 @@ class ElementwiseAddCompositeGradOpMaker
auto dy_ptr = this->GetOutputPtr(&dy); auto dy_ptr = this->GetOutputPtr(&dy);
std::string dy_name = this->GetOutputName(dy); std::string dy_name = this->GetOutputName(dy);
int axis = static_cast<int>(this->Attr<int>("axis")); int axis = static_cast<int>(this->Attr<int>("axis"));
VLOG(3) << "Runing add_grad composite func"; VLOG(6) << "Runing add_grad composite func";
prim::add_grad<prim::DescTensor>(x, y, out_grad, axis, dx_ptr, dy_ptr); prim::add_grad<prim::DescTensor>(x, y, out_grad, axis, dx_ptr, dy_ptr);
this->RecoverOutputName(dx, dx_name); this->RecoverOutputName(dx, dx_name);
this->RecoverOutputName(dy, dy_name); this->RecoverOutputName(dy, dy_name);
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h" #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h" #include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h"
namespace paddle { namespace paddle {
...@@ -84,7 +84,7 @@ class ElementwiseDivCompositeGradOpMaker ...@@ -84,7 +84,7 @@ class ElementwiseDivCompositeGradOpMaker
auto dy_ptr = this->GetOutputPtr(&dy); auto dy_ptr = this->GetOutputPtr(&dy);
std::string dy_name = this->GetOutputName(dy); std::string dy_name = this->GetOutputName(dy);
int axis = static_cast<int>(this->Attr<int>("axis")); int axis = static_cast<int>(this->Attr<int>("axis"));
VLOG(3) << "Runing div_grad composite func"; VLOG(6) << "Runing div_grad composite func";
prim::divide_grad<prim::DescTensor>( prim::divide_grad<prim::DescTensor>(
x, y, out, out_grad, axis, dx_ptr, dy_ptr); x, y, out, out_grad, axis, dx_ptr, dy_ptr);
this->RecoverOutputName(dx, dx_name); this->RecoverOutputName(dx, dx_name);
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h" #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h" #include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h"
...@@ -88,7 +88,7 @@ class ElementwiseMulCompositeGradOpMaker ...@@ -88,7 +88,7 @@ class ElementwiseMulCompositeGradOpMaker
static_cast<int>(this->Attr<int>("axis")), static_cast<int>(this->Attr<int>("axis")),
x_grad_p, x_grad_p,
y_grad_p); y_grad_p);
VLOG(3) << "Runing mul_grad composite func"; VLOG(6) << "Runing mul_grad composite func";
this->RecoverOutputName(x_grad, x_grad_name); this->RecoverOutputName(x_grad, x_grad_name);
this->RecoverOutputName(y_grad, y_grad_name); this->RecoverOutputName(y_grad, y_grad_name);
} }
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include <string> #include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h" #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h" #include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h"
namespace paddle { namespace paddle {
...@@ -70,7 +70,7 @@ class ElementwiseSubCompositeGradOpMaker ...@@ -70,7 +70,7 @@ class ElementwiseSubCompositeGradOpMaker
auto dy_ptr = this->GetOutputPtr(&dy); auto dy_ptr = this->GetOutputPtr(&dy);
std::string dy_name = this->GetOutputName(dy); std::string dy_name = this->GetOutputName(dy);
int axis = static_cast<int>(this->Attr<int>("axis")); int axis = static_cast<int>(this->Attr<int>("axis"));
VLOG(3) << "Runing sub_grad composite func"; VLOG(6) << "Runing sub_grad composite func";
prim::subtract_grad<prim::DescTensor>(x, y, out_grad, axis, dx_ptr, dy_ptr); prim::subtract_grad<prim::DescTensor>(x, y, out_grad, axis, dx_ptr, dy_ptr);
this->RecoverOutputName(dx, dx_name); this->RecoverOutputName(dx, dx_name);
this->RecoverOutputName(dy, dy_name); this->RecoverOutputName(dy, dy_name);
......
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h" #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h" #include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h"
#include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/infermeta_utils.h"
...@@ -206,7 +206,7 @@ class ExpandV2CompositeGradOpMaker : public prim::CompositeGradOpMakerBase { ...@@ -206,7 +206,7 @@ class ExpandV2CompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
auto shape = this->Attr<std::vector<int>>("shape"); auto shape = this->Attr<std::vector<int>>("shape");
prim::expand_grad<prim::DescTensor>( prim::expand_grad<prim::DescTensor>(
x, out_grad, paddle::experimental::IntArray(shape), x_grad_p); x, out_grad, paddle::experimental::IntArray(shape), x_grad_p);
VLOG(3) << "Runing expand_v2 composite func"; VLOG(6) << "Runing expand_v2 composite func";
this->RecoverOutputName(x_grad, x_grad_name); this->RecoverOutputName(x_grad, x_grad_name);
} }
}; };
......
...@@ -375,7 +375,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -375,7 +375,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("BiasDropoutResidualOut", AddOutput("BiasDropoutResidualOut",
"Result of residual + dropout(src + bias).") "Result of residual + dropout(src + bias).")
.AsIntermediate(); .AsIntermediate();
AddOutput("CacheKVOut", "The udpated cache KV."); AddOutput("CacheKVOut", "The udpated cache KV.").AsDispensable();
AddOutput("Y", "Result after attention."); AddOutput("Y", "Result after attention.");
AddAttr<int>("num_heads", "The number head for multi_head_attention.") AddAttr<int>("num_heads", "The number head for multi_head_attention.")
......
...@@ -35,16 +35,17 @@ class FusedBiasDropoutResidualLnOp : public framework::OperatorWithKernel { ...@@ -35,16 +35,17 @@ class FusedBiasDropoutResidualLnOp : public framework::OperatorWithKernel {
"Output", "Output",
"LnVariance", "LnVariance",
"FusedBiasDropoutResidualLnOp"); "FusedBiasDropoutResidualLnOp");
OP_INOUT_CHECK(ctx->HasOutput("BiasDropoutResidualOut"),
"Output",
"BiasDropoutResidualOut",
"FusedBiasDropoutResidualLnOp");
OP_INOUT_CHECK(ctx->HasOutput("DropoutMaskOut"), OP_INOUT_CHECK(ctx->HasOutput("DropoutMaskOut"),
"Output", "Output",
"DropoutMaskOut", "DropoutMaskOut",
"FusedBiasDropoutResidualLnOp"); "FusedBiasDropoutResidualLnOp");
OP_INOUT_CHECK(ctx->HasOutput("BiasDropoutResidualOut"),
"Output",
"BiasDropoutResidualOut",
"FusedBiasDropoutResidualLnOp");
OP_INOUT_CHECK( OP_INOUT_CHECK(
ctx->HasOutput("Y"), "Output", "Y", "FusedBiasDropoutResidualLnOp"); ctx->HasOutput("Y"), "Output", "Y", "FusedBiasDropoutResidualLnOp");
auto x_dim = ctx->GetInputDim("X"); auto x_dim = ctx->GetInputDim("X");
int left = 1; int left = 1;
for (int i = 0; i < x_dim.size() - 1; i++) { for (int i = 0; i < x_dim.size() - 1; i++) {
......
...@@ -54,8 +54,12 @@ class FusedBiasDropoutResidualLnOpKernel : public framework::OpKernel<T> { ...@@ -54,8 +54,12 @@ class FusedBiasDropoutResidualLnOpKernel : public framework::OpKernel<T> {
auto *ln_mean_data = auto *ln_mean_data =
dev_ctx.Alloc<U>(ln_mean, ln_mean->numel() * sizeof(U)); dev_ctx.Alloc<U>(ln_mean, ln_mean->numel() * sizeof(U));
auto *ln_var_data = dev_ctx.Alloc<U>(ln_var, ln_var->numel() * sizeof(U)); auto *ln_var_data = dev_ctx.Alloc<U>(ln_var, ln_var->numel() * sizeof(U));
auto *dropout_mask_out_data = dev_ctx.Alloc<uint8_t>( auto *dropout_mask_out_data =
dropout_mask_out, dropout_mask_out->numel() * sizeof(uint8_t)); (dropout_mask_out == nullptr)
? nullptr
: dev_ctx.Alloc<uint8_t>(
dropout_mask_out,
dropout_mask_out->numel() * sizeof(uint8_t));
auto *y_data = dev_ctx.Alloc<T>(y, y->numel() * sizeof(T)); auto *y_data = dev_ctx.Alloc<T>(y, y->numel() * sizeof(T));
const auto input_x_dims = input_x->dims(); const auto input_x_dims = input_x->dims();
......
...@@ -854,9 +854,10 @@ void LaunchLayernormResidualDropoutBias( ...@@ -854,9 +854,10 @@ void LaunchLayernormResidualDropoutBias(
residual, residual,
rows * cols * sizeof(T), rows * cols * sizeof(T),
ctx.stream()); ctx.stream());
PADDLE_ENFORCE_GPU_SUCCESS(cudaMemsetAsync( if (mask_data != nullptr) {
mask_data, 0, rows * cols * sizeof(MaskType), ctx.stream())); PADDLE_ENFORCE_GPU_SUCCESS(cudaMemsetAsync(
mask_data, 0, rows * cols * sizeof(MaskType), ctx.stream()));
}
// call layernorm forward // call layernorm forward
switch (GetDesiredBlockDim(cols)) { switch (GetDesiredBlockDim(cols)) {
FIXED_BLOCK_DIM_CASE( FIXED_BLOCK_DIM_CASE(
......
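Both hunks above enforce the same contract: once DropoutMaskOut is dispensable, every consumer of its buffer must tolerate a null pointer. A minimal sketch of that guard pattern (illustrative helper, not the actual CUDA kernel):

#include <cstddef>
#include <cstdint>
#include <vector>

// When the mask output is optional, touch its buffer only if it exists.
void ZeroMaskIfPresent(std::vector<uint8_t>* mask, std::size_t n) {
  if (mask == nullptr) return;  // dispensable output: nothing to allocate
  mask->assign(n, 0);           // otherwise zero-fill, like the cudaMemsetAsync path
}

int main() {
  ZeroMaskIfPresent(nullptr, 16);  // inference path: mask skipped
  std::vector<uint8_t> mask;
  ZeroMaskIfPresent(&mask, 16);    // training path: mask zeroed
  return mask.size() == 16 ? 0 : 1;
}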
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/matmul_v2_op.h"
namespace paddle {
namespace operators {
static std::vector<int64_t> GetInputShape(phi::DDim dim,
std::vector<int> shape,
std::vector<int> axis) {
PADDLE_ENFORCE_GT(dim.size(),
0,
phi::errors::InvalidArgument(
"The Input(%s) has not been initialized properly. The "
"shape of Input(%s) = [%s].",
dim));
auto is_input_fused = (!shape.empty() && !axis.empty());
if (is_input_fused) {
dim = dim.reshape(shape).transpose(axis);
}
return phi::vectorize(dim);
}
class FusedMatmulOp : public MatMulV2Op {
public:
using MatMulV2Op::MatMulV2Op;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "fused_matmul");
OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "fused_matmul");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "fused_matmul");
bool trans_x = ctx->Attrs().Get<bool>("trans_x");
bool trans_y = ctx->Attrs().Get<bool>("trans_y");
std::vector<int64_t> dims_x =
GetInputShape(ctx->GetInputDim("X"),
ctx->Attrs().Get<std::vector<int>>("fused_reshape_X"),
ctx->Attrs().Get<std::vector<int>>("fused_transpose_X"));
std::vector<int64_t> dims_y =
GetInputShape(ctx->GetInputDim("Y"),
ctx->Attrs().Get<std::vector<int>>("fused_reshape_Y"),
ctx->Attrs().Get<std::vector<int>>("fused_transpose_Y"));
auto ndims_x = dims_x.size();
auto ndims_y = dims_y.size();
PADDLE_ENFORCE_GT(ndims_x,
0,
phi::errors::InvalidArgument(
"The Input(X) dims size must be greater than 0,"
" but received dims size is 0. "));
PADDLE_ENFORCE_GT(ndims_y,
0,
phi::errors::InvalidArgument(
"The Input(Y) dims size must be greater than 0,"
" but received dims size is 0. "));
bool x_broadcasted = false;
bool y_broadcasted = false;
if (ndims_x == 1) {
dims_x.insert(dims_x.begin(), 1);
ndims_x = 2;
x_broadcasted = true;
}
if (ndims_y == 1) {
dims_y.push_back(1);
ndims_y = 2;
y_broadcasted = true;
}
size_t M, N;
if (trans_x) {
M = dims_x[ndims_x - 1];
} else {
M = dims_x[ndims_x - 2];
}
if (trans_y) {
N = dims_y[ndims_y - 2];
} else {
N = dims_y[ndims_y - 1];
}
std::vector<int64_t> new_dims;
if (ndims_x > ndims_y) {
new_dims.assign(dims_x.begin(), dims_x.end() - 2);
} else if (ndims_x < ndims_y) {
new_dims.assign(dims_y.begin(), dims_y.end() - 2);
} else {
new_dims.reserve(ndims_x);
for (size_t i = 0; i < ndims_x - 2; ++i) {
new_dims.push_back(std::max(dims_x[i], dims_y[i]));
}
}
if (!x_broadcasted) {
new_dims.push_back(M);
}
if (!y_broadcasted) {
new_dims.push_back(N);
}
if (x_broadcasted && y_broadcasted) {
new_dims.push_back(1);
}
auto ddim_out = phi::make_ddim(new_dims);
auto shape = ctx->Attrs().Get<std::vector<int>>("fused_reshape_Out");
auto axis = ctx->Attrs().Get<std::vector<int>>("fused_transpose_Out");
auto is_output_fused = (!shape.empty() && !axis.empty());
if (is_output_fused) {
ddim_out = ddim_out.transpose(axis).reshape(shape);
}
ctx->SetOutputDim("Out", ddim_out);
ctx->ShareLoD("X", "Out");
}
};
class FusedMatmulOpMaker : public MatMulV2OpMaker {
protected:
void Apply() override {
AddInput("ResidualData",
"Extra input from matmul_elementwise_add_mkldnn_fuse_pass")
.AsDispensable()
.AsExtra();
AddAttr<float>("matmul_alpha", "Output scale used in matmul_v1")
.SetDefault(1.0f);
AddAttr<std::string>(
"fuse_activation",
"Activation type from matmul_activation_mkldnn_fuse_pass")
.SetDefault("");
AddAttr<float>("fuse_alpha",
"Activation alpha from matmul_activation_mkldnn_fuse_pass")
.SetDefault(0.0f);
AddAttr<float>("fuse_beta",
"Activation beta from matmul_activation_mkldnn_fuse_pass")
.SetDefault(0.0f);
AddAttr<float>("fused_output_scale",
"Output scale from operator_scale_onednn_fuse_pass")
.SetDefault(1.0f);
AddAttr<std::vector<int>>("fused_reshape_X",
"Reshape's shape attribute from "
"reshape_transpose_matmul_mkldnn_fuse_pass")
.SetDefault({});
AddAttr<std::vector<int>>("fused_transpose_X",
"Transpose's axis attribute from "
"reshape_transpose_matmul_mkldnn_fuse_pass")
.SetDefault({});
AddAttr<std::vector<int>>("fused_reshape_Y",
"Reshape's shape attribute from "
"reshape_transpose_matmul_mkldnn_fuse_pass")
.SetDefault({});
AddAttr<std::vector<int>>("fused_transpose_Y",
"Transpose's axis attribute from "
"reshape_transpose_matmul_mkldnn_fuse_pass")
.SetDefault({});
AddAttr<std::vector<int>>("fused_reshape_Out",
"Reshape's shape attribute from "
"matmul_transpose_reshape_mkldnn_fuse_pass")
.SetDefault({});
AddAttr<std::vector<int>>("fused_transpose_Out",
"Transpose's axis attribute from "
"matmul_transpose_reshape_mkldnn_fuse_pass")
.SetDefault({});
AddAttr<std::string>("mkldnn_data_type", "oneDNN operator data type")
.SetDefault("float32")
.InEnum({"float32", "int8", "bfloat16"});
AddAttr<float>("Scale_x", "Matmul X input quantization scale")
.SetDefault(1.0f);
AddAttr<float>("Scale_y", "Matmul Y input quantization scale")
.SetDefault(1.0f);
AddAttr<float>("Scale_in_eltwise", "Matmul ResidualData quantization scale")
.SetDefault(0.0f);
AddAttr<float>("Scale_out", "Matmul output quantization scale")
.SetDefault(1.0f);
AddAttr<bool>("force_fp32_output",
"Flag determining if output should be converted to FP32")
.SetDefault(false);
AddComment(
R"DOC(Matrix multiplication extended with oneDNN-specific fusion logic.)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
fused_matmul,
ops::FusedMatmulOp,
ops::FusedMatmulOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
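GetInputShape folds a preceding fused reshape and transpose into the dims used for shape inference, via dim.reshape(shape).transpose(axis). A small numeric sketch of what that composition does to a shape (the helper and the concrete dims are invented for illustration; DDim implements this internally):

#include <cassert>
#include <cstdint>
#include <vector>

// Apply a transpose permutation to a shape: out[i] = shape[axis[i]].
std::vector<int64_t> TransposeShape(const std::vector<int64_t>& shape,
                                    const std::vector<int>& axis) {
  std::vector<int64_t> out(axis.size());
  for (size_t i = 0; i < axis.size(); ++i) out[i] = shape[axis[i]];
  return out;
}

int main() {
  // Say fused_reshape_X turned a [4, 16, 64] tensor into [4, 16, 2, 32];
  // fused_transpose_X = {0, 2, 1, 3} then yields [4, 2, 16, 32], and it is
  // this final shape that feeds the matmul dimension checks.
  const std::vector<int64_t> reshaped{4, 16, 2, 32};
  const auto transposed = TransposeShape(reshaped, {0, 2, 1, 3});
  assert((transposed == std::vector<int64_t>{4, 2, 16, 32}));
  return 0;
}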
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h" #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h" #include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h"
#include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/infermeta_utils.h"
......
...@@ -665,7 +665,7 @@ class {{op_name | to_composite_grad_opmaker_name}} : public prim::CompositeGradO ...@@ -665,7 +665,7 @@ class {{op_name | to_composite_grad_opmaker_name}} : public prim::CompositeGradO
{%- endmacro %} {%- endmacro %}
{% macro call_composite_backward_api(composite_func_info) %} {% macro call_composite_backward_api(composite_func_info) %}
VLOG(3) << "Runing {{composite_func_info["func_name"]}} composite func"; VLOG(6) << "Runing {{composite_func_info["func_name"]}} composite func";
prim::{{composite_func_info["func_name"]}}<prim::DescTensor>({{composite_func_info["func_args"]}}); prim::{{composite_func_info["func_name"]}}<prim::DescTensor>({{composite_func_info["func_args"]}});
{%- endmacro %} {%- endmacro %}
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -24,168 +24,131 @@ ...@@ -24,168 +24,131 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
static framework::DDim GetDimForInput(const framework::InferShapeContext& ctx, void MatMulV2Op::InferShape(framework::InferShapeContext* ctx) const {
const std::string input_name) { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "matmul_v2");
auto shape = ctx.Attrs().Get<std::vector<int>>("fused_reshape_" + input_name); OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "matmul_v2");
auto axis = OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "matmul_v2");
ctx.Attrs().Get<std::vector<int>>("fused_transpose_" + input_name); bool trans_x = ctx->Attrs().Get<bool>("trans_x");
auto dim = ctx.GetInputDim(input_name); bool trans_y = ctx->Attrs().Get<bool>("trans_y");
PADDLE_ENFORCE_GT(dim.size(), std::vector<int64_t> dims_x = phi::vectorize(ctx->GetInputDim("X"));
std::vector<int64_t> dims_y = phi::vectorize(ctx->GetInputDim("Y"));
auto ndims_x = dims_x.size();
auto ndims_y = dims_y.size();
PADDLE_ENFORCE_GT(ndims_x,
0, 0,
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The Input(%s) has not been initialized properly. The " "The Input(X) dims size must be greater than 0,"
"shape of Input(%s) = [%s].", " but received dims size is 0. "));
dim)); PADDLE_ENFORCE_GT(ndims_y,
0,
if (!shape.empty() && !axis.empty()) { phi::errors::InvalidArgument(
dim = dim.reshape(shape).transpose(axis); "The Input(Y) dims size must be greater than 0,"
} " but received dims size is 0. "));
return dim;
}
class MatMulV2Op : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "matmul_v2");
OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "matmul_v2");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "matmul_v2");
bool trans_x = ctx->Attrs().Get<bool>("trans_x");
bool trans_y = ctx->Attrs().Get<bool>("trans_y");
std::vector<int64_t> dims_x = phi::vectorize(GetDimForInput(*ctx, "X"));
std::vector<int64_t> dims_y = phi::vectorize(GetDimForInput(*ctx, "Y"));
auto ndims_x = dims_x.size();
auto ndims_y = dims_y.size();
PADDLE_ENFORCE_GT(ndims_x,
0,
platform::errors::InvalidArgument(
"The Input(X) dims size must be greater than 0,"
" but received dims size is 0. "));
PADDLE_ENFORCE_GT(ndims_y,
0,
platform::errors::InvalidArgument(
"The Input(Y) dims size must be greater than 0,"
" but received dims size is 0. "));
bool x_broadcasted = false, y_broadcasted = false;
if (ndims_x == 1) {
dims_x.insert(dims_x.begin(), 1);
ndims_x = 2;
x_broadcasted = true;
}
if (ndims_y == 1) {
dims_y.push_back(1);
ndims_y = 2;
y_broadcasted = true;
}
size_t M, N; bool x_broadcasted = false;
if (trans_x) { bool y_broadcasted = false;
M = dims_x[ndims_x - 1];
} else {
M = dims_x[ndims_x - 2];
}
if (trans_y) {
N = dims_y[ndims_y - 2];
} else {
N = dims_y[ndims_y - 1];
}
std::vector<int64_t> new_dims; if (ndims_x == 1) {
if (ndims_x > ndims_y) { dims_x.insert(dims_x.begin(), 1);
new_dims.assign(dims_x.begin(), dims_x.end() - 2); ndims_x = 2;
} else if (ndims_x < ndims_y) { x_broadcasted = true;
new_dims.assign(dims_y.begin(), dims_y.end() - 2); }
} else {
new_dims.reserve(ndims_x);
for (size_t i = 0; i < ndims_x - 2; ++i) {
new_dims.push_back(std::max(dims_x[i], dims_y[i]));
}
}
if (!x_broadcasted) {
new_dims.push_back(M);
}
if (!y_broadcasted) {
new_dims.push_back(N);
}
if (x_broadcasted && y_broadcasted) {
new_dims.push_back(1);
}
auto ddim_out = phi::make_ddim(new_dims); if (ndims_y == 1) {
dims_y.push_back(1);
ndims_y = 2;
y_broadcasted = true;
}
#ifdef PADDLE_WITH_MKLDNN size_t M, N;
auto shape = ctx->Attrs().Get<std::vector<int>>("fused_reshape_Out"); if (trans_x) {
auto axis = ctx->Attrs().Get<std::vector<int>>("fused_transpose_Out"); M = dims_x[ndims_x - 1];
} else {
M = dims_x[ndims_x - 2];
}
if (trans_y) {
N = dims_y[ndims_y - 2];
} else {
N = dims_y[ndims_y - 1];
}
if (!shape.empty() && !axis.empty()) { std::vector<int64_t> new_dims;
ddim_out = ddim_out.transpose(axis).reshape(shape); if (ndims_x > ndims_y) {
new_dims.assign(dims_x.begin(), dims_x.end() - 2);
} else if (ndims_x < ndims_y) {
new_dims.assign(dims_y.begin(), dims_y.end() - 2);
} else {
new_dims.reserve(ndims_x);
for (size_t i = 0; i < ndims_x - 2; ++i) {
new_dims.push_back(std::max(dims_x[i], dims_y[i]));
} }
#endif
ctx->SetOutputDim("Out", ddim_out);
ctx->ShareLoD("X", "Out");
} }
if (!x_broadcasted) {
protected: new_dims.push_back(M);
phi::KernelKey GetExpectedKernelType( }
const framework::ExecutionContext& ctx) const override { if (!y_broadcasted) {
auto input_data_type = new_dims.push_back(N);
OperatorWithKernel::IndicateOrPromoteVarDataTypes(ctx, "X", "Y"); }
return phi::KernelKey(input_data_type, ctx.GetPlace()); if (x_broadcasted && y_broadcasted) {
new_dims.push_back(1);
} }
phi::KernelKey GetKernelTypeForVar( ctx->SetOutputDim("Out", phi::make_ddim(new_dims));
const std::string& var_name, ctx->ShareLoD("X", "Out");
const phi::DenseTensor& tensor, }
const phi::KernelKey& expected_kernel_type) const override {
if (framework::IsComplexType(expected_kernel_type.dtype())) { phi::KernelKey MatMulV2Op::GetExpectedKernelType(
// only promote the inputs' types when they contain a complex input const framework::ExecutionContext& ctx) const {
return phi::KernelKey(tensor.place(), tensor.layout(), tensor.dtype()); auto input_data_type =
} else { OperatorWithKernel::IndicateOrPromoteVarDataTypes(ctx, "X", "Y");
return phi::KernelKey(input_data_type, ctx.GetPlace());
}
phi::KernelKey MatMulV2Op::GetKernelTypeForVar(
const std::string& var_name,
const phi::DenseTensor& tensor,
const phi::KernelKey& expected_kernel_type) const {
if (framework::IsComplexType(expected_kernel_type.dtype())) { // only promote the inputs' types when they contain a complex input
// only promote the inputs' types when they contain a complex input return phi::KernelKey(tensor.place(), tensor.layout(), tensor.dtype());
return phi::KernelKey(tensor.place(), tensor.layout(), tensor.dtype());
} else {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
// When matmul_v2 is the first oneDNN op in a chain (there was some non-oneDNN // When matmul_v2 is the first oneDNN op in a chain (there was some non-oneDNN
// op previously), then we also need to rotate the shape NHWC -> NCHW // op previously), then we also need to rotate the shape NHWC -> NCHW
if ((expected_kernel_type.layout() == phi::DataLayout::ONEDNN) && if ((expected_kernel_type.layout() == phi::DataLayout::ONEDNN) &&
(tensor.layout() != phi::DataLayout::ONEDNN) && (tensor.layout() != phi::DataLayout::ONEDNN) &&
phi::OneDNNContext::tls().get_cur_paddle_data_layout() == phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
phi::DataLayout::kNHWC) { phi::DataLayout::kNHWC) {
return phi::KernelKey(tensor.place(),
phi::DataLayout::kNHWC,
expected_kernel_type.dtype());
}
#endif
return phi::KernelKey( return phi::KernelKey(
tensor.place(), tensor.layout(), expected_kernel_type.dtype()); tensor.place(), phi::DataLayout::kNHWC, expected_kernel_type.dtype());
} }
#endif
return phi::KernelKey(
tensor.place(), tensor.layout(), expected_kernel_type.dtype());
} }
}; }
class MatMulV2OpMaker : public framework::OpProtoAndCheckerMaker { void MatMulV2OpMaker::Make() {
public: AddInput("X", "tensor of shape (d0, d1 ... M, K)");
void Make() override { AddInput("Y", "tensor of shape (d0, d1 ... K, N)");
AddInput("X", "tensor of shape (d0, d1 ... M, K)"); AddOutput("Out", "tensor of shape (d0, d1 ... M, N)");
AddInput("Y", "tensor of shape (d0, d1 ... K, N)"); AddAttr<bool>("trans_x",
AddOutput("Out", "tensor of shape (d0, d1 ... M, N)"); "Set true to transpose the last two dimensions of X before "
AddAttr<bool>("trans_x", "doing multiplication")
"Set true to transpose the last two dimensions of X before " .SetDefault(false);
"doing multiplication") AddAttr<bool>("trans_y",
.SetDefault(false); "Set true to transpose the last two dimensions of Y before "
AddAttr<bool>("trans_y", "doing multiplication")
"Set true to transpose the last two dimensions of Y before " .SetDefault(false);
"doing multiplication") AddComment(
.SetDefault(false); R"DOC(Matrix multiplication Out = X * Y. A has shape (d0, d1 ... M, K),
AddComment(
R"DOC(Matrix multiplication Out = X * Y. A has shape (d0, d1 ... M, K),
B has shape (d0, d1 ... K, N), Out has shape ((d0, d1 ... M, N)). B has shape (d0, d1 ... K, N), Out has shape ((d0, d1 ... M, N)).
In addition, it also follows the broadcast rule which is similar as In addition, it also follows the broadcast rule which is similar as
numpy.matmul. numpy.matmul.
)DOC"); )DOC");
} Apply();
}; }
class MatMulV2OpGrad : public framework::OperatorWithKernel { class MatMulV2OpGrad : public framework::OperatorWithKernel {
public: public:
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
...@@ -37,6 +37,29 @@ limitations under the License. */ ...@@ -37,6 +37,29 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class MatMulV2Op : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
phi::KernelKey GetKernelTypeForVar(
const std::string& var_name,
const phi::DenseTensor& tensor,
const phi::KernelKey& expected_kernel_type) const override;
};
class MatMulV2OpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() final;
protected:
virtual void Apply() {}
};
// Reshape a rank-3 tensor from P x M x N to (P * M) x N. // Reshape a rank-3 tensor from P x M x N to (P * M) x N.
// Identity op if the tensor is not of rank 3. // Identity op if the tensor is not of rank 3.
static phi::DenseTensor FoldInitDims(const phi::DenseTensor& input) { static phi::DenseTensor FoldInitDims(const phi::DenseTensor& input) {
......
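Declaring Make() as final with a protected virtual Apply() hook is a template-method refactor: MatMulV2OpMaker defines the core X/Y/Out inputs and trans attrs once, and a derived maker such as FusedMatmulOpMaker appends its fused attrs from Apply(). A stripped-down sketch of the pattern (hypothetical classes, not the framework types; in the real header Make() can be final because it overrides the framework's virtual):

#include <iostream>

class BaseMaker {
 public:
  virtual ~BaseMaker() = default;
  // The fixed skeleton: core definition first, then the extension hook.
  void Make() {
    std::cout << "core inputs/outputs/attrs\n";
    Apply();  // derived makers add their extras here
  }

 protected:
  virtual void Apply() {}  // the base op adds nothing extra
};

class FusedMaker : public BaseMaker {
 protected:
  void Apply() override { std::cout << "fused-only attrs\n"; }
};

int main() {
  FusedMaker maker;
  maker.Make();  // prints the core definition, then the fused extras
  return 0;
}

The payoff is that fused_matmul cannot accidentally drop or reorder the core matmul_v2 definition; it can only extend it.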
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -102,12 +102,6 @@ const std::unordered_map<std::string, ExtraAttrPropertySet> ...@@ -102,12 +102,6 @@ const std::unordered_map<std::string, ExtraAttrPropertySet>
{"fused_output_scale", ExtraAttrProperty::ONEDNN}, {"fused_output_scale", ExtraAttrProperty::ONEDNN},
{"fuse_residual_connection", ExtraAttrProperty::ONEDNN}, {"fuse_residual_connection", ExtraAttrProperty::ONEDNN},
{"fuse_with_relu", ExtraAttrProperty::ONEDNN}, {"fuse_with_relu", ExtraAttrProperty::ONEDNN},
{"fused_reshape_Out", ExtraAttrProperty::ONEDNN},
{"fused_transpose_Out", ExtraAttrProperty::ONEDNN},
{"fused_reshape_X", ExtraAttrProperty::ONEDNN},
{"fused_reshape_Y", ExtraAttrProperty::ONEDNN},
{"fused_transpose_X", ExtraAttrProperty::ONEDNN},
{"fused_transpose_Y", ExtraAttrProperty::ONEDNN},
{"mkldnn_data_type", ExtraAttrProperty::ONEDNN}, {"mkldnn_data_type", ExtraAttrProperty::ONEDNN},
{"scale_x", ExtraAttrProperty::ONEDNN}, {"scale_x", ExtraAttrProperty::ONEDNN},
{"scale_y", ExtraAttrProperty::ONEDNN}, {"scale_y", ExtraAttrProperty::ONEDNN},
...@@ -226,8 +220,7 @@ class ExtraInfoUtils { ...@@ -226,8 +220,7 @@ class ExtraInfoUtils {
std::unordered_map<std::string, std::vector<std::string>> std::unordered_map<std::string, std::vector<std::string>>
g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}}, g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}},
{"conv2d_transpose", {"Bias"}}, {"conv2d_transpose", {"Bias"}},
{"conv2d_grad", {"Bias"}}, {"conv2d_grad", {"Bias"}}};
{"matmul_v2", {"ResidualData"}}};
std::vector<std::string> empty_extra_input_names_; std::vector<std::string> empty_extra_input_names_;
}; };
......
...@@ -27,6 +27,11 @@ inline int MaxPoolOutputSize(int input_size, ...@@ -27,6 +27,11 @@ inline int MaxPoolOutputSize(int input_size,
int filter_size, int filter_size,
int padding, int padding,
int stride) { int stride) {
PADDLE_ENFORCE_NE(
stride,
0,
phi::errors::InvalidArgument(
"The stride of MaxPool shall not be 0, but received %d.", stride));
int output_size = (input_size - filter_size + 2 * padding) / stride + 1; int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
return output_size; return output_size;
} }
......
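The guarded expression is the standard pooling output-size formula, output = (input - filter + 2 * padding) / stride + 1, which divides by stride and therefore must reject stride == 0. A quick standalone check of the arithmetic (plain C++, not the Paddle function itself):

#include <cassert>
#include <stdexcept>

int MaxPoolOutputSize(int input, int filter, int padding, int stride) {
  if (stride == 0) throw std::invalid_argument("stride must be non-zero");
  return (input - filter + 2 * padding) / stride + 1;
}

int main() {
  // 7x7 feature map, 3x3 window, padding 1, stride 2:
  // (7 - 3 + 2 * 1) / 2 + 1 = 6 / 2 + 1 = 4, i.e. a 4x4 output.
  assert(MaxPoolOutputSize(7, 3, 1, 2) == 4);
  return 0;
}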
...@@ -141,8 +141,22 @@ void HandleLargeDim(const framework::ExecutionContext& context, ...@@ -141,8 +141,22 @@ void HandleLargeDim(const framework::ExecutionContext& context,
// transpose to 2D tensor whose shape is {unreduced, reduced}. // transpose to 2D tensor whose shape is {unreduced, reduced}.
const int64_t unreduced = output->numel(); const int64_t unreduced = output->numel();
const int64_t reduced = shuffled_input.numel() / unreduced; const int64_t input_numel = shuffled_input.numel();
// assume: 0 / 0 == 0, which allows processing 0-dim tensors
const int64_t reduced = (unreduced != 0) ? (input_numel / unreduced) : 0;
PADDLE_ENFORCE_EQ(
unreduced * reduced,
input_numel,
phi::errors::InvalidArgument(
"Reducing failed in HandleLargeDim, when try to transpose (%d) "
"operands into 2D tensor with shape (%d, %d).",
input_numel,
unreduced,
reduced));
shuffled_input.Resize({unreduced, reduced}); shuffled_input.Resize({unreduced, reduced});
DDim output_dim = output->dims(); DDim output_dim = output->dims();
output->Resize({unreduced}); output->Resize({unreduced});
paddle::operators::ReduceFunctor<DeviceContext, OutT, 2, 1, Functor>( paddle::operators::ReduceFunctor<DeviceContext, OutT, 2, 1, Functor>(
...@@ -163,7 +177,20 @@ void HandleLargeDimGrad(const framework::ExecutionContext& context, ...@@ -163,7 +177,20 @@ void HandleLargeDimGrad(const framework::ExecutionContext& context,
Functor functor, Functor functor,
const std::vector<int>& dims) { const std::vector<int>& dims) {
const int64_t unreduced = out->numel(); const int64_t unreduced = out->numel();
const int64_t reduced = x->numel() / unreduced; const int64_t x_numel = x->numel();
// assume: 0 / 0 == 0, which allows processing 0-dim tensors
const int64_t reduced = (unreduced != 0) ? (x_numel / unreduced) : 0;
PADDLE_ENFORCE_EQ(
unreduced * reduced,
x_numel,
phi::errors::InvalidArgument(
"Reducing failed in HandleLargeDimGrad, when try to transpose (%d) "
"operands into 2D tensor with shape (%d, %d).",
x_numel,
unreduced,
reduced));
DDim out_dim(out->dims()); DDim out_dim(out->dims());
DDim x_dim(x->dims()); DDim x_dim(x->dims());
// transpose and reshape X // transpose and reshape X
......
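The new checks formalize the invariant the transpose-to-2D trick relies on: every element must land in exactly one (row, column) cell, i.e. unreduced * reduced == numel, with 0 / 0 defined as 0 for empty tensors. A self-contained sketch of the same guard (hypothetical helper name):

#include <cassert>
#include <cstdint>

// Split `numel` elements into an {unreduced, reduced} 2-D view; an empty
// tensor (unreduced == 0) yields 0 reduced columns by convention.
int64_t ReducedExtent(int64_t numel, int64_t unreduced) {
  const int64_t reduced = (unreduced != 0) ? numel / unreduced : 0;
  assert(unreduced * reduced == numel &&
         "numel must factor exactly into unreduced * reduced");
  return reduced;
}

int main() {
  // Reducing a [2, 3, 4] tensor over its last axis: 6 output cells,
  // each aggregating 4 input elements.
  assert(ReducedExtent(24, 6) == 4);
  assert(ReducedExtent(0, 0) == 0);  // 0-size tensors pass the check
  return 0;
}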
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#include <string> #include <string>
#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h" #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/unary.h" #include "paddle/phi/infermeta/unary.h"
...@@ -84,7 +84,7 @@ class ReduceSumCompositeGradOpMaker : public prim::CompositeGradOpMakerBase { ...@@ -84,7 +84,7 @@ class ReduceSumCompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
// get output original name std::string x_grad_name = this->GetOutputName(x_grad_t);
std::string x_grad_name = this->GetOutputName(x_grad_t); std::string x_grad_name = this->GetOutputName(x_grad_t);
VLOG(3) << "Runing sum_grad composite func"; VLOG(6) << "Runing sum_grad composite func";
// call composite backward func // call composite backward func
prim::sum_grad<prim::DescTensor>( prim::sum_grad<prim::DescTensor>(
x, out_grad, axis, keep_dim, reduce_all, x_grad); x, out_grad, axis, keep_dim, reduce_all, x_grad);
......
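The log level here moves from VLOG(3) to VLOG(6), demoting the message from moderately verbose to trace-level output; with glog, VLOG(n) emits only when the verbosity setting is at least n. A minimal illustration:

#include <glog/logging.h>

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  // Emitted only when verbosity is 6 or higher (e.g. GLOG_v=6), so this
  // routine composite-grad trace no longer shows up at the common level 3.
  VLOG(6) << "Running sum_grad composite func";
}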
...@@ -438,11 +438,32 @@ class TensorRTEngineOp : public framework::OperatorBase { ...@@ -438,11 +438,32 @@ class TensorRTEngineOp : public framework::OperatorBase {
calib_res->calib_.reset(new TRTInt8Calibrator( calib_res->calib_.reset(new TRTInt8Calibrator(
calib_buffers, runtime_batch, calibration_engine_key_, dev_place)); calib_buffers, runtime_batch, calibration_engine_key_, dev_place));
calib_res->thr_.reset(new std::thread([&]() { calib_res->thr_.reset(new std::thread([&]() {
std::map<std::string, std::vector<int>> min_input_shape;
std::map<std::string, std::vector<int>> max_input_shape;
std::map<std::string, std::vector<int>> opt_input_shape;
std::map<std::string, std::vector<int>> min_shape_tensor;
std::map<std::string, std::vector<int>> max_shape_tensor;
std::map<std::string, std::vector<int>> opt_shape_tensor;
if (shape_range_info_path_.size())
inference::DeserializeShapeRangeInfo(shape_range_info_path_,
&min_input_shape,
&max_input_shape,
&opt_input_shape,
&min_shape_tensor,
&max_shape_tensor,
&opt_shape_tensor);
calib_res->engine_.reset(new TensorRTEngine(max_batch_size_, calib_res->engine_.reset(new TensorRTEngine(max_batch_size_,
workspace_size_, workspace_size_,
precision_mode_, precision_mode_,
calib_res->calib_.get(), calib_res->calib_.get(),
dev_place.device)); dev_place.device,
min_input_shape,
max_input_shape,
opt_input_shape,
min_shape_tensor,
max_shape_tensor,
opt_shape_tensor));
VLOG(3) << "start the calib trt engine thread"; VLOG(3) << "start the calib trt engine thread";
PrepareTRTEngine(scope, calib_res->engine_.get()); PrepareTRTEngine(scope, calib_res->engine_.get());
})); }));
......
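With this change the INT8 calibration thread builds its engine with the same dynamic-shape ranges as the inference engine, read back via DeserializeShapeRangeInfo when a shape_range_info_path_ is set. A hedged sketch of what populated ranges look like (the input name and shapes below are invented for illustration):

#include <map>
#include <string>
#include <vector>

int main() {
  // Per-input dynamic shape ranges: min <= opt <= max, element-wise.
  std::map<std::string, std::vector<int>> min_input_shape{{"x", {1, 3, 224, 224}}};
  std::map<std::string, std::vector<int>> max_input_shape{{"x", {8, 3, 224, 224}}};
  std::map<std::string, std::vector<int>> opt_input_shape{{"x", {4, 3, 224, 224}}};
  // Shape tensors carry value ranges rather than dimension ranges; left empty
  // when the model has none.
  std::map<std::string, std::vector<int>> min_shape_tensor, max_shape_tensor,
      opt_shape_tensor;
  // These six maps are what DeserializeShapeRangeInfo fills from
  // shape_range_info_path_ before the TensorRTEngine is constructed above.
}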
generated/prim_api/eager_prim_api.cc generated_prim/*.cc
generated/prim_api/tmp_eager_prim_api.cc generated_prim/*.h
generated/prim_api/*.h
add_subdirectory(auto_code_generated) add_subdirectory(auto_code_generated)
add_subdirectory(manual) add_subdirectory(manual_prim)
add_subdirectory(generated) add_subdirectory(generated_prim)
if(NOT (NOT WITH_PYTHON AND ON_INFER)) if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library( cc_library(
......
...@@ -13,6 +13,6 @@ ...@@ -13,6 +13,6 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include "paddle/fluid/prim/api/generated/prim_api/prim_api.h" #include "paddle/fluid/prim/api/generated_prim/prim_generated_api.h"
#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h" #include "paddle/fluid/prim/api/manual_prim/prim_manual_api.h"
#include "paddle/fluid/prim/api/manual/utils/utils.h" #include "paddle/fluid/prim/api/manual_prim/utils/utils.h"
...@@ -5,16 +5,17 @@ set(legacy_api_yaml_path ...@@ -5,16 +5,17 @@ set(legacy_api_yaml_path
"${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/legacy_ops.parsed.yaml" "${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parsed_ops/legacy_ops.parsed.yaml"
) )
set(tmp_eager_prim_api_cc_path set(tmp_eager_prim_api_cc_path
"${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/generated/prim_api/tmp_eager_prim_api.cc" "${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/generated_prim/tmp_eager_prim_api.cc"
) )
set(tmp_prim_api_h_path set(tmp_prim_api_h_path
"${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/generated/prim_api/tmp_prim_api.h" "${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/generated_prim/tmp_prim_generated_api.h"
) )
set(eager_prim_api_cc_path set(eager_prim_api_cc_path
"${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/generated/prim_api/eager_prim_api.cc" "${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/generated_prim/eager_prim_api.cc"
) )
set(prim_api_h_path set(prim_api_h_path
"${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/generated/prim_api/prim_api.h") "${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/generated_prim/prim_generated_api.h"
)
set(prim_api_gen_file set(prim_api_gen_file
${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/auto_code_generated/prim_gen.py) ${PADDLE_SOURCE_DIR}/paddle/fluid/prim/api/auto_code_generated/prim_gen.py)
......
...@@ -28,11 +28,11 @@ def header_include(): ...@@ -28,11 +28,11 @@ def header_include():
""" """
def eager_source_include(header_file_path): def eager_source_include():
return """ return """
#include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/prim/api/generated/prim_api/prim_api.h" #include "paddle/fluid/prim/api/generated_prim/prim_generated_api.h"
""" """
...@@ -73,10 +73,7 @@ def generate_api(api_yaml_path, header_file_path, eager_prim_source_file_path): ...@@ -73,10 +73,7 @@ def generate_api(api_yaml_path, header_file_path, eager_prim_source_file_path):
header_file.write(header_include()) header_file.write(header_include())
header_file.write(namespace[0]) header_file.write(namespace[0])
header_file.write(namespace[1]) header_file.write(namespace[1])
include_header_file = ( eager_prim_source_file.write(eager_source_include())
"#include paddle/fluid/prim/api/generated/prim_api/prim_api.h"
)
eager_prim_source_file.write(eager_source_include(include_header_file))
eager_prim_source_file.write(namespace[0]) eager_prim_source_file.write(namespace[0])
for api in apis: for api in apis:
...@@ -106,13 +103,13 @@ def main(): ...@@ -106,13 +103,13 @@ def main():
parser.add_argument( parser.add_argument(
'--prim_api_header_path', '--prim_api_header_path',
help='output of generated prim_api header code file', help='output of generated prim_api header code file',
default='paddle/fluid/prim/api/generated/prim_api/prim_api.h', default='paddle/fluid/prim/api/generated_prim/prim_generated_api.h',
) )
parser.add_argument( parser.add_argument(
'--eager_prim_api_source_path', '--eager_prim_api_source_path',
help='output of generated eager_prim_api source code file', help='output of generated eager_prim_api source code file',
default='paddle/fluid/prim/api/generated/prim_api/eager_prim_api.cc', default='paddle/fluid/prim/api/generated_prim/eager_prim_api.cc',
) )
options = parser.parse_args() options = parser.parse_args()
......
...@@ -13,9 +13,7 @@ ...@@ -13,9 +13,7 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include "paddle/fluid/prim/api/generated/prim_api/prim_api.h" #include "paddle/fluid/prim/api/all.h"
#include "paddle/fluid/prim/api/manual/prim_api/prim_api.h"
#include "paddle/fluid/prim/api/manual/utils/utils.h"
#include "paddle/phi/common/int_array.h" #include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/ddim.h"
...@@ -232,8 +230,8 @@ void multiply_grad(const Tensor& x, ...@@ -232,8 +230,8 @@ void multiply_grad(const Tensor& x,
Tensor* y_grad) { Tensor* y_grad) {
if (x_grad) { if (x_grad) {
auto x_grad_unreduce = multiply<T>(out_grad, y); auto x_grad_unreduce = multiply<T>(out_grad, y);
if (x.dims() != y.dims()) { if (x_grad_unreduce.dims() != x.dims()) {
auto axes = get_reduce_dims(x.dims(), y.dims()); auto axes = get_reduce_dims_from_out(x_grad_unreduce.dims(), x.dims());
if (!axes.size()) { if (!axes.size()) {
set_output<T>(x_grad_unreduce, x_grad); set_output<T>(x_grad_unreduce, x_grad);
} else { } else {
...@@ -252,8 +250,8 @@ void multiply_grad(const Tensor& x, ...@@ -252,8 +250,8 @@ void multiply_grad(const Tensor& x,
} }
if (y_grad) { if (y_grad) {
auto y_grad_unreduce = multiply<T>(out_grad, x); auto y_grad_unreduce = multiply<T>(out_grad, x);
if (y.dims() != x.dims()) { if (y_grad_unreduce.dims() != y.dims()) {
auto axes = get_reduce_dims(y.dims(), x.dims()); auto axes = get_reduce_dims_from_out(y_grad_unreduce.dims(), y.dims());
if (!axes.size()) { if (!axes.size()) {
set_output<T>(y_grad_unreduce, y_grad); set_output<T>(y_grad_unreduce, y_grad);
} else { } else {
......
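The fix compares the unreduced gradient's shape against the matching input's shape instead of comparing the two inputs to each other, and derives the reduction axes from the broadcasted output shape. A sketch of that axis computation under NumPy-style broadcasting, as a stand-in for get_reduce_dims_from_out (whose exact signature is Paddle-internal):

#include <vector>

// Given the broadcasted (output) shape and the original input shape, return
// the axes of `out` that must be summed so the gradient matches `in`.
std::vector<int> ReduceDimsFromOut(const std::vector<int>& out,
                                   const std::vector<int>& in) {
  std::vector<int> axes;
  int offset = static_cast<int>(out.size()) - static_cast<int>(in.size());
  for (int i = 0; i < static_cast<int>(out.size()); ++i) {
    // Leading axes absent from `in`, and axes where `in` was broadcast
    // from extent 1, both need to be reduced.
    if (i < offset || (in[i - offset] == 1 && out[i] != 1)) {
      axes.push_back(i);
    }
  }
  return axes;
}

// Example: out = {2, 3, 4}, in = {3, 1} -> axes {0, 2}.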
cc_library(
static_prim_api
SRCS static_prim_api.cc
DEPS proto_desc static_utils)
if(NOT (NOT WITH_PYTHON AND ON_INFER)) if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library( cc_library(
eager_prim_api eager_prim_api
......
add_subdirectory(utils)
cc_library(
static_prim_api
SRCS static_prim_api.cc
DEPS proto_desc static_utils)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,15 +12,16 @@ ...@@ -12,15 +12,16 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// prim api which can't be generated
#pragma once #pragma once
#include "paddle/fluid/prim/api/generated_prim/prim_generated_api.h"
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/int_array.h" #include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/place.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/common/scalar.h" #include "paddle/phi/common/scalar.h"
#include "paddle/utils/optional.h" #include "paddle/utils/optional.h"
// TODO(jiabin): Make this header only for handwritten APIs, instead of
// including prim_generated_api.h
namespace paddle { namespace paddle {
namespace prim {} // namespace prim namespace prim {} // namespace prim
} // namespace paddle } // namespace paddle
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/prim/api/manual/utils/utils.h" #include "paddle/fluid/prim/api/manual_prim/utils/utils.h"
#include "paddle/phi/api/include/tensor.h" #include "paddle/phi/api/include/tensor.h"
namespace paddle { namespace paddle {
......
(Diffs for 58 additional files are collapsed.)