Commit b21770a2, authored by N nhzlx

cherry-pick from feature/anakin-engine: Add subgraph fuse support and anakin engine #16018

Parent 084310f5
-cc_library(anakin_op_converter SRCS fc.cc conv2d.cc activation.cc pool2d.cc concat.cc split.cc DEPS anakin_engine framework_proto scope op_registry)
-cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter mul_op)
-cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
-cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} activation_op anakin_op_converter
-         SERIAL)
-cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter pool_op pooling)
-cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter concat_op concat_and_split)
-cc_test(test_anakin_split SRCS test_split_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter split_op concat_and_split)
+cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
+           elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc DEPS anakin_engine framework_proto scope op_registry)
+cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
+cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
+cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL)
+cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling)
+cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split)
+cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split)
+cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS
+        anakin_op_converter elementwise_add_op)
+cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL)
...@@ -45,15 +45,11 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
   auto output_name = op_desc.Output("Out").front();
   engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
   engine_->AddOpAttr(op_name, "type", anakin_op_type_);
-  if (op_type_ == "relu") {
-    engine_->AddOpAttr(op_name, "alpha", 0);
-  }
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
 REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
 REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
...@@ -34,13 +34,8 @@ class ActivationOpConverter : public AnakinOpConverter {
  private:
   std::string op_type_;
   std::string anakin_op_type_;
-  std::map<std::string, std::string> anakin_ops_type_{
-      {"relu", "Relu"}, {"tanh", "TanH"}, {"sigmoid", "Sigmoid"}};
-};
-
-class ReluOpConverter : public ActivationOpConverter {
- public:
-  ReluOpConverter() : ActivationOpConverter("relu") {}
+  std::map<std::string, std::string> anakin_ops_type_{{"tanh", "TanH"},
+                                                      {"sigmoid", "Sigmoid"}};
 };

 class TanhOpConverter : public ActivationOpConverter {
...@@ -50,7 +45,7 @@ class TanhOpConverter : public ActivationOpConverter {
 class SigmoidOpConverter : public ActivationOpConverter {
  public:
-  SigmoidOpConverter() : ActivationOpConverter("tanh") {}
+  SigmoidOpConverter() : ActivationOpConverter("sigmoid") {}
 };

 }  // namespace anakin
 }  // namespace inference
......
...@@ -32,8 +32,8 @@ void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
                                    const framework::Scope &scope,
                                    bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
-  auto input_names = op_desc.Input("X");
   int axis = boost::get<int>(op_desc.GetAttr("axis"));
+  auto input_names = op_desc.Input("X");
   PADDLE_ENFORCE(axis > 0,
                  "The axis attr of Concat op should be large than 0 for trt");
......
...@@ -51,10 +51,11 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);

   // const int n_output = weight_tensor->dims()[0];
-  const int n_input = weight_tensor->dims()[1];
+  // const int n_input = weight_tensor->dims()[1];
   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
-  auto filter_num = n_input * filter_h * filter_w;
+  // auto filter_num = n_input * filter_h * filter_w ;
+  auto filter_num = weight_tensor->dims()[0];
   engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
   engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
#include <algorithm>
#include <memory>
#include <vector>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::saber::NV;
using anakin::saber::Shape;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);
auto input_name = op_desc.Input("Input").front();
auto output_name = op_desc.Output("Output").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
PADDLE_ENFORCE_NOT_NULL(filter_v);
auto *filter_t = filter_v->GetMutable<framework::LoDTensor>();
auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(b_v);
auto *b_t = b_v->GetMutable<framework::LoDTensor>();
std::unique_ptr<framework::LoDTensor> weight_tensor(
new framework::LoDTensor());
weight_tensor->Resize(filter_t->dims());
TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
// const int n_output = weight_tensor->dims()[0];
// const int n_input = weight_tensor->dims()[1];
const int filter_h = weight_tensor->dims()[2];
const int filter_w = weight_tensor->dims()[3];
// auto filter_num = n_input * filter_h * filter_w ;
auto filter_num = weight_tensor->dims()[0];
engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
engine_->AddOpAttr(op_name, "group", groups);
engine_->AddOpAttr(op_name, "axis", 1);
engine_->AddOpAttr(op_name, "bias_term", true);
auto weight_shape = framework::vectorize2int(filter_t->dims());
Shape anakin_shape(weight_shape);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr(op_name, "weight_1", *weight1);
auto bias_shape = framework::vectorize2int(b_t->dims());
framework::LoDTensor bias_tensor;
bias_tensor.Resize(b_t->dims());
TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
auto *bias_data = bias_tensor.data<float>();
bias_shape.insert(bias_shape.begin(), 1);
bias_shape.insert(bias_shape.begin(), 1);
bias_shape.insert(bias_shape.begin(), 1);
// bias_shape.push_back(1);
// bias_shape.push_back(1);
Shape anakin_bias_shape(bias_shape);
auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
anakin_bias_shape);
float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
weight2->d_tensor().set_shape(anakin_bias_shape);
weight2->d_tensor().copy_from(weight2->h_tensor());
engine_->AddOpAttr(op_name, "weight_2", *weight2);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
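A note on the bias handling in this converter: the Paddle bias is a 1-D tensor of length C (the output-channel count), and the three insert() calls at the front of bias_shape turn it into {1, 1, 1, C}, presumably so it matches the 4-D blob layout Anakin uses for weight_2. A minimal sketch of that reshape, with a hypothetical helper name, is shown below; it is illustrative only and not part of the patch.

// --- illustrative sketch (not part of the patch) ---
#include <vector>
// Expand a 1-D bias shape {C} into the 4-D form {1, 1, 1, C} that the
// converter above builds via three insert() calls at the front.
std::vector<int> To4DBiasShape(std::vector<int> bias_shape) {
  while (bias_shape.size() < 4) {
    bias_shape.insert(bias_shape.begin(), 1);
  }
  return bias_shape;
}
// --- end sketch ---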
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
class Conv2dFusionOpConverter : public AnakinOpConverter {
public:
Conv2dFusionOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) override;
virtual ~Conv2dFusionOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/elementwise.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto y_name = op_desc.Input("Y").front();
auto out_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
std::string elementwise_type = "Add";
engine_->AddOpAttr<std::string>(op_name, "type", elementwise_type);
std::vector<float> coeff = {1.0, 1.0};
engine_->AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
class ElementwiseAddOpConverter : public AnakinOpConverter {
public:
ElementwiseAddOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ElementwiseAddOpConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle
...@@ -14,6 +14,8 @@
 #include "paddle/fluid/inference/anakin/convert/fc.h"
 #include <algorithm>
+#include <string>
+#include <vector>

 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
...@@ -24,28 +26,39 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-void FcOpConverter::operator()(const framework::proto::OpDesc &op,
-                               const framework::Scope &scope, bool test_mode) {
+void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
+                                   const framework::Scope &scope,
+                                   bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
-  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
-  PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
-  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+  auto input_names = op_desc.InputNames();
+  bool with_bias = input_names.size() == 3;
+
+  std::string w_name = "Y";
+  std::string i_name = "X";
+  if (with_bias) {
+    w_name = "W";
+    i_name = "Input";
+  }

-  auto x_name = op_desc.Input("X").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
-  auto *y_v = scope.FindVar(op_desc.Input("Y").front());
+
+  // get weights
+  auto *y_v = scope.FindVar(op_desc.Input(w_name).front());
   PADDLE_ENFORCE_NOT_NULL(y_v);
   auto *y_t = y_v->GetMutable<framework::LoDTensor>();

-  auto input_name = op_desc.Input("X").front();
+  auto input_name = op_desc.Input(i_name).front();
   auto output_name = op_desc.Output("Out").front();

-  auto weight_shape = framework::vectorize2int(y_t->dims());
   engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  engine_->AddOpAttr(op_name, "bias_term", with_bias);
   engine_->AddOpAttr(op_name, "axis", 1);
+  auto weight_shape = framework::vectorize2int(y_t->dims());
   int out_dim = weight_shape[1];
   engine_->AddOpAttr(op_name, "out_dim", out_dim);
+  const int w_m = weight_shape[0];
+  const int w_k = weight_shape[1];

   weight_shape.push_back(1);
   weight_shape.push_back(1);
...@@ -54,18 +67,54 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op,
   framework::LoDTensor weight_tensor;
   weight_tensor.Resize(y_t->dims());
   TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);

+  auto *weight_data = weight_tensor.data<float>();
+  PADDLE_ENFORCE(w_m * w_k == weight_tensor.numel());
+  std::vector<float> trans_weight_data(weight_tensor.numel());
+  for (int i = 0; i < w_m; i++) {
+    for (int j = 0; j < w_k; j++) {
+      trans_weight_data[i + j * w_m] = weight_data[i * w_k + j];
+    }
+  }
+
   auto *weight1 =
       GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data);
+  std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
   engine_->AddOpAttr(op_name, "weight_1", *weight1);

+  // get bias
+  if (with_bias) {
+    auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
+    PADDLE_ENFORCE_NOT_NULL(b_v);
+    auto *b_t = b_v->GetMutable<framework::LoDTensor>();
+
+    auto bias_shape = framework::vectorize2int(b_t->dims());
+    framework::LoDTensor bias_tensor;
+    bias_tensor.Resize(b_t->dims());
+    TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
+    auto *bias_data = bias_tensor.data<float>();
+    bias_shape.insert(bias_shape.begin(), 1);
+    bias_shape.insert(bias_shape.begin(), 1);
+    bias_shape.insert(bias_shape.begin(), 1);
+    // bias_shape.push_back(1);
+    // bias_shape.push_back(1);
+    Shape anakin_bias_shape(bias_shape);
+
+    auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
+        anakin_bias_shape);
+    float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
+    std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
+    weight2->d_tensor().set_shape(anakin_bias_shape);
+    weight2->d_tensor().copy_from(weight2->h_tensor());
+    engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  }
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

+REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
 REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
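For reference, the trans_weight_data loop above rearranges Paddle's row-major [w_m, w_k] mul/fc weight into column-major order before it is copied into the Anakin Dense blob. A standalone sketch of the same index mapping follows; the function name is illustrative, not from this patch.

// --- illustrative sketch (not part of the patch) ---
#include <vector>
// dst[r + c * rows] = src[r * cols + c]: the row-major (rows x cols) matrix
// is laid out column-major, matching the loop in FcBaseOpConverter above.
std::vector<float> ToColumnMajor(const float *src, int rows, int cols) {
  std::vector<float> dst(static_cast<size_t>(rows) * cols);
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      dst[r + c * rows] = src[r * cols + c];
    }
  }
  return dst;
}
// --- end sketch ---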
...@@ -20,14 +20,26 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class FcOpConverter : public AnakinOpConverter {
+class FcBaseOpConverter : public AnakinOpConverter {
  public:
-  FcOpConverter() = default;
+  FcBaseOpConverter() = default;

   virtual void operator()(const framework::proto::OpDesc &op,
                           const framework::Scope &scope,
                           bool test_mode) override;
-  virtual ~FcOpConverter() {}
+  virtual ~FcBaseOpConverter() {}
+};
+
+// with bias
+class FcOpConverter : public FcBaseOpConverter {
+ public:
+  FcOpConverter() = default;
+};
+
+// without bias
+class MulOpConverter : public FcBaseOpConverter {
+ public:
+  MulOpConverter() = default;
 };

 }  // namespace anakin
......
...@@ -47,14 +47,6 @@ class AnakinOpConverter {
     std::string op_type = op_desc.Type();
     AnakinOpConverter *it = nullptr;

-    if (op_type == "mul") {
-      PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
-      std::string Y = op_desc.Input("Y")[0];
-      if (parameters.count(Y)) {
-        it = Registry<AnakinOpConverter>::Global().Lookup("fc");
-      }
-    }
     if (!it) {
       it = Registry<AnakinOpConverter>::Global().Lookup(op_type);
     }
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include <algorithm>
#include <map>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::saber::NV;
using anakin::saber::Shape;
namespace paddle {
namespace inference {
namespace anakin {
void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
engine_->AddOpAttr(op_name, "alpha", 0);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
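The converter above maps Paddle's relu to Anakin's "ReLU" op and pins the "alpha" attribute to 0. Assuming alpha is the negative slope of a leaky ReLU in Anakin, a value of 0 reproduces the standard ReLU the Paddle op computes; the tiny sketch below only illustrates that equivalence.

// --- illustrative sketch (not part of the patch) ---
// With negative slope alpha == 0, a leaky ReLU degenerates to max(x, 0).
inline float LeakyRelu(float x, float alpha) { return x > 0.f ? x : alpha * x; }
inline float Relu(float x) { return LeakyRelu(x, 0.f); }
// --- end sketch ---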
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
class ReluOpConverter : public AnakinOpConverter {
public:
ReluOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ReluOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
...@@ -41,16 +41,13 @@ static void test_activation_op(const std::string &op_type) {
   validator.Execute(5);
 }

-TEST(relu_op, test) { test_activation_op("relu"); }
 TEST(sigm_op, test) { test_activation_op("sigmoid"); }
 TEST(tanh_op, test) { test_activation_op("tanh"); }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-USE_OP(relu);
 USE_OP(sigmoid);
 USE_OP(tanh);
-USE_ANAKIN_CONVERTER(relu);
 USE_ANAKIN_CONVERTER(sigmoid);
 USE_ANAKIN_CONVERTER(tanh);
...@@ -25,10 +25,10 @@ TEST(concat_op, test) {
   std::unordered_set<std::string> parameters({""});
   framework::Scope scope;
   AnakinConvertValidation validator(parameters, scope);
-  validator.DeclInputVar("concat_x1", {1, 10, 3, 1});
-  validator.DeclInputVar("concat_x2", {1, 3, 3, 1});
-  validator.DeclInputVar("concat_x3", {1, 7, 3, 1});
-  validator.DeclOutputVar("concat_out", {1, 20, 3, 1});
+  validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
+  validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
+  validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
+  validator.DeclOutputVar("concat_out", {1, 6, 1, 1});

   // Prepare Op description
   framework::OpDesc desc;
......
...@@ -28,9 +28,9 @@ TEST(conv2d_op, test) {
   std::unordered_set<std::string> parameters({"conv2d-Y"});
   framework::Scope scope;
   AnakinConvertValidation validator(parameters, scope);
-  validator.DeclInputVar("conv2d-X", {1, 2, 5, 5});
-  validator.DeclParamVar("conv2d-Y", {3, 2, 3, 3});
-  validator.DeclOutputVar("conv2d-Out", {1, 3, 5, 5});
+  validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
+  validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
+  validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});

   // Prepare Op description
   framework::OpDesc desc;
...@@ -40,7 +40,7 @@ TEST(conv2d_op, test) {
   desc.SetOutput("Output", {"conv2d-Out"});

   const std::vector<int> strides({1, 1});
-  const std::vector<int> paddings({1, 1});
+  const std::vector<int> paddings({0, 0});
   const std::vector<int> dilations({1, 1});
   const int groups = 1;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
TEST(elementwise_op, native) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("elementwise_add_x", {1, 1, 2, 2});
validator.DeclInputVar("elementwise_y", {1, 1, 2, 2});
validator.DeclOutputVar("elementwise_out", {1, 1, 2, 2});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("elementwise_add");
desc.SetInput("X", {"elementwise_add_x"});
desc.SetInput("Y", {"elementwise_y"});
desc.SetOutput("Out", {"elementwise_out"});
int axis = -1;
desc.SetAttr("axis", axis);
validator.SetOp(*desc.Proto());
validator.Execute(1);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(elementwise_add);
USE_ANAKIN_CONVERTER(elementwise_add);
...@@ -27,9 +27,9 @@ TEST(fc_op, test) {
   std::unordered_set<std::string> parameters({"mul_y"});
   framework::Scope scope;
   AnakinConvertValidation validator(parameters, scope);
-  validator.DeclInputVar("mul_x", {1, 1, 1, 1});
-  validator.DeclParamVar("mul_y", {1, 2});
-  validator.DeclOutputVar("mul_out", {1, 1, 1, 2});
+  validator.DeclInputVar("mul_x", {1, 1, 2, 2});
+  validator.DeclParamVar("mul_y", {4, 2});
+  validator.DeclOutputVar("mul_out", {1, 2});

   // Prepare Op description
   framework::OpDesc desc;
...@@ -37,8 +37,8 @@ TEST(fc_op, test) {
   desc.SetInput("X", {"mul_x"});
   desc.SetInput("Y", {"mul_y"});
   desc.SetOutput("Out", {"mul_out"});
-  int num_flatten_dims = 3;
-  desc.SetAttr("x_num_col_dims", num_flatten_dims);
+  // int num_flatten_dims = 3;
+  // desc.SetAttr("x_num_col_dims", num_flatten_dims);

   validator.SetOp(*desc.Proto());
   validator.Execute(10);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
static void test_activation_op(const std::string &op_type) {
auto *converter = Registry<AnakinOpConverter>::Global().Lookup(op_type);
PADDLE_ENFORCE(converter != nullptr);
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("act-X", {10, 6, 1, 1});
validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
framework::OpDesc desc;
desc.SetType(op_type);
desc.SetInput("X", {"act-X"});
desc.SetOutput("Out", {"act-Out"});
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(5);
}
TEST(sigm_op, test) { test_activation_op("relu"); }
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(relu);
USE_ANAKIN_CONVERTER(relu);
...@@ -161,10 +161,6 @@ class AnakinConvertValidation {
       framework::TensorToVector(*tensor, ctx, &fluid_out);
       fluid_outputs.push_back(fluid_out);

-      // size_t fluid_out_size = fluid_out.size();
-      /*for (size_t i = 0; i < fluid_out_size; i++) {
-        std::cout << fluid_out[i] << std::endl;
-      }*/
       outputs.insert({output, tensor});
     }
...@@ -180,8 +176,7 @@ class AnakinConvertValidation {
       size_t anakin_out_size = anakin_out.size();
       auto fluid_out = fluid_outputs[i_output++];
       for (size_t i = 0; i < anakin_out_size; i++) {
-        LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], "
-                  << "fluid[" << fluid_out[i] << "]";
+        EXPECT_LT(std::abs(fluid_out[i] - anakin_out[i]), 1e-3);
       }
     }
   }
......
...@@ -68,29 +68,34 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
     auto *tensor = input.second;
     auto *data = tensor->data<float>();
     auto shape = framework::vectorize2int(tensor->dims());
-    ::anakin::saber::Shape anakin_shape(shape);
     auto *anakin_input = net_->get_in(input.first);
+    auto anakin_input_shape = anakin_input->valid_shape();
+    PADDLE_ENFORCE(tensor->numel(), anakin_input_shape.count(),
+                   "the fluid input size should be equal to anakin");
     ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
-                                                       anakin_shape);
-    anakin_input->share_from(tmp_anakin_tensor);
+                                                       anakin_input_shape);
+    anakin_input->copy_from(tmp_anakin_tensor);
   }

   for (const auto &output : outputs) {
     auto *tensor = output.second;
     auto *data = tensor->data<float>();
     auto shape = framework::vectorize2int(tensor->dims());
-    ::anakin::saber::Shape anakin_shape(shape);
     auto *anakin_output = net_->get_out(output.first);
+    auto anakin_output_shape = anakin_output->valid_shape();
+    PADDLE_ENFORCE(tensor->numel(), anakin_output_shape.count(),
+                   "the fluid output size should be equal to anakin");
     ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
-                                                       anakin_shape);
+                                                       anakin_output_shape);
     anakin_output->share_from(tmp_anakin_tensor);
   }
   net_->prediction();
+  cudaDeviceSynchronize();
 }

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
-  PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph.");
+  PADDLE_ENFORCE(graph_->Freeze_v3(), "Freeze anakin subgraph.");
 }

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
......
...@@ -46,6 +46,9 @@ namespace anakin {
 template <typename TargetT, ::anakin::Precision PrecisionType,
           ::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
 class AnakinEngine {
+  using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
+  using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
+
  public:
   explicit AnakinEngine(bool need_summary = false);
   ~AnakinEngine();
...@@ -61,16 +64,15 @@ class AnakinEngine {
     PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
                    "Add operation's attribution.");
   }
+  NetT *Net() { return net_.get(); }

   std::unique_ptr<AnakinEngine> Clone();
   void Freeze();
   void Optimize();
-  void Save(std::string path) { graph_->save(path); }
   void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
                const std::map<std::string, framework::LoDTensor *> &outputs);

  private:
-  using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
-  using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
   std::unique_ptr<GraphT> graph_;
   std::unique_ptr<NetT> net_;
 };
......
...@@ -20,7 +20,18 @@ namespace anakin {
 // Just tell by the op_types.
 struct SimpleOpTypeSetTeller : public Teller {
-  SimpleOpTypeSetTeller() {}
+  SimpleOpTypeSetTeller() {
+    // teller_set.insert("mul");
+    teller_set.insert("fc");
+    teller_set.insert("conv2d_fusion");
+    teller_set.insert("split");
+    teller_set.insert("relu");
+    teller_set.insert("pool2d");
+    teller_set.insert("elementwise_add");
+    teller_set.insert("concat");
+    teller_set.insert("tanh");
+    // teller_set.insert("conv2d");
+  }

   bool operator()(const std::string& op_type,
                   const framework::OpDesc& desc) override {
...@@ -28,7 +39,7 @@ struct SimpleOpTypeSetTeller : public Teller {
   }

  private:
-  std::unordered_set<std::string> teller_set{{"mul"}};
+  std::unordered_set<std::string> teller_set;
 };

 bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc) {
......
...@@ -13,8 +13,11 @@
 // limitations under the License.

 #include "paddle/fluid/inference/analysis/ir_pass_manager.h"
+#include <memory>
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
+#include <utility>
 #include <vector>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
...@@ -63,7 +66,14 @@ void IRPassManager::CreatePasses(Argument *argument,
     } else if (pass_name == "cpu_quantize_pass") {
       pass->Set("quant_var_scales",
                 new VarQuantScale(argument->quant_var_scales()));
-    } else if (pass_name == "tensorrt_subgraph_pass") {
+    }
+
+    if (pass_name == "anakin_subgraph_pass") {
+      pass->Set("program",
+                new framework::ProgramDesc *(&argument->main_program()));
+    }
+
+    if (pass_name == "tensorrt_subgraph_pass") {
       pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
       pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
       pass->Set("min_subgraph_size",
......
...@@ -14,3 +14,15 @@ if (WITH_GPU AND TENSORRT_FOUND)
   file(APPEND ${pass_file} "USE_PASS(tensorrt_subgraph_pass);\n")
   set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "")
 endif()
+
+if (WITH_ANAKIN_SUBGRAPH)
+  cc_library(anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_detector anakin_op_teller)
+
+  set(analysis_deps ${analysis_deps}
+        subgraph_detector anakin_subgraph_pass
+        CACHE INTERNAL "")
+
+  set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
+  file(APPEND ${pass_file} "USE_PASS(anakin_subgraph_pass);\n")
+  set(INFER_IR_PASSES ${INFER_IR_PASSES} anakin_subgraph_pass CACHE INTERNAL "")
+endif()
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/inference/anakin/op_teller.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace inference {
namespace analysis {
using framework::ir::Node;
std::vector<std::string> ExtractAnakinParameters(
const std::unordered_set<Node *> &nodes);
std::unique_ptr<framework::ir::Graph> analysis::AnakinSubgraphPass::ApplyImpl(
std::unique_ptr<framework::ir::Graph> graph) const {
framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph.get());
auto teller = [](const framework::ir::Node *node) {
if (!node->IsOp() || !node->Op()) return false;
return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
};
SubGraphFuser fuser(graph.get(), teller, 0);
fuser();
for (auto *node : graph->Nodes()) {
if (node->IsOp() && !Agent(node).subgraph()->empty()) {
CreateAnakinOp(node, graph.get());
std::unordered_set<const Node *> nodes2remove(
Agent(node).subgraph()->begin(), Agent(node).subgraph()->end());
framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove);
}
}
std::unordered_set<const Node *> nodes2remove;
for (auto *node : graph->Nodes()) {
if (node->IsOp() && Agent(node).deleted()) {
nodes2remove.insert(node);
}
}
framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove);
return graph;
}
std::string GenerateAnakinEngineKey(
const std::set<std::string> &engine_inputs,
const std::set<std::string> &engine_outputs) {
std::string engine_hash_key = "";
for (auto name : engine_inputs) {
engine_hash_key += name;
}
for (auto name : engine_outputs) {
engine_hash_key += name;
}
auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key));
return engine_key;
}
void AnakinSubgraphPass::CreateAnakinOp(framework::ir::Node *node,
Graph *graph) const {
auto *op_desc = node->Op();
auto &subgraph = *Agent(node).subgraph();
PADDLE_ENFORCE(!subgraph.empty());
framework::ProgramDesc *program_desc =
Get<framework::ProgramDesc *>("program");
// Add new block for TensorRTEngineOP
const framework::BlockDesc &main_block =
program_desc->Block(framework::kRootBlockIndex);
// const framework::BlockDesc& main_block = program_desc->Block(0);
framework::BlockDesc *new_block = program_desc->AppendBlock(main_block);
// A fake block desc.
framework::proto::BlockDesc block_proto;
framework::BlockDesc block_desc(nullptr, &block_proto);
block_desc.Proto()->set_parent_idx(-1);
block_desc.Proto()->set_idx(0);
string::PrettyLogDetail("--- detect a sub-graph with %d nodes",
subgraph.size());
for (auto *node : subgraph) {
auto *new_block_op = new_block->AppendOp();
auto *op = block_desc.AppendOp();
*new_block_op->Proto() = *node->Op()->Proto();
*op->Proto() = *node->Op()->Proto();
}
// Then, we will use the input_names_with_id and output_names_with_id to
// generate the engine key.
// So, we use set instead of unordered_set here to ensure that the engine key
// is unique.
std::set<std::string> input_names;
std::set<std::string> input_names_with_id;
for (auto *x : node->inputs) {
input_names.insert(x->Name());
input_names_with_id.insert(x->Name() + std::to_string(x->id()));
}
op_desc->SetInput(
"Xs", std::vector<std::string>(input_names.begin(), input_names.end()));
std::set<std::string> output_names;
std::set<std::string> output_names_with_id;
for (auto *x : node->outputs) {
output_names.insert(x->Name());
output_names_with_id.insert(x->Name() + std::to_string(x->id()));
}
op_desc->SetOutput(
"Ys", std::vector<std::string>(output_names.begin(), output_names.end()));
op_desc->SetType("anakin_engine");
std::unordered_map<std::string, std::string> output_name_map;
// The following procedure is used to rename all the intermediate
// variables and the output variables of the subgraph.
// Why we do this?
// During the transition from fluid OP to anakin OP, we map
// the input and output Tensor(fluid data structure) of fluid OP
// to the corresponding ITensor (trt data structure) through the
// Tensor name. When we set up ITensor for a variable, we must
// ensure that it has not been set before.
// If there is variable in the fluid graph, which is not only the
// input of a OP, but also the output of a Op, there will be problems.
// So we have to rename the variable in the subgraph to make sure
// it is either an OP's input or an OP's output.
auto &subgraph_nodes = *Agent(node).subgraph();
for (size_t index = 0; index < block_desc.OpSize(); ++index) {
framework::proto::OpDesc *op = block_desc.Op(index)->Proto();
auto correspond_node = subgraph_nodes[index];
PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type());
std::unordered_map<std::string, size_t> var2id;
for (auto *in_var : correspond_node->inputs) {
var2id[in_var->Name()] = in_var->id();
}
// rename for the input variables of op inside subgraph
for (int i = 0; i < op->inputs_size(); i++) {
// one input
auto *in_var = op->mutable_inputs(i);
std::vector<std::string> replaced_names;
for (int k = 0; k < in_var->arguments_size(); k++) { // all the arguments
std::string arg_value = in_var->arguments(k);
std::string arg_value_with_id =
arg_value + std::to_string(var2id[arg_value]);
if (input_names_with_id.count(arg_value_with_id)) {
replaced_names.push_back(arg_value);
} else {
replaced_names.push_back(arg_value_with_id);
}
}
in_var->clear_arguments();
for (size_t k = 0; k < replaced_names.size(); k++) {
in_var->add_arguments(replaced_names[k]);
}
}
var2id.clear();
for (auto out_var : correspond_node->outputs) {
var2id[out_var->Name()] = out_var->id();
}
// rename for the output variables of op inside subgraph
for (int i = 0; i < op->outputs_size(); i++) {
framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i);
std::vector<std::string> replaced_names;
for (int k = 0; k < out_var->arguments_size(); k++) {
std::string arg_value = out_var->arguments(k);
std::string arg_value_with_id =
arg_value + std::to_string(var2id[arg_value]);
if (output_names_with_id.count(arg_value_with_id)) {
output_name_map[arg_value] = arg_value_with_id;
}
replaced_names.push_back(arg_value_with_id);
}
out_var->clear_arguments();
for (size_t k = 0; k < replaced_names.size(); k++) {
out_var->add_arguments(replaced_names[k]);
}
}
}
// When anakin engine runs at the end of the operation,
// output_mapping help us copy the data from the renamed ITensor
// to Tensor.
std::vector<std::string> output_mapping;
for (auto name : output_names) {
PADDLE_ENFORCE(output_name_map.count(name) != 0);
output_mapping.push_back(output_name_map[name]);
}
auto *vars = block_desc.Proto()->mutable_vars();
for (framework::ir::Node *node : graph->Nodes()) {
if (node->IsVar() && node->Var()) {
*vars->Add() = *node->Var()->Proto();
}
}
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
"the block has no var-desc");
PADDLE_ENFORCE(!output_mapping.empty());
op_desc->SetBlockAttr("sub_block", new_block);
SetAttr(op_desc->Proto(), "subgraph",
block_desc.Proto()->SerializeAsString());
// Set attrs
SetAttr(op_desc->Proto(), "parameters",
ExtractAnakinParameters(graph->Nodes()));
SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping);
auto engine_key =
GenerateAnakinEngineKey(input_names_with_id, output_names_with_id);
SetAttr(op_desc->Proto(), "engine_key", engine_key);
}
std::vector<std::string> ExtractAnakinParameters(
const std::unordered_set<Node *> &nodes) {
// We can judge whether a variable is a parameter by
// its persistable property, but sometimes the persistable
// of the feed op output is true, so we have to identify it.
std::vector<std::string> feed_outputs;
for (const auto &node : nodes) {
if (!node->IsOp()) continue;
std::string op_type = node->Op()->Type();
if (op_type == "feed") {
std::vector<std::string> output_names = node->Op()->OutputArgumentNames();
std::copy(output_names.begin(), output_names.end(),
std::back_inserter(feed_outputs));
}
}
std::vector<std::string> parameters;
for (const auto &node : nodes) {
if (!node->IsVar()) continue;
if (node->Var()->Persistable() &&
std::find(feed_outputs.begin(), feed_outputs.end(), node->Name()) ==
feed_outputs.end()) {
parameters.push_back(node->Name());
}
}
return parameters;
}
} // namespace analysis
} // namespace inference
} // namespace paddle
REGISTER_PASS(anakin_subgraph_pass,
paddle::inference::analysis::AnakinSubgraphPass);
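To make the renaming logic in CreateAnakinOp above easier to follow: every argument of an op inside the subgraph gets its graph-node id appended, except arguments that are inputs of the whole subgraph, which keep their original names; renamed subgraph outputs are additionally recorded in output_name_map. A minimal sketch of that rule follows; the helper name is hypothetical and not part of the patch.

// --- illustrative sketch (not part of the patch) ---
#include <string>
#include <unordered_set>
// Mirror of the per-argument rename used above: subgraph inputs keep their
// original name, every other tensor becomes "<name><node_id>" so that each
// intermediate variable inside the Anakin block is unique.
std::string RenameArgument(
    const std::string &arg, int node_id,
    const std::unordered_set<std::string> &input_names_with_id) {
  const std::string with_id = arg + std::to_string(node_id);
  return input_names_with_id.count(with_id) ? arg : with_id;
}
// --- end sketch ---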
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <paddle/fluid/framework/ir/fuse_pass_base.h>
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace inference {
namespace analysis {
class AnakinSubgraphPass : public framework::ir::FusePassBase {
public:
std::unique_ptr<framework::ir::Graph> ApplyImpl(
std::unique_ptr<framework::ir::Graph> graph) const override;
private:
void CreateAnakinOp(framework::ir::Node *x,
framework::ir::Graph *graph) const;
void CleanIntermediateOutputs(framework::ir::Node *node);
};
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -14,6 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
 #include <string>
+#include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
...@@ -418,7 +420,7 @@ void SubGraphFuser::ReplaceNodesWithSubGraphs() {
     // Node that contains this subgraph 2. Mark the nodes inside the sub-graph
     // as deleted. 3. Replace the deleted node with the new Block Node.
     framework::OpDesc empty_desc;
-    empty_desc.SetType("tensorrt_engine");
+    empty_desc.SetType("anakin_engine");
     auto *block_node = graph_->CreateOpNode(&empty_desc);
     Agent(block_node).set_subgraph({});
     auto io = ExtractInputAndOutputOfSubGraph(subgraph);
......
...@@ -30,6 +30,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
   // The parameters are on the cpu, therefore, synchronization is not necessary.
   if (!argument->use_gpu()) return;
+  return;

   auto &graph = argument->main_graph();
   std::vector<std::string> repetitive_params;
......
...@@ -27,6 +27,10 @@ if(WITH_GPU AND TENSORRT_FOUND)
   set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter)
 endif()

+if (WITH_ANAKIN_SUBGRAPH)
+  set(inference_deps ${inference_deps} anakin_op_converter anakin_engine)
+endif()
+
 add_subdirectory(details)

 cc_library(analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder)
......
...@@ -43,6 +43,8 @@
 #endif

+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
 DECLARE_bool(profile);

 namespace paddle {
...@@ -805,3 +807,14 @@ USE_TRT_CONVERTER(prelu);
 USE_TRT_CONVERTER(conv2d_transpose);
 USE_TRT_CONVERTER(leaky_relu);
 #endif
+
+USE_ANAKIN_CONVERTER(fc);
+USE_ANAKIN_CONVERTER(conv2d);
+USE_ANAKIN_CONVERTER(concat);
+USE_ANAKIN_CONVERTER(split);
+USE_ANAKIN_CONVERTER(relu);
+USE_ANAKIN_CONVERTER(sigmoid);
+USE_ANAKIN_CONVERTER(tanh);
+USE_ANAKIN_CONVERTER(pool2d);
+USE_ANAKIN_CONVERTER(conv2d_fusion);
+USE_ANAKIN_CONVERTER(elementwise_add);
...@@ -34,6 +34,10 @@ if (WITH_GPU AND TENSORRT_FOUND)
   add_subdirectory(tensorrt)
 endif()

+if (WITH_ANAKIN_SUBGRAPH)
+  add_subdirectory(anakin)
+endif()
+
 SET(OP_HEADER_DEPS xxhash)
 if (WITH_GPU)
     SET(OP_HEADER_DEPS ${OP_HEADER_DEPS} cub)
......
op_library(anakin_engine_op DEPS anakin_engine anakin_op_converter)
# file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(anakin_engine);\n")
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <string>
#include <vector>
#include "paddle/fluid/operators/anakin/anakin_engine_op.h"
namespace paddle {
namespace operators {
class AnakinEngineOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Xs", "A list of inputs.").AsDuplicable();
AddOutput("Ys", "A list of outputs").AsDuplicable();
AddAttr<std::string>("subgraph", "the subgraph.");
AddAttr<std::string>(
"engine_key",
"The engine_key here is used to distinguish different TRT Engines");
AddAttr<framework::BlockDesc *>("sub_block", "the trt block");
AddComment("Anakin engine operator.");
}
};
class AnakinEngineInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDesc &op_desc,
framework::BlockDesc *block) const override {}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(anakin_engine, ops::AnakinEngineOp, ops::AnakinEngineOpMaker,
ops::AnakinEngineOpMaker);
#endif // PADDLE_WITH_CUDA
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUDA
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace paddle {
namespace operators {
using FluidDT = framework::proto::VarType_Type;
using inference::Singleton;
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
using inference::anakin::AnakinEngine;
class AnakinEngineOp : public framework::OperatorBase {
using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
private:
std::vector<std::string> input_names_;
std::unordered_set<std::string> param_names_;
mutable std::unique_ptr<AnakinNvEngineT> anakin_engine_;
std::string engine_key_;
public:
AnakinEngineOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {
input_names_ = Inputs("Xs");
engine_key_ = Attr<std::string>("engine_key");
auto params = Attr<std::vector<std::string>>("parameters");
for (const auto &param : params) {
param_names_.insert(param);
}
}
protected:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
RunAnakin(scope, dev_place);
}
void RunAnakin(const framework::Scope &scope,
const platform::Place &dev_place) const {
if (anakin_engine_.get() == nullptr) {
anakin_engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
Prepare(scope, dev_place, anakin_engine_.get());
}
auto *engine = anakin_engine_.get();
PADDLE_ENFORCE(!input_names_.empty(), "should pass more than one inputs");
std::vector<std::string> output_maps =
Attr<std::vector<std::string>>("output_name_mapping");
std::map<std::string, framework::LoDTensor *> inputs;
// Convert input tensor from fluid to engine.
for (const auto &x : Inputs("Xs")) {
if (param_names_.count(x)) continue;
auto &t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
auto t_shape = framework::vectorize(t.dims());
inputs.insert({x, &t});
}
std::map<std::string, framework::LoDTensor *> outputs;
int output_index = 0;
for (const auto &y : Outputs("Ys")) {
std::vector<int> ddim =
engine->Net()->get_out(output_maps[output_index])->valid_shape();
// we need get the output anakin output shape.
auto *fluid_v = scope.FindVar(y);
PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y);
auto *fluid_t = fluid_v->GetMutable<framework::LoDTensor>();
fluid_t->Resize(framework::make_ddim(ddim));
fluid_t->mutable_data<float>(boost::get<platform::CUDAPlace>(dev_place));
outputs.insert({output_maps[output_index], fluid_t});
output_index += 1;
}
engine->Execute(inputs, outputs);
}
void Prepare(const framework::Scope &scope, const platform::Place &dev_place,
AnakinNvEngineT *engine) const {
LOG(INFO) << "Prepare Anakin engine (Optimize model structure, Select OP "
"kernel etc). This process may cost a lot of time.";
framework::proto::BlockDesc block_desc;
block_desc.ParseFromString(Attr<std::string>("subgraph"));
std::vector<std::string> output_maps =
Attr<std::vector<std::string>>("output_name_mapping");
inference::Singleton<inference::anakin::AnakinOpConverter>::Global()
.ConvertBlock(block_desc, param_names_, scope, engine);
engine->Freeze();
engine->Optimize();
for (const auto &x : Inputs("Xs")) {
if (param_names_.count(x)) continue;
auto &t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
auto t_shape = framework::vectorize2int(t.dims());
// all input shape should be 4 dims
if (t_shape.size() == 2) {
t_shape.push_back(1);
t_shape.push_back(1);
}
engine->SetInputShape(x, t_shape);
}
engine->InitGraph();
}
};
} // namespace operators
} // namespace paddle
#endif // PADDLE_WITH_CUDA