提交 3785aab8 编写于 作者: J jackzhang235

support mlu set_input_layout

上级 12a1a095
...@@ -203,23 +203,22 @@ void ConfigBase::set_threads(int threads) { ...@@ -203,23 +203,22 @@ void ConfigBase::set_threads(int threads) {
#endif #endif
} }
void CxxConfig::mlu_set_mlu_core_version( void CxxConfig::set_mlu_core_version(lite_api::MLUCoreVersion core_version) {
lite_api::MLUCoreVersion core_version) {
mlu_core_version_ = core_version; mlu_core_version_ = core_version;
} }
void CxxConfig::mlu_set_mlu_core_number(int core_number) { void CxxConfig::set_mlu_core_number(int core_number) {
mlu_core_number_ = core_number; mlu_core_number_ = core_number;
} }
void CxxConfig::mlu_set_input_layout()(DataLayoutType layout) { void CxxConfig::set_mlu_input_layout(DataLayoutType layout) {
mlu_input_layout_ = layout; mlu_input_layout_ = layout;
} }
void CxxConfig::mlu_set_use_first_conv(bool use_first_conv) { void CxxConfig::set_mlu_use_first_conv(bool use_first_conv) {
mlu_use_first_conv_ = use_first_conv; mlu_use_first_conv_ = use_first_conv;
} }
void CxxConfig::mlu_set_first_conv_mean(const std::vector<float> &mean) { void CxxConfig::set_mlu_first_conv_mean(const std::vector<float> &mean) {
mlu_first_conv_mean_ = mean; mlu_first_conv_mean_ = mean;
} }
void CxxConfig::mlu_set_first_conv_std(const std::vector<float> &std) { void CxxConfig::set_mlu_first_conv_std(const std::vector<float> &std) {
mlu_first_conv_std_ = std; mlu_first_conv_std_ = std;
} }
lite_api::MLUCoreVersion CxxConfig::mlu_core_version() const { lite_api::MLUCoreVersion CxxConfig::mlu_core_version() const {
......
...@@ -136,7 +136,6 @@ class LITE_API CxxConfig : public ConfigBase { ...@@ -136,7 +136,6 @@ class LITE_API CxxConfig : public ConfigBase {
#ifdef LITE_WITH_X86 #ifdef LITE_WITH_X86
int x86_math_library_math_threads_ = 1; int x86_math_library_math_threads_ = 1;
#endif #endif
lite_api::MLUCoreVersion mlu_core_version_{lite_api::MLUCoreVersion::MLU_270}; lite_api::MLUCoreVersion mlu_core_version_{lite_api::MLUCoreVersion::MLU_270};
int mlu_core_number_{1}; int mlu_core_number_{1};
DataLayoutType mlu_input_layout_{DATALAYOUT(kNCHW)}; DataLayoutType mlu_input_layout_{DATALAYOUT(kNCHW)};
...@@ -171,12 +170,12 @@ class LITE_API CxxConfig : public ConfigBase { ...@@ -171,12 +170,12 @@ class LITE_API CxxConfig : public ConfigBase {
} }
#endif #endif
void mlu_set_mlu_core_version(lite_api::MLUCoreVersion core_version); void set_mlu_core_version(lite_api::MLUCoreVersion core_version);
void mlu_set_mlu_core_number(int core_number); void set_mlu_core_number(int core_number);
void mlu_set_input_layout()(DataLayoutType layout); void set_mlu_input_layout(DataLayoutType layout);
void mlu_set_use_first_conv(bool use_first_conv); void set_mlu_use_first_conv(bool use_first_conv);
void mlu_set_first_conv_mean(const std::vector<float>& mean); void set_mlu_first_conv_mean(const std::vector<float>& mean);
void mlu_set_first_conv_std(const std::vector<float>& std); void set_mlu_first_conv_std(const std::vector<float>& std);
lite_api::MLUCoreVersion mlu_core_version() const; lite_api::MLUCoreVersion mlu_core_version() const;
int mlu_core_number() const; int mlu_core_number() const;
......
...@@ -128,11 +128,12 @@ void BindLiteCxxConfig(py::module *m) { ...@@ -128,11 +128,12 @@ void BindLiteCxxConfig(py::module *m) {
.def("power_mode", &CxxConfig::power_mode); .def("power_mode", &CxxConfig::power_mode);
#endif #endif
#ifdef LITE_WITH_MLU #ifdef LITE_WITH_MLU
cxx_config.def("set_use_firstconv", &CxxConfig::set_use_firstconv) cxx_config.def("set_mlu_core_version", &CxxConfig::set_mlu_core_version)
.def("set_mean", &CxxConfig::set_mean) .def("set_mlu_core_number", &CxxConfig::set_mlu_core_number)
.def("set_std", &CxxConfig::set_std) .def("set_mlu_input_layout", &CxxConfig::set_mlu_input_layout)
.def("set_mlu_core_version", &CxxConfig::set_mlu_core_version) .def("set_mlu_use_first_conv", &CxxConfig::set_mlu_use_first_conv)
.def("set_mlu_core_number", &CxxConfig::set_mlu_core_number); .def("set_mlu_first_conv_mean", &CxxConfig::set_mlu_first_conv_mean)
.def("set_mlu_first_conv_std", &CxxConfig::set_mlu_first_conv_std);
#endif #endif
} }
......
...@@ -72,6 +72,7 @@ thread_local int DeviceInfo::mlu_core_number_{1}; ...@@ -72,6 +72,7 @@ thread_local int DeviceInfo::mlu_core_number_{1};
thread_local bool DeviceInfo::use_first_conv_{false}; thread_local bool DeviceInfo::use_first_conv_{false};
thread_local std::vector<float> DeviceInfo::mean_vec_; thread_local std::vector<float> DeviceInfo::mean_vec_;
thread_local std::vector<float> DeviceInfo::std_vec_; thread_local std::vector<float> DeviceInfo::std_vec_;
thread_local DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)};
#endif #endif
#ifdef TARGET_IOS #ifdef TARGET_IOS
...@@ -1123,7 +1124,7 @@ const std::vector<float>& DeviceInfo::MeanVec() const { return mean_vec_; } ...@@ -1123,7 +1124,7 @@ const std::vector<float>& DeviceInfo::MeanVec() const { return mean_vec_; }
const std::vector<float>& DeviceInfo::StdVec() const { return std_vec_; } const std::vector<float>& DeviceInfo::StdVec() const { return std_vec_; }
const DataLayoutType InputLayout() const { return input_layout_; } DataLayoutType DeviceInfo::InputLayout() const { return input_layout_; }
#endif // LITE_WITH_MLU #endif // LITE_WITH_MLU
......
...@@ -67,7 +67,7 @@ class DeviceInfo { ...@@ -67,7 +67,7 @@ class DeviceInfo {
bool UseFirstConv(); bool UseFirstConv();
const std::vector<float>& MeanVec() const; const std::vector<float>& MeanVec() const;
const std::vector<float>& StdVec() const; const std::vector<float>& StdVec() const;
const DataLayoutType InputLayout() const; DataLayoutType InputLayout() const;
#endif #endif
void SetCache(int l1size, int l2size, int l3size); void SetCache(int l1size, int l2size, int l3size);
void SetArch(ARMArch arch) { arch_ = arch; } void SetArch(ARMArch arch) { arch_ = arch; }
......
...@@ -74,7 +74,9 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type, ...@@ -74,7 +74,9 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
const Type* in_arg_ty = kernel->GetInputDeclType("Input"); const Type* in_arg_ty = kernel->GetInputDeclType("Input");
const Type* out_arg_ty = kernel->GetOutputDeclType("Out"); const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
if (DataLayoutCompatible(*in_arg_ty, *cur_node->AsArg().type) && if (DataLayoutCompatible(*in_arg_ty, *cur_node->AsArg().type) &&
DataLayoutCompatible(*out_arg_ty, *cast_type)) { DataLayoutCompatible(*out_arg_ty, *cast_type) &&
// for first conv
PrecisionCompatibleTo(*in_arg_ty, *cur_node->AsArg().type)) {
is_found = true; is_found = true;
} }
} else if (op_type == "io_copy") { } else if (op_type == "io_copy") {
...@@ -121,7 +123,7 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type, ...@@ -121,7 +123,7 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type,
cast_arg->AsArg().type = cast_type; cast_arg->AsArg().type = cast_type;
auto* var = inst_node->AsStmt().op()->scope()->Var(cast_arg_name); auto* var = inst_node->AsStmt().op()->scope()->Var(cast_arg_name);
// for CastAfter manually set the tensor's type // for CastAfter manually set the tensor's type
var->GetMutable<::paddle::lite::Tensor>(); var->GetMutable<paddle::lite::Tensor>();
// create the stmt node // create the stmt node
auto* cast_inst = graph->NewInstructNode(); auto* cast_inst = graph->NewInstructNode();
...@@ -281,7 +283,7 @@ void MLUPostprocessPass::GetSubgraphOpArgType(Node* inst_node, ...@@ -281,7 +283,7 @@ void MLUPostprocessPass::GetSubgraphOpArgType(Node* inst_node,
// get subgraph's valid precision // get subgraph's valid precision
const auto& places = graph->valid_places(); const auto& places = graph->valid_places();
std::set<::paddle::lite_api::PrecisionType> prec_set; std::set<paddle::lite_api::PrecisionType> prec_set;
for (const auto& place : places) { for (const auto& place : places) {
if (place.target == TARGET(kMLU)) { if (place.target == TARGET(kMLU)) {
prec_set.insert(place.precision); prec_set.insert(place.precision);
...@@ -474,13 +476,20 @@ bool MLUPostprocessPass::IsFirstConvNode(Node* arg_node) { ...@@ -474,13 +476,20 @@ bool MLUPostprocessPass::IsFirstConvNode(Node* arg_node) {
return false; return false;
} }
void MLUPostprocessPass::GatherFirstConvNodes(SSAGraph* graph) { void MLUPostprocessPass::GatherAndModifyFirstConvNodes(SSAGraph* graph) {
for (auto& node : graph->mutable_nodes()) { for (auto& node : graph->mutable_nodes()) {
if (!node.IsStmt()) continue; if (!node.IsStmt()) continue;
if (node.AsStmt().op_type() == "feed") { if (node.AsStmt().op_type() == "feed") {
for (auto& out : node.outlinks) { for (auto& out : node.outlinks) {
if (IsFirstConvNode(out)) { if (IsFirstConvNode(out)) {
first_conv_nodes_.insert(out->AsArg().name); first_conv_nodes_.insert(out->AsArg().name);
// modify first conv nodes' type
const auto* old_type = out->AsArg().type;
out->AsArg().type =
LiteType::GetTensorTy(old_type->target(),
paddle::lite_api::PrecisionType::kInt8,
old_type->layout(),
old_type->device());
} }
} }
} }
...@@ -504,7 +513,7 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) { ...@@ -504,7 +513,7 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) {
out->AsArg().type = out->AsArg().type =
LiteType::GetTensorTy(old_type->target(), LiteType::GetTensorTy(old_type->target(),
old_type->precision(), old_type->precision(),
::paddle::lite_api::DataLayoutType::kNHWC, paddle::lite_api::DataLayoutType::kNHWC,
old_type->device()); old_type->device());
} }
} }
...@@ -523,7 +532,7 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) { ...@@ -523,7 +532,7 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) {
inp->AsArg().type = inp->AsArg().type =
LiteType::GetTensorTy(old_type->target(), LiteType::GetTensorTy(old_type->target(),
old_type->precision(), old_type->precision(),
::paddle::lite_api::DataLayoutType::kNHWC, paddle::lite_api::DataLayoutType::kNHWC,
old_type->device()); old_type->device());
} }
} }
...@@ -539,12 +548,12 @@ void MLUPostprocessPass::Apply(const std::unique_ptr<SSAGraph>& graph) { ...@@ -539,12 +548,12 @@ void MLUPostprocessPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
// 1: feed->arg_in->subgraph->... 2: ...->subgraph->arg_out->fetch; // 1: feed->arg_in->subgraph->... 2: ...->subgraph->arg_out->fetch;
// arg_in and arg_out are assumed to be NHWC which user should be aware of. // arg_in and arg_out are assumed to be NHWC which user should be aware of.
// Thus here we change these args' layout to NHWC // Thus here we change these args' layout to NHWC
if (lite::DeviceInfo::Global().InputLayout() == DATALAYOUT(kNHWC) { if (lite::DeviceInfo::Global().InputLayout() == DATALAYOUT(kNHWC)) {
ModifyLayout(graph.get()); ModifyLayout(graph.get());
} }
if (lite::DeviceInfo::Global().UseFirstConv()) { if (lite::DeviceInfo::Global().UseFirstConv()) {
GatherFirstConvNodes(graph.get()); GatherAndModifyFirstConvNodes(graph.get());
} }
// insert io_copy, layout and precision cast of subgraph's inputs and outputs // insert io_copy, layout and precision cast of subgraph's inputs and outputs
......
...@@ -109,7 +109,7 @@ class MLUPostprocessPass : public ProgramPass { ...@@ -109,7 +109,7 @@ class MLUPostprocessPass : public ProgramPass {
void RecreateOp(Node* inst_node, SSAGraph* graph); void RecreateOp(Node* inst_node, SSAGraph* graph);
void GatherFirstConvNodes(SSAGraph* graph); void GatherAndModifyFirstConvNodes(SSAGraph* graph);
bool IsFirstConvNode(Node* arg_node); bool IsFirstConvNode(Node* arg_node);
......
...@@ -84,7 +84,7 @@ struct FPTypeTraits<paddle::lite_api::PrecisionType::kFloat> { ...@@ -84,7 +84,7 @@ struct FPTypeTraits<paddle::lite_api::PrecisionType::kFloat> {
template <> template <>
struct FPTypeTraits<paddle::lite_api::PrecisionType::kFP16> { struct FPTypeTraits<paddle::lite_api::PrecisionType::kFP16> {
typedef ::paddle::lite::fluid::float16 T; typedef paddle::lite::fluid::float16 T;
}; };
} // namespace mlu } // namespace mlu
......
...@@ -89,3 +89,20 @@ REGISTER_LITE_KERNEL( ...@@ -89,3 +89,20 @@ REGISTER_LITE_KERNEL(
PRECISION(kFloat), PRECISION(kFloat),
DATALAYOUT(kNHWC))}) DATALAYOUT(kNHWC))})
.Finalize(); .Finalize();
// Registers the int8 variant of the NCHW -> NHWC `layout` kernel.
// Registration key: op `layout`, target kMLU, precision kInt8, layout kNHWC.
// Implementation: LayoutNchwToNhwcCompute instantiated with PRECISION(kInt8).
// Both the "Input" and "Out" tensors are declared on TARGET(kHost) with int8
// precision; only the data layout differs (NCHW in, NHWC out).
// NOTE(review): the alias contains "fp32" although both bound tensors are
// int8 -- confirm the naming is intentional.
REGISTER_LITE_KERNEL(
layout,
kMLU,
kInt8,
kNHWC,
paddle::lite::kernels::mlu::LayoutNchwToNhwcCompute<PRECISION(kInt8)>,
def_layout_nchw2nhwc_fp32_int8)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt8),
DATALAYOUT(kNCHW))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt8),
DATALAYOUT(kNHWC))})
.Finalize();
...@@ -29,6 +29,24 @@ namespace lite { ...@@ -29,6 +29,24 @@ namespace lite {
namespace kernels { namespace kernels {
namespace mlu { namespace mlu {
// Compile-time map from a PrecisionType tag to the C++ element type `T` used
// for tensor data of that precision.
//
// The primary template is empty (it has no member `T`), so using
// `FPTypeTraits<P>::T` for a precision without a specialization below is a
// compile error rather than a silent fallback.
template <paddle::lite_api::PrecisionType>
struct FPTypeTraits {};

// kFloat -> 32-bit float.
template <>
struct FPTypeTraits<paddle::lite_api::PrecisionType::kFloat> {
  using T = float;
};

// kFP16 -> the project's float16 type.
template <>
struct FPTypeTraits<paddle::lite_api::PrecisionType::kFP16> {
  using T = paddle::lite::fluid::float16;
};

// kInt8 -> signed 8-bit integer.
template <>
struct FPTypeTraits<paddle::lite_api::PrecisionType::kInt8> {
  using T = int8_t;
};
template <lite::TargetType Target, typename T> template <lite::TargetType Target, typename T>
inline void LayoutTransCompute(const int dim, inline void LayoutTransCompute(const int dim,
const lite::Context<Target>& context, const lite::Context<Target>& context,
...@@ -63,7 +81,7 @@ class LayoutNchwToNhwcCompute ...@@ -63,7 +81,7 @@ class LayoutNchwToNhwcCompute
auto& param = this->template Param<param_t>(); auto& param = this->template Param<param_t>();
auto* x = param.x; auto* x = param.x;
auto* out = param.y; auto* out = param.y;
out->template mutable_data<float>(); out->template mutable_data<typename FPTypeTraits<Precision>::T>();
auto x_dims = param.x->dims().size(); auto x_dims = param.x->dims().size();
auto& context = this->ctx_->template As<X86Context>(); auto& context = this->ctx_->template As<X86Context>();
...@@ -88,7 +106,8 @@ class LayoutNchwToNhwcCompute ...@@ -88,7 +106,8 @@ class LayoutNchwToNhwcCompute
CHECK(0) << "Unsupport dim in mlu layout nchw to nhwc"; CHECK(0) << "Unsupport dim in mlu layout nchw to nhwc";
} }
LayoutTransCompute<lite::TargetType::kX86, float>( LayoutTransCompute<lite::TargetType::kX86,
typename FPTypeTraits<Precision>::T>(
x_dims, context, *x, out, axis); x_dims, context, *x, out, axis);
if (x_dims > 2) { if (x_dims > 2) {
...@@ -111,7 +130,7 @@ class LayoutNhwcToNchwCompute ...@@ -111,7 +130,7 @@ class LayoutNhwcToNchwCompute
auto& param = this->template Param<param_t>(); auto& param = this->template Param<param_t>();
auto* x = param.x; auto* x = param.x;
auto* out = param.y; auto* out = param.y;
out->template mutable_data<float>(); out->template mutable_data<typename FPTypeTraits<Precision>::T>();
auto x_dims = param.x->dims().size(); auto x_dims = param.x->dims().size();
auto& context = this->ctx_->template As<X86Context>(); auto& context = this->ctx_->template As<X86Context>();
...@@ -136,7 +155,8 @@ class LayoutNhwcToNchwCompute ...@@ -136,7 +155,8 @@ class LayoutNhwcToNchwCompute
CHECK(0) << "Unsupport dim in mlu layout nhwc to nchw"; CHECK(0) << "Unsupport dim in mlu layout nhwc to nchw";
} }
LayoutTransCompute<lite::TargetType::kX86, float>( LayoutTransCompute<lite::TargetType::kX86,
typename FPTypeTraits<Precision>::T>(
x_dims, context, *x, out, axis); x_dims, context, *x, out, axis);
if (x_dims > 2) { if (x_dims > 2) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册