提交 3785aab8 编写于 作者: J jackzhang235

support mlu set_input_layout

上级 12a1a095
......@@ -203,23 +203,22 @@ void ConfigBase::set_threads(int threads) {
#endif
}
void CxxConfig::mlu_set_mlu_core_version(
lite_api::MLUCoreVersion core_version) {
void CxxConfig::set_mlu_core_version(lite_api::MLUCoreVersion core_version) {
mlu_core_version_ = core_version;
}
void CxxConfig::mlu_set_mlu_core_number(int core_number) {
void CxxConfig::set_mlu_core_number(int core_number) {
mlu_core_number_ = core_number;
}
void CxxConfig::mlu_set_input_layout()(DataLayoutType layout) {
void CxxConfig::set_mlu_input_layout(DataLayoutType layout) {
mlu_input_layout_ = layout;
}
void CxxConfig::mlu_set_use_first_conv(bool use_first_conv) {
void CxxConfig::set_mlu_use_first_conv(bool use_first_conv) {
mlu_use_first_conv_ = use_first_conv;
}
void CxxConfig::mlu_set_first_conv_mean(const std::vector<float> &mean) {
void CxxConfig::set_mlu_first_conv_mean(const std::vector<float> &mean) {
mlu_first_conv_mean_ = mean;
}
void CxxConfig::mlu_set_first_conv_std(const std::vector<float> &std) {
void CxxConfig::set_mlu_first_conv_std(const std::vector<float> &std) {
mlu_first_conv_std_ = std;
}
lite_api::MLUCoreVersion CxxConfig::mlu_core_version() const {
......
......@@ -136,7 +136,6 @@ class LITE_API CxxConfig : public ConfigBase {
#ifdef LITE_WITH_X86
int x86_math_library_math_threads_ = 1;
#endif
lite_api::MLUCoreVersion mlu_core_version_{lite_api::MLUCoreVersion::MLU_270};
int mlu_core_number_{1};
DataLayoutType mlu_input_layout_{DATALAYOUT(kNCHW)};
......@@ -171,12 +170,12 @@ class LITE_API CxxConfig : public ConfigBase {
}
#endif
void mlu_set_mlu_core_version(lite_api::MLUCoreVersion core_version);
void mlu_set_mlu_core_number(int core_number);
void mlu_set_input_layout()(DataLayoutType layout);
void mlu_set_use_first_conv(bool use_first_conv);
void mlu_set_first_conv_mean(const std::vector<float>& mean);
void mlu_set_first_conv_std(const std::vector<float>& std);
void set_mlu_core_version(lite_api::MLUCoreVersion core_version);
void set_mlu_core_number(int core_number);
void set_mlu_input_layout(DataLayoutType layout);
void set_mlu_use_first_conv(bool use_first_conv);
void set_mlu_first_conv_mean(const std::vector<float>& mean);
void set_mlu_first_conv_std(const std::vector<float>& std);
lite_api::MLUCoreVersion mlu_core_version() const;
int mlu_core_number() const;
......
......@@ -128,11 +128,12 @@ void BindLiteCxxConfig(py::module *m) {
.def("power_mode", &CxxConfig::power_mode);
#endif
#ifdef LITE_WITH_MLU
cxx_config.def("set_use_firstconv", &CxxConfig::set_use_firstconv)
.def("set_mean", &CxxConfig::set_mean)
.def("set_std", &CxxConfig::set_std)
.def("set_mlu_core_version", &CxxConfig::set_mlu_core_version)
.def("set_mlu_core_number", &CxxConfig::set_mlu_core_number);
cxx_config.def("set_mlu_core_version", &CxxConfig::set_mlu_core_version)
.def("set_mlu_core_number", &CxxConfig::set_mlu_core_number)
.def("set_mlu_input_layout", &CxxConfig::set_mlu_input_layout)
.def("set_mlu_use_first_conv", &CxxConfig::set_mlu_use_first_conv)
.def("set_mlu_first_conv_mean", &CxxConfig::set_mlu_first_conv_mean)
.def("set_mlu_first_conv_std", &CxxConfig::set_mlu_first_conv_std);
#endif
}
......
......@@ -72,6 +72,7 @@ thread_local int DeviceInfo::mlu_core_number_{1};
thread_local bool DeviceInfo::use_first_conv_{false};
thread_local std::vector<float> DeviceInfo::mean_vec_;
thread_local std::vector<float> DeviceInfo::std_vec_;
thread_local DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)};
#endif
#ifdef TARGET_IOS
......@@ -1123,7 +1124,7 @@ const std::vector<float>& DeviceInfo::MeanVec() const { return mean_vec_; }
const std::vector<float>& DeviceInfo::StdVec() const { return std_vec_; }
const DataLayoutType InputLayout() const { return input_layout_; }
DataLayoutType DeviceInfo::InputLayout() const { return input_layout_; }
#endif // LITE_WITH_MLU
......
......@@ -67,7 +67,7 @@ class DeviceInfo {
bool UseFirstConv();
const std::vector<float>& MeanVec() const;
const std::vector<float>& StdVec() const;
const DataLayoutType InputLayout() const;
DataLayoutType InputLayout() const;
#endif
void SetCache(int l1size, int l2size, int l3size);
void SetArch(ARMArch arch) { arch_ = arch; }
......
......@@ -74,7 +74,9 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
const Type* in_arg_ty = kernel->GetInputDeclType("Input");
const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
if (DataLayoutCompatible(*in_arg_ty, *cur_node->AsArg().type) &&
DataLayoutCompatible(*out_arg_ty, *cast_type)) {
DataLayoutCompatible(*out_arg_ty, *cast_type) &&
// for first conv
PrecisionCompatibleTo(*in_arg_ty, *cur_node->AsArg().type)) {
is_found = true;
}
} else if (op_type == "io_copy") {
......@@ -121,7 +123,7 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type,
cast_arg->AsArg().type = cast_type;
auto* var = inst_node->AsStmt().op()->scope()->Var(cast_arg_name);
// for CastAfter manully set the tensor's type
var->GetMutable<::paddle::lite::Tensor>();
var->GetMutable<paddle::lite::Tensor>();
// create the stmt node
auto* cast_inst = graph->NewInstructNode();
......@@ -281,7 +283,7 @@ void MLUPostprocessPass::GetSubgraphOpArgType(Node* inst_node,
// get subgraph's valid precision
const auto& places = graph->valid_places();
std::set<::paddle::lite_api::PrecisionType> prec_set;
std::set<paddle::lite_api::PrecisionType> prec_set;
for (const auto& place : places) {
if (place.target == TARGET(kMLU)) {
prec_set.insert(place.precision);
......@@ -474,13 +476,20 @@ bool MLUPostprocessPass::IsFirstConvNode(Node* arg_node) {
return false;
}
void MLUPostprocessPass::GatherFirstConvNodes(SSAGraph* graph) {
void MLUPostprocessPass::GatherAndModifyFirstConvNodes(SSAGraph* graph) {
for (auto& node : graph->mutable_nodes()) {
if (!node.IsStmt()) continue;
if (node.AsStmt().op_type() == "feed") {
for (auto& out : node.outlinks) {
if (IsFirstConvNode(out)) {
first_conv_nodes_.insert(out->AsArg().name);
// modify first conv nodes' type
const auto* old_type = out->AsArg().type;
out->AsArg().type =
LiteType::GetTensorTy(old_type->target(),
paddle::lite_api::PrecisionType::kInt8,
old_type->layout(),
old_type->device());
}
}
}
......@@ -504,7 +513,7 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) {
out->AsArg().type =
LiteType::GetTensorTy(old_type->target(),
old_type->precision(),
::paddle::lite_api::DataLayoutType::kNHWC,
paddle::lite_api::DataLayoutType::kNHWC,
old_type->device());
}
}
......@@ -523,7 +532,7 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) {
inp->AsArg().type =
LiteType::GetTensorTy(old_type->target(),
old_type->precision(),
::paddle::lite_api::DataLayoutType::kNHWC,
paddle::lite_api::DataLayoutType::kNHWC,
old_type->device());
}
}
......@@ -539,12 +548,12 @@ void MLUPostprocessPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
// 1: feed->arg_in->subgraph->... 2: ...->subgraph->arg_out->fetch;
// arg_in and arg_out are assumed to be NHWC which user should be aware of.
// Thus here we change these args' layout to NHWC
if (lite::DeviceInfo::Global().InputLayout() == DATALAYOUT(kNHWC) {
if (lite::DeviceInfo::Global().InputLayout() == DATALAYOUT(kNHWC)) {
ModifyLayout(graph.get());
}
if (lite::DeviceInfo::Global().UseFirstConv()) {
GatherFirstConvNodes(graph.get());
GatherAndModifyFirstConvNodes(graph.get());
}
// insert io_copy, layout and precision cast of subgraph's inputs and outputs
......
......@@ -109,7 +109,7 @@ class MLUPostprocessPass : public ProgramPass {
void RecreateOp(Node* inst_node, SSAGraph* graph);
void GatherFirstConvNodes(SSAGraph* graph);
void GatherAndModifyFirstConvNodes(SSAGraph* graph);
bool IsFirstConvNode(Node* arg_node);
......
......@@ -84,7 +84,7 @@ struct FPTypeTraits<paddle::lite_api::PrecisionType::kFloat> {
template <>
struct FPTypeTraits<paddle::lite_api::PrecisionType::kFP16> {
typedef ::paddle::lite::fluid::float16 T;
typedef paddle::lite::fluid::float16 T;
};
} // namespace mlu
......
......@@ -89,3 +89,20 @@ REGISTER_LITE_KERNEL(
PRECISION(kFloat),
DATALAYOUT(kNHWC))})
.Finalize();
REGISTER_LITE_KERNEL(
layout,
kMLU,
kInt8,
kNHWC,
paddle::lite::kernels::mlu::LayoutNchwToNhwcCompute<PRECISION(kInt8)>,
def_layout_nchw2nhwc_fp32_int8)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt8),
DATALAYOUT(kNCHW))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt8),
DATALAYOUT(kNHWC))})
.Finalize();
......@@ -29,6 +29,24 @@ namespace lite {
namespace kernels {
namespace mlu {
template <paddle::lite_api::PrecisionType>
struct FPTypeTraits {};
template <>
struct FPTypeTraits<paddle::lite_api::PrecisionType::kFloat> {
typedef float T;
};
template <>
struct FPTypeTraits<paddle::lite_api::PrecisionType::kFP16> {
typedef paddle::lite::fluid::float16 T;
};
template <>
struct FPTypeTraits<paddle::lite_api::PrecisionType::kInt8> {
typedef int8_t T;
};
template <lite::TargetType Target, typename T>
inline void LayoutTransCompute(const int dim,
const lite::Context<Target>& context,
......@@ -63,7 +81,7 @@ class LayoutNchwToNhwcCompute
auto& param = this->template Param<param_t>();
auto* x = param.x;
auto* out = param.y;
out->template mutable_data<float>();
out->template mutable_data<typename FPTypeTraits<Precision>::T>();
auto x_dims = param.x->dims().size();
auto& context = this->ctx_->template As<X86Context>();
......@@ -88,7 +106,8 @@ class LayoutNchwToNhwcCompute
CHECK(0) << "Unsupport dim in mlu layout nchw to nhwc";
}
LayoutTransCompute<lite::TargetType::kX86, float>(
LayoutTransCompute<lite::TargetType::kX86,
typename FPTypeTraits<Precision>::T>(
x_dims, context, *x, out, axis);
if (x_dims > 2) {
......@@ -111,7 +130,7 @@ class LayoutNhwcToNchwCompute
auto& param = this->template Param<param_t>();
auto* x = param.x;
auto* out = param.y;
out->template mutable_data<float>();
out->template mutable_data<typename FPTypeTraits<Precision>::T>();
auto x_dims = param.x->dims().size();
auto& context = this->ctx_->template As<X86Context>();
......@@ -136,7 +155,8 @@ class LayoutNhwcToNchwCompute
CHECK(0) << "Unsupport dim in mlu layout nhwc to nchw";
}
LayoutTransCompute<lite::TargetType::kX86, float>(
LayoutTransCompute<lite::TargetType::kX86,
typename FPTypeTraits<Precision>::T>(
x_dims, context, *x, out, axis);
if (x_dims > 2) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册