未验证 提交 79c54557 编写于 作者: Y ysh329 提交者: GitHub

[ARM] add support ttfnet. test=develop (#3876)

* fix ttfnet miscs. test=develop
上级 486d6572
...@@ -1254,6 +1254,19 @@ void elementwise_max_relu_broadcast<float>(const float* dinx, ...@@ -1254,6 +1254,19 @@ void elementwise_max_relu_broadcast<float>(const float* dinx,
} }
} }
// int64_t specialization of element-wise division.
//
// For every k in [0, num) stores dinx[k] / diny[k] into dout[k] using
// built-in integer division. Divisors are used as-is: a zero entry in diny is
// not guarded against.
template <>
void elementwise_div<int64_t>(const int64_t* dinx,
                              const int64_t* diny,
                              int64_t* dout,
                              int num) {
  for (int idx = 0; idx < num; ++idx) {
    dout[idx] = dinx[idx] / diny[idx];
  }
}
template <> template <>
void elementwise_div<float>(const float* dinx, void elementwise_div<float>(const float* dinx,
const float* diny, const float* diny,
...@@ -1306,6 +1319,28 @@ void elementwise_div<float>(const float* dinx, ...@@ -1306,6 +1319,28 @@ void elementwise_div<float>(const float* dinx,
} }
} }
// int64_t specialization of broadcast element-wise division.
//
// dinx and dout are walked as batch * channels consecutive runs of `num`
// elements. Every element of the run belonging to channel c is divided by the
// single value diny[c] (built-in integer division); the divisor is read once
// per run and is not checked for zero.
template <>
void elementwise_div_broadcast<int64_t>(const int64_t* dinx,
                                        const int64_t* diny,
                                        int64_t* dout,
                                        int batch,
                                        int channels,
                                        int num) {
  for (int b = 0; b < batch; ++b) {
    for (int c = 0; c < channels; ++c) {
      // Start of the run for (batch b, channel c) in both input and output.
      const int base = (b * channels + c) * num;
      const int64_t* src = dinx + base;
      const int64_t divisor = diny[c];
      int64_t* dst = dout + base;
      for (int k = 0; k < num; ++k) {
        dst[k] = src[k] / divisor;
      }
    }
  }
}
template <> template <>
void elementwise_div_broadcast<float>(const float* dinx, void elementwise_div_broadcast<float>(const float* dinx,
const float* diny, const float* diny,
......
...@@ -33,6 +33,17 @@ void CalibComputeFp32ToInt8<DLType>::Run() { ...@@ -33,6 +33,17 @@ void CalibComputeFp32ToInt8<DLType>::Run() {
din, dout, scale.data(), 1, 1, param.input->numel()); din, dout, scale.data(), 1, 1, param.input->numel());
} }
// Converts the int64 input tensor of the calib op to an int32 output tensor.
//
// Reads `param.input` as int64_t, obtains a writable int32_t buffer from
// `param.output`, and copies the values one by one, narrowing each to
// int32_t. Values are not range-checked before narrowing. The `scale` field
// of CalibParam is not used by this conversion.
template <DataLayoutType DLType>
void CalibComputeInt64ToInt32<DLType>::Run() {
  auto& param = this->template Param<operators::CalibParam>();
  const auto* din = param.input->template data<int64_t>();
  auto* dout = param.output->template mutable_data<int32_t>();
  // Hoist the element count and iterate with a 64-bit index so the loop
  // counter cannot overflow before reaching the end of a large tensor.
  const int64_t count = param.input->numel();
  for (int64_t i = 0; i < count; ++i) {
    // Explicit cast documents the intentional int64 -> int32 narrowing.
    dout[i] = static_cast<int32_t>(din[i]);
  }
}
template <DataLayoutType DLType> template <DataLayoutType DLType>
void CalibComputeInt8ToFp32<DLType>::Run() { void CalibComputeInt8ToFp32<DLType>::Run() {
auto& param = this->template Param<operators::CalibParam>(); auto& param = this->template Param<operators::CalibParam>();
...@@ -105,6 +116,23 @@ REGISTER_LITE_KERNEL( ...@@ -105,6 +116,23 @@ REGISTER_LITE_KERNEL(
DATALAYOUT(kNHWC))}) DATALAYOUT(kNHWC))})
.Finalize(); .Finalize();
// Registers the "int64_to_int32" variant of the `calib` op for the ARM target
// with kInt64 precision and NCHW layout, implemented by
// CalibComputeInt64ToInt32. "Input" is bound to an ARM int64 NCHW tensor and
// "Out" to an ARM int32 NCHW tensor.
REGISTER_LITE_KERNEL(
calib,
kARM,
kInt64,
kNCHW,
paddle::lite::kernels::arm::CalibComputeInt64ToInt32<DATALAYOUT(kNCHW)>,
int64_to_int32)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt64),
DATALAYOUT(kNCHW))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt32),
DATALAYOUT(kNCHW))})
.Finalize();
REGISTER_LITE_KERNEL( REGISTER_LITE_KERNEL(
calib_once, calib_once,
kARM, kARM,
...@@ -161,3 +189,20 @@ REGISTER_LITE_KERNEL( ...@@ -161,3 +189,20 @@ REGISTER_LITE_KERNEL(
PRECISION(kFloat), PRECISION(kFloat),
DATALAYOUT(kNHWC))}) DATALAYOUT(kNHWC))})
.Finalize(); .Finalize();
// Registers the "int64_to_int32" variant of the `calib_once` op for the ARM
// target with kInt64 precision and NCHW layout. It reuses the same
// CalibComputeInt64ToInt32 kernel class as the `calib` registration, with
// "Input" bound to an ARM int64 NCHW tensor and "Out" to an ARM int32 NCHW
// tensor.
REGISTER_LITE_KERNEL(
calib_once,
kARM,
kInt64,
kNCHW,
paddle::lite::kernels::arm::CalibComputeInt64ToInt32<DATALAYOUT(kNCHW)>,
int64_to_int32)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt64),
DATALAYOUT(kNCHW))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt32),
DATALAYOUT(kNCHW))})
.Finalize();
...@@ -34,6 +34,19 @@ class CalibComputeFp32ToInt8 ...@@ -34,6 +34,19 @@ class CalibComputeFp32ToInt8
private: private:
}; };
// ARM calib kernel declared with kInt64 precision and parameterized on the
// data layout. Its Run() is defined out of line and converts the int64 input
// tensor of CalibParam into an int32 output tensor.
template <DataLayoutType DLType>
class CalibComputeInt64ToInt32
    : public KernelLite<TARGET(kARM), PRECISION(kInt64), DLType> {
 public:
  // Parameter struct consumed by this kernel.
  using param_t = operators::CalibParam;

  ~CalibComputeInt64ToInt32() override = default;

  // Performs the element-wise int64 -> int32 conversion.
  void Run() override;
};
template <DataLayoutType DLType> template <DataLayoutType DLType>
class CalibComputeInt8ToFp32 class CalibComputeInt8ToFp32
: public KernelLite<TARGET(kARM), PRECISION(kInt8), DLType> { : public KernelLite<TARGET(kARM), PRECISION(kInt8), DLType> {
......
...@@ -62,8 +62,19 @@ void CastCompute::Run() { ...@@ -62,8 +62,19 @@ void CastCompute::Run() {
int32_t* out_data = param.Out->mutable_data<int32_t>(); int32_t* out_data = param.Out->mutable_data<int32_t>();
std::transform( std::transform(
x_data_begin, x_data_end, out_data, TransOp<int64_t, int32_t>); x_data_begin, x_data_end, out_data, TransOp<int64_t, int32_t>);
} else if (param.in_dtype == 0 && param.out_dtype == 5) { // bool->fp32
const bool* x_data_begin = param.X->data<bool>();
const bool* x_data_end = x_data_begin + param.X->numel();
float* out_data = param.Out->mutable_data<float>();
std::transform(x_data_begin, x_data_end, out_data, TransOp<bool, float>);
} else if (param.in_dtype == 3 && param.out_dtype == 5) { // int64->fp32
const int64_t* x_data_begin = param.X->data<int64_t>();
const int64_t* x_data_end = x_data_begin + param.X->numel();
float* out_data = param.Out->mutable_data<float>();
std::transform(x_data_begin, x_data_end, out_data, TransOp<int64_t, float>);
} else { } else {
LOG(FATAL) << "other has not been implemented"; LOG(FATAL) << "other has not been implemented transform with dtype"
<< param.in_dtype << " X, dtype" << param.out_dtype << " Out";
} }
} }
......
...@@ -300,11 +300,12 @@ void ElementwiseMaxActivationCompute::Run() { ...@@ -300,11 +300,12 @@ void ElementwiseMaxActivationCompute::Run() {
} }
} }
void ElementwiseDivCompute::Run() { template <typename T, PrecisionType PType>
auto& param = Param<operators::ElementwiseParam>(); void ElementwiseDivCompute<T, PType>::Run() {
const float* x_data = param.X->data<float>(); auto& param = this->template Param<operators::ElementwiseParam>();
const float* y_data = param.Y->data<float>(); auto* x_data = param.X->template data<T>();
float* out_data = param.Out->mutable_data<float>(); auto* y_data = param.Y->template data<T>();
auto* out_data = param.Out->template mutable_data<T>();
int axis = param.axis; int axis = param.axis;
auto x_dims = param.X->dims(); auto x_dims = param.X->dims();
auto y_dims = param.Y->dims(); auto y_dims = param.Y->dims();
...@@ -313,10 +314,10 @@ void ElementwiseDivCompute::Run() { ...@@ -313,10 +314,10 @@ void ElementwiseDivCompute::Run() {
LOG(FATAL) << "elewise div don't support x_dims size < y_dims size"; LOG(FATAL) << "elewise div don't support x_dims size < y_dims size";
} }
if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) { if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) {
lite::arm::math::elementwise_div_broadcast( lite::arm::math::elementwise_div_broadcast<T>(
x_data, y_data, out_data, pre, n, post); x_data, y_data, out_data, pre, n, post);
} else { } else {
lite::arm::math::elementwise_div( lite::arm::math::elementwise_div<T>(
x_data, y_data, out_data, x_dims.production()); x_data, y_data, out_data, x_dims.production());
} }
} }
...@@ -488,17 +489,27 @@ REGISTER_LITE_KERNEL( ...@@ -488,17 +489,27 @@ REGISTER_LITE_KERNEL(
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize(); .Finalize();
REGISTER_LITE_KERNEL(elementwise_div, using elementwise_div_fp32 =
kARM, paddle::lite::kernels::arm::ElementwiseDivCompute<float, PRECISION(kFloat)>;
kFloat,
kNCHW, REGISTER_LITE_KERNEL(
paddle::lite::kernels::arm::ElementwiseDivCompute, elementwise_div, kARM, kFloat, kNCHW, elementwise_div_fp32, def)
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))}) .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize(); .Finalize();
// Alias for the int64_t instantiation of ElementwiseDivCompute, used to keep
// the registration macro below on one line.
using elementwise_div_int64 =
paddle::lite::kernels::arm::ElementwiseDivCompute<int64_t,
PRECISION(kInt64)>;
// Registers the default ("def") `elementwise_div` kernel for the ARM target
// with kInt64 precision and NCHW layout; X, Y and Out are all bound to ARM
// int64 tensors.
REGISTER_LITE_KERNEL(
elementwise_div, kARM, kInt64, kNCHW, elementwise_div_int64, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.Finalize();
REGISTER_LITE_KERNEL( REGISTER_LITE_KERNEL(
fusion_elementwise_div_activation, fusion_elementwise_div_activation,
kARM, kARM,
......
...@@ -86,8 +86,8 @@ class ElementwiseMaxActivationCompute ...@@ -86,8 +86,8 @@ class ElementwiseMaxActivationCompute
virtual ~ElementwiseMaxActivationCompute() = default; virtual ~ElementwiseMaxActivationCompute() = default;
}; };
class ElementwiseDivCompute template <typename T, PrecisionType PType>
: public KernelLite<TARGET(kARM), PRECISION(kFloat)> { class ElementwiseDivCompute : public KernelLite<TARGET(kARM), PType> {
public: public:
void Run() override; void Run() override;
......
...@@ -73,7 +73,6 @@ void GatherCompute::Run() { ...@@ -73,7 +73,6 @@ void GatherCompute::Run() {
REGISTER_LITE_KERNEL( REGISTER_LITE_KERNEL(
gather, kARM, kAny, kNCHW, paddle::lite::kernels::arm::GatherCompute, def) gather, kARM, kAny, kNCHW, paddle::lite::kernels::arm::GatherCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))}) .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.BindInput("Index", .BindInput("Index", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
{LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.Finalize(); .Finalize();
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册