Unverified commit 79c54557, authored by ysh329, committed by GitHub

[ARM] add support ttfnet. test=develop (#3876)

* fix ttfnet miscs. test=develop
Parent 486d6572
......@@ -1254,6 +1254,19 @@ void elementwise_max_relu_broadcast<float>(const float* dinx,
}
}
template <>
void elementwise_div<int64_t>(const int64_t* dinx,
const int64_t* diny,
int64_t* dout,
int num) {
for (int i = 0; i < num; i++) {
*dout = *dinx / *diny;
dout++;
dinx++;
diny++;
}
}
template <>
void elementwise_div<float>(const float* dinx,
const float* diny,
......@@ -1306,6 +1319,28 @@ void elementwise_div<float>(const float* dinx,
}
}
template <>
void elementwise_div_broadcast<int64_t>(const int64_t* dinx,
const int64_t* diny,
int64_t* dout,
int batch,
int channels,
int num) {
for (int i = 0; i < batch; ++i) {
for (int j = 0; j < channels; ++j) {
int offset = (i * channels + j) * num;
const int64_t* din_ptr = dinx + offset;
const int64_t diny_data = diny[j];
int64_t* dout_ptr = dout + offset;
for (int p = 0; p < num; p++) {
*dout_ptr = *din_ptr / diny_data;
dout_ptr++;
din_ptr++;
}
}
}
}
template <>
void elementwise_div_broadcast<float>(const float* dinx,
const float* diny,
......
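The two new `int64_t` specializations above are plain scalar loops. `elementwise_div_broadcast<int64_t>` assumes X and Out are laid out as `batch * channels * num` contiguous elements and that Y contributes one divisor per channel. A minimal standalone sketch of that indexing (illustrative names, not Paddle Lite code):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Sketch of the broadcast indexing used by elementwise_div_broadcast<int64_t>:
// X/Out are [batch, channels, num], Y holds one divisor per channel.
int main() {
  const int batch = 1, channels = 2, num = 3;
  std::vector<int64_t> x = {2, 4, 6, 9, 12, 15};  // shape [1, 2, 3]
  std::vector<int64_t> y = {2, 3};                // one value per channel
  std::vector<int64_t> out(x.size());
  for (int i = 0; i < batch; ++i) {
    for (int j = 0; j < channels; ++j) {
      int offset = (i * channels + j) * num;
      for (int p = 0; p < num; ++p) {
        out[offset + p] = x[offset + p] / y[j];
      }
    }
  }
  for (int64_t v : out) std::cout << v << " ";  // prints: 1 2 3 3 4 5
  std::cout << std::endl;
  return 0;
}
```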
......@@ -33,6 +33,17 @@ void CalibComputeFp32ToInt8<DLType>::Run() {
din, dout, scale.data(), 1, 1, param.input->numel());
}
template <DataLayoutType DLType>
void CalibComputeInt64ToInt32<DLType>::Run() {
auto& param = this->template Param<operators::CalibParam>();
const auto* din = param.input->template data<int64_t>();
std::vector<float> scale = {param.scale};
auto* dout = param.output->template mutable_data<int32_t>();
for (auto i = 0; i < param.input->numel(); ++i) {
dout[i] = din[i];
}
}
template <DataLayoutType DLType>
void CalibComputeInt8ToFp32<DLType>::Run() {
auto& param = this->template Param<operators::CalibParam>();
......@@ -105,6 +116,23 @@ REGISTER_LITE_KERNEL(
DATALAYOUT(kNHWC))})
.Finalize();
REGISTER_LITE_KERNEL(
calib,
kARM,
kInt64,
kNCHW,
paddle::lite::kernels::arm::CalibComputeInt64ToInt32<DATALAYOUT(kNCHW)>,
int64_to_int32)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt64),
DATALAYOUT(kNCHW))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt32),
DATALAYOUT(kNCHW))})
.Finalize();
REGISTER_LITE_KERNEL(
calib_once,
kARM,
......@@ -161,3 +189,20 @@ REGISTER_LITE_KERNEL(
PRECISION(kFloat),
DATALAYOUT(kNHWC))})
.Finalize();
REGISTER_LITE_KERNEL(
calib_once,
kARM,
kInt64,
kNCHW,
paddle::lite::kernels::arm::CalibComputeInt64ToInt32<DATALAYOUT(kNCHW)>,
int64_to_int32)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt64),
DATALAYOUT(kNCHW))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt32),
DATALAYOUT(kNCHW))})
.Finalize();
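`CalibComputeInt64ToInt32` converts tensors with a per-element narrowing copy from `int64_t` to `int32_t`; the `scale` vector it builds is never used, and no range check is performed. The kernel is registered for both `calib` and `calib_once` under the alias `int64_to_int32`. A self-contained sketch of the same conversion semantics (the function name is illustrative):

```cpp
#include <cstdint>
#include <limits>
#include <vector>

// Sketch of the element-wise int64 -> int32 narrowing done by
// CalibComputeInt64ToInt32::Run(); no scaling or range checking is applied,
// so values outside the int32 range are silently truncated by the cast.
std::vector<int32_t> Int64ToInt32(const std::vector<int64_t>& in) {
  std::vector<int32_t> out(in.size());
  for (size_t i = 0; i < in.size(); ++i) {
    out[i] = static_cast<int32_t>(in[i]);
  }
  return out;
}

int main() {
  std::vector<int64_t> indices = {0, 7, std::numeric_limits<int32_t>::max()};
  auto converted = Int64ToInt32(indices);  // safe here: all values fit in int32
  return converted.size() == indices.size() ? 0 : 1;
}
```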
......@@ -34,6 +34,19 @@ class CalibComputeFp32ToInt8
private:
};
template <DataLayoutType DLType>
class CalibComputeInt64ToInt32
: public KernelLite<TARGET(kARM), PRECISION(kInt64), DLType> {
public:
using param_t = operators::CalibParam;
void Run() override;
~CalibComputeInt64ToInt32() override{};
private:
};
template <DataLayoutType DLType>
class CalibComputeInt8ToFp32
: public KernelLite<TARGET(kARM), PRECISION(kInt8), DLType> {
......
......@@ -62,8 +62,19 @@ void CastCompute::Run() {
int32_t* out_data = param.Out->mutable_data<int32_t>();
std::transform(
x_data_begin, x_data_end, out_data, TransOp<int64_t, int32_t>);
} else if (param.in_dtype == 0 && param.out_dtype == 5) { // bool->fp32
const bool* x_data_begin = param.X->data<bool>();
const bool* x_data_end = x_data_begin + param.X->numel();
float* out_data = param.Out->mutable_data<float>();
std::transform(x_data_begin, x_data_end, out_data, TransOp<bool, float>);
} else if (param.in_dtype == 3 && param.out_dtype == 5) { // int64->fp32
const int64_t* x_data_begin = param.X->data<int64_t>();
const int64_t* x_data_end = x_data_begin + param.X->numel();
float* out_data = param.Out->mutable_data<float>();
std::transform(x_data_begin, x_data_end, out_data, TransOp<int64_t, float>);
} else {
LOG(FATAL) << "other has not been implemented";
LOG(FATAL) << "other has not been implemented transform with dtype"
<< param.in_dtype << " X, dtype" << param.out_dtype << " Out";
}
}
......
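The integer `in_dtype`/`out_dtype` values being compared correspond to the framework's VarType codes, as the inline comments indicate (0 = bool, 3 = int64, 5 = fp32), so the two new branches add bool→fp32 and int64→fp32 casts next to the existing int64→int32 one. Each branch is an element-wise `std::transform` with a converting functor. A self-contained sketch of the same pattern (`TransOpSketch` is a stand-in for the file's `TransOp` helper, assumed to be a plain `static_cast`):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Stand-in for the TransOp<InType, OutType> helper used above; illustrative only.
template <typename InType, typename OutType>
OutType TransOpSketch(InType in) {
  return static_cast<OutType>(in);
}

int main() {
  std::vector<int64_t> x = {1, -2, 3};
  std::vector<float> out(x.size());
  // Mirrors the int64 -> fp32 branch: element-wise conversion via std::transform.
  std::transform(x.begin(), x.end(), out.begin(), TransOpSketch<int64_t, float>);
  return 0;
}
```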
......@@ -300,11 +300,12 @@ void ElementwiseMaxActivationCompute::Run() {
}
}
void ElementwiseDivCompute::Run() {
auto& param = Param<operators::ElementwiseParam>();
const float* x_data = param.X->data<float>();
const float* y_data = param.Y->data<float>();
float* out_data = param.Out->mutable_data<float>();
template <typename T, PrecisionType PType>
void ElementwiseDivCompute<T, PType>::Run() {
auto& param = this->template Param<operators::ElementwiseParam>();
auto* x_data = param.X->template data<T>();
auto* y_data = param.Y->template data<T>();
auto* out_data = param.Out->template mutable_data<T>();
int axis = param.axis;
auto x_dims = param.X->dims();
auto y_dims = param.Y->dims();
......@@ -313,10 +314,10 @@ void ElementwiseDivCompute::Run() {
LOG(FATAL) << "elewise div don't support x_dims size < y_dims size";
}
if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) {
lite::arm::math::elementwise_div_broadcast(
lite::arm::math::elementwise_div_broadcast<T>(
x_data, y_data, out_data, pre, n, post);
} else {
lite::arm::math::elementwise_div(
lite::arm::math::elementwise_div<T>(
x_data, y_data, out_data, x_dims.production());
}
}
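Making `ElementwiseDivCompute` a class template is what forces the `this->template Param<...>()` spelling above: `KernelLite<TARGET(kARM), PType>` is now a dependent base, so member lookup is deferred and the `template` keyword is needed for the compiler to parse the member-template call. A minimal illustration of that rule (the names are illustrative, not from Paddle Lite):

```cpp
// Minimal illustration of the dependent-name rule behind
// "this->template Param<...>()" in the templated kernel above.
template <typename T>
struct Base {
  template <typename U>
  U Get() { return U{}; }
};

template <typename T>
struct Derived : Base<T> {
  int Run() {
    // "this->Get<int>()" alone would be parsed as (this->Get) < int > (),
    // so the "template" keyword is required for the dependent member template.
    return this->template Get<int>();
  }
};

int main() { return Derived<float>{}.Run(); }
```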
......@@ -488,17 +489,27 @@ REGISTER_LITE_KERNEL(
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
REGISTER_LITE_KERNEL(elementwise_div,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::ElementwiseDivCompute,
def)
using elementwise_div_fp32 =
paddle::lite::kernels::arm::ElementwiseDivCompute<float, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(
elementwise_div, kARM, kFloat, kNCHW, elementwise_div_fp32, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
using elementwise_div_int64 =
paddle::lite::kernels::arm::ElementwiseDivCompute<int64_t,
PRECISION(kInt64)>;
REGISTER_LITE_KERNEL(
elementwise_div, kARM, kInt64, kNCHW, elementwise_div_int64, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.Finalize();
REGISTER_LITE_KERNEL(
fusion_elementwise_div_activation,
kARM,
......
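One practical reason for the new `using elementwise_div_fp32` / `elementwise_div_int64` aliases (besides readability) is that a template-id such as `ElementwiseDivCompute<float, PRECISION(kFloat)>` contains a top-level comma, which the preprocessor would split into two macro arguments if it were passed to `REGISTER_LITE_KERNEL` directly. A small self-contained illustration of that behaviour (the macro and type names are illustrative):

```cpp
#include <iostream>
#include <typeinfo>

// The preprocessor splits macro arguments on top-level commas,
// so template-ids are usually hidden behind an alias first.
#define PRINT_TYPE(T) std::cout << typeid(T).name() << std::endl

template <typename A, typename B>
struct Pair {};

int main() {
  // PRINT_TYPE(Pair<int, float>);   // error: macro receives two arguments
  using PairIntFloat = Pair<int, float>;
  PRINT_TYPE(PairIntFloat);          // fine: the comma is hidden in the alias
  return 0;
}
```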
......@@ -86,8 +86,8 @@ class ElementwiseMaxActivationCompute
virtual ~ElementwiseMaxActivationCompute() = default;
};
class ElementwiseDivCompute
: public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
template <typename T, PrecisionType PType>
class ElementwiseDivCompute : public KernelLite<TARGET(kARM), PType> {
public:
void Run() override;
......
......@@ -73,7 +73,6 @@ void GatherCompute::Run() {
REGISTER_LITE_KERNEL(
gather, kARM, kAny, kNCHW, paddle::lite::kernels::arm::GatherCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.BindInput("Index",
{LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindInput("Index", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.Finalize();
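Relaxing the `Index` input of `gather` from `PRECISION(kInt32)` to `PRECISION(kAny)` lets the registration also accept int64 index tensors, which this PR's TTFNet support appears to need; presumably the kernel body then branches on the actual index dtype at run time. A standalone sketch of that kind of gather dispatch (not the actual GatherCompute implementation):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Standalone sketch of a gather-style row copy that works for either
// int32 or int64 indices via a template parameter; illustrative only.
template <typename IndexT>
void GatherRows(const std::vector<float>& src, int row_width,
                const std::vector<IndexT>& index, std::vector<float>* out) {
  out->resize(index.size() * row_width);
  for (size_t i = 0; i < index.size(); ++i) {
    const float* row = src.data() + static_cast<size_t>(index[i]) * row_width;
    std::copy(row, row + row_width, out->data() + i * row_width);
  }
}

int main() {
  std::vector<float> table = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};  // 3 rows x 2 cols
  std::vector<int64_t> idx64 = {2, 0};
  std::vector<float> out;
  GatherRows(table, /*row_width=*/2, idx64, &out);  // out = {4, 5, 0, 1}
  return out.size() == 4 ? 0 : 1;
}
```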