未验证 提交 b87299fe 编写于 作者: Z zhupengyang 提交者: GitHub

scale support int32 input (#3065)

* scale support int32 input
上级 51e14609
...@@ -58,6 +58,43 @@ void scale<float>( ...@@ -58,6 +58,43 @@ void scale<float>(
} }
} }
template <>
void scale<int>(const int* din, int* dout, int num, int scale, int bias) {
  // Vector path handles 16 elements per iteration (four int32x4 lanes);
  // the scalar tail covers the remaining (num % 16) elements.
  int vec_iters = num >> 4;
  int leftover = num % 16;
  int32x4_t scale_v = vdupq_n_s32(scale);
  int32x4_t bias_v = vdupq_n_s32(bias);
#pragma omp parallel for
  for (int i = 0; i < vec_iters; i++) {
    const int* src = din + (i << 4);
    int* dst = dout + (i << 4);
    int32x4_t x0 = vld1q_s32(src);
    int32x4_t x1 = vld1q_s32(src + 4);
    int32x4_t x2 = vld1q_s32(src + 8);
    int32x4_t x3 = vld1q_s32(src + 12);
    // Per lane: dst = bias + src * scale (multiply-accumulate).
    vst1q_s32(dst, vmlaq_s32(bias_v, x0, scale_v));
    vst1q_s32(dst + 4, vmlaq_s32(bias_v, x1, scale_v));
    vst1q_s32(dst + 8, vmlaq_s32(bias_v, x2, scale_v));
    vst1q_s32(dst + 12, vmlaq_s32(bias_v, x3, scale_v));
  }
  // Scalar tail.
  if (leftover > 0) {
    const int* src = din + (vec_iters << 4);
    int* dst = dout + (vec_iters << 4);
    for (int i = 0; i < leftover; i++) {
      dst[i] = src[i] * scale + bias;
    }
  }
}
template <> template <>
void scale<float>(const float* din, void scale<float>(const float* din,
float* dout, float* dout,
......
...@@ -20,7 +20,7 @@ namespace arm { ...@@ -20,7 +20,7 @@ namespace arm {
namespace math { namespace math {
template <typename T> template <typename T>
void scale(const T* din, T* dout, int num, float scale, float bias); void scale(const T* din, T* dout, int num, T scale, T bias);
template <typename T> template <typename T>
void scale(const T* din, void scale(const T* din,
......
...@@ -20,18 +20,18 @@ namespace lite { ...@@ -20,18 +20,18 @@ namespace lite {
namespace kernels { namespace kernels {
namespace arm { namespace arm {
void ScaleCompute::Run() { template <typename T, PrecisionType PType>
auto& param = Param<operators::ScaleParam>(); void ScaleCompute<T, PType>::Run() {
const float* x_data = param.x->data<float>(); auto& param = this->template Param<operators::ScaleParam>();
float* output_data = param.output->mutable_data<float>(); int num = param.x->numel();
DDim x_dims = param.x->dims(); const T* x_data = param.x->template data<T>();
bool bias_after_scale = param.bias_after_scale; T* output_data = param.output->template mutable_data<T>();
float scale = param.scale; T scale = static_cast<T>(param.scale);
float bias = param.bias; T bias = static_cast<T>(param.bias);
if (!bias_after_scale) { if (!param.bias_after_scale) {
bias *= scale; bias *= scale;
} }
lite::arm::math::scale(x_data, output_data, x_dims.production(), scale, bias); lite::arm::math::scale<T>(x_data, output_data, num, scale, bias);
if (!param.x->lod().empty()) { if (!param.x->lod().empty()) {
param.output->set_lod(param.x->lod()); param.output->set_lod(param.x->lod());
} }
...@@ -42,8 +42,16 @@ void ScaleCompute::Run() { ...@@ -42,8 +42,16 @@ void ScaleCompute::Run() {
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
REGISTER_LITE_KERNEL( using scale_float =
scale, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::ScaleCompute, def) paddle::lite::kernels::arm::ScaleCompute<float, PRECISION(kFloat)>;
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) REGISTER_LITE_KERNEL(scale, kARM, kFloat, kNCHW, scale_float, def)
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
.Finalize();
using scale_int32 =
paddle::lite::kernels::arm::ScaleCompute<int, PRECISION(kInt32)>;
REGISTER_LITE_KERNEL(scale, kARM, kInt32, kNCHW, scale_int32, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.Finalize(); .Finalize();
...@@ -21,7 +21,8 @@ namespace lite { ...@@ -21,7 +21,8 @@ namespace lite {
namespace kernels { namespace kernels {
namespace arm { namespace arm {
class ScaleCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> { template <typename T, PrecisionType PType>
class ScaleCompute : public KernelLite<TARGET(kARM), PType> {
public: public:
void Run() override; void Run() override;
......
...@@ -41,13 +41,13 @@ void scale_compute_ref(const operators::ScaleParam& param) { ...@@ -41,13 +41,13 @@ void scale_compute_ref(const operators::ScaleParam& param) {
} }
TEST(scale_arm, init) { TEST(scale_arm, init) {
ScaleCompute scale; ScaleCompute<float, PRECISION(kFloat)> scale;
ASSERT_EQ(scale.precision(), PRECISION(kFloat)); ASSERT_EQ(scale.precision(), PRECISION(kFloat));
ASSERT_EQ(scale.target(), TARGET(kARM)); ASSERT_EQ(scale.target(), TARGET(kARM));
} }
TEST(scale_arm, compute) { TEST(scale_arm, compute) {
ScaleCompute scale; ScaleCompute<float, PRECISION(kFloat)> scale;
operators::ScaleParam param; operators::ScaleParam param;
lite::Tensor x; lite::Tensor x;
......
...@@ -29,7 +29,8 @@ class ScaleComputeTester : public arena::TestCase { ...@@ -29,7 +29,8 @@ class ScaleComputeTester : public arena::TestCase {
DDim x_dims_{{100, 20}}; DDim x_dims_{{100, 20}};
float scale_ = 0.; float scale_ = 0.;
float bias_ = 0.; float bias_ = 0.;
bool bias_after_scale_; bool bias_after_scale_ = true;
PrecisionType x_dtype_ = PRECISION(kFloat);
public: public:
ScaleComputeTester(const Place& place, ScaleComputeTester(const Place& place,
...@@ -37,30 +38,45 @@ class ScaleComputeTester : public arena::TestCase { ...@@ -37,30 +38,45 @@ class ScaleComputeTester : public arena::TestCase {
const DDim& x_dims, const DDim& x_dims,
float scale, float scale,
float bias, float bias,
bool bias_after_scale) bool bias_after_scale = true,
PrecisionType x_dtype = PRECISION(kFloat))
: TestCase(place, alias), : TestCase(place, alias),
x_dims_(x_dims), x_dims_(x_dims),
scale_(scale), scale_(scale),
bias_(bias), bias_(bias),
bias_after_scale_(bias_after_scale) {} bias_after_scale_(bias_after_scale),
x_dtype_(x_dtype) {}
void RunBaseline(Scope* scope) override { template <typename T>
void RunBaselineHelper(Scope* scope) {
auto* x = scope->FindTensor(x_);
auto* x_data = x->data<T>();
auto* out = scope->NewTensor(out_); auto* out = scope->NewTensor(out_);
CHECK(out);
out->Resize(x_dims_); out->Resize(x_dims_);
auto* out_data = out->mutable_data<float>();
auto* x = scope->FindTensor(x_);
const auto* x_data = x->data<float>();
float bias = bias_;
T scale = static_cast<T>(scale_);
T bias = static_cast<T>(bias_);
if (!bias_after_scale_) { if (!bias_after_scale_) {
bias *= scale_; bias *= scale;
} }
auto out_data = out->mutable_data<T>();
for (int i = 0; i < x_dims_.production(); i++) { for (int i = 0; i < x_dims_.production(); i++) {
out_data[i] = x_data[i] * scale_ + bias; out_data[i] = x_data[i] * scale + bias;
}
}
void RunBaseline(Scope* scope) override {
switch (x_dtype_) {
case PRECISION(kFloat):
RunBaselineHelper<float>(scope);
break;
case PRECISION(kInt32):
RunBaselineHelper<int>(scope);
break;
default:
LOG(FATAL) << "unsupported data type: " << PrecisionToStr(x_dtype_);
break;
} }
} }
...@@ -73,13 +89,74 @@ class ScaleComputeTester : public arena::TestCase { ...@@ -73,13 +89,74 @@ class ScaleComputeTester : public arena::TestCase {
op_desc->SetAttr("bias_after_scale", bias_after_scale_); op_desc->SetAttr("bias_after_scale", bias_after_scale_);
} }
// Fills the input tensor `x_` with random values of type T in [-10, 10].
template <typename T>
void PrepareDataHelper() {
  auto count = x_dims_.production();
  std::vector<T> buf(count);
  fill_data_rand<T>(buf.data(), -10, 10, count);
  SetCommonTensor(x_, x_dims_, buf.data());
}
void PrepareData() override { void PrepareData() override {
std::vector<float> x(x_dims_.production()); switch (x_dtype_) {
fill_data_rand(x.data(), -1.f, 1.f, x_dims_.production()); case PRECISION(kFloat):
SetCommonTensor(x_, x_dims_, x.data()); PrepareDataHelper<float>();
break;
case PRECISION(kInt32):
PrepareDataHelper<int>();
break;
default:
LOG(FATAL) << "unsupported data type: " << PrecisionToStr(x_dtype_);
break;
}
} }
}; };
// Runs the scale-op precision test over several input shapes
// (default scale/bias, float precision).
void TestScaleShape(Place place, float abs_error) {
  const std::vector<std::vector<int64_t>> shapes{
      {5, 2, 3, 4}, {8, 3, 5}, {12, 3}};
  for (const auto& shape : shapes) {
    std::unique_ptr<arena::TestCase> tester(
        new ScaleComputeTester(place, "def", DDim(shape), 1.5f, 0.2f));
    arena::Arena arena(std::move(tester), place, abs_error);
    arena.TestPrecision();
  }
}
// Runs the scale-op precision test over a grid of scale/bias values,
// including zero and negative cases.
void TestScaleValue(Place place, float abs_error) {
  const std::vector<float> scales{0.123, 0., -1.2};
  const std::vector<float> biases{1., 0., -1.2331};
  for (float scale : scales) {
    for (float bias : biases) {
      std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester(
          place, "def", DDim({5, 2, 3, 4}), scale, bias));
      arena::Arena arena(std::move(tester), place, abs_error);
      arena.TestPrecision();
    }
  }
}
// Runs the scale-op precision test with bias applied both after and
// before the multiply (bias_after_scale = true / false).
void TestScaleOrder(Place place, float abs_error) {
  for (bool apply_bias_after : {true, false}) {
    std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester(
        place, "def", DDim({2, 3, 4, 5}), 1.5f, 0.2f, apply_bias_after));
    arena::Arena arena(std::move(tester), place, abs_error);
    arena.TestPrecision();
  }
}
// Runs the scale-op precision test for each supported input dtype,
// adjusting the kernel place's precision to match.
void TestScaleDtype(Place place, float abs_error) {
  for (PrecisionType x_dtype : {PRECISION(kFloat), PRECISION(kInt32)}) {
    switch (x_dtype) {
      case PRECISION(kFloat):
        place.precision = PRECISION(kFloat);
        break;
      case PRECISION(kInt32):
        place.precision = PRECISION(kInt32);
        break;
      default:
        LOG(FATAL) << "fatal";
    }
    std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester(
        place, "def", DDim({2, 3, 4, 5}), 2.f, 1.f, true, x_dtype));
    arena::Arena arena(std::move(tester), place, abs_error);
    arena.TestPrecision();
  }
}
TEST(Scale, precision) { TEST(Scale, precision) {
Place place; Place place;
float abs_error = 2e-5; float abs_error = 2e-5;
...@@ -97,19 +174,12 @@ TEST(Scale, precision) { ...@@ -97,19 +174,12 @@ TEST(Scale, precision) {
return; return;
#endif #endif
for (auto x_dims : TestScaleShape(place, abs_error);
std::vector<std::vector<int64_t>>{{5, 2, 3, 4}, {8, 3, 5}, {12, 3}}) { TestScaleValue(place, abs_error);
for (float scale : {0.123, 2., -1.2}) { TestScaleOrder(place, abs_error);
for (float bias : {1., 0., -1.2331}) { #ifdef LITE_WITH_ARM
for (bool bias_after_scale : {true, false}) { TestScaleDtype(place, abs_error);
std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester( #endif
place, "def", DDim(x_dims), scale, bias, bias_after_scale));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
}
} }
TEST(Scale, performance) { TEST(Scale, performance) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册