提交 dfbc4b50 编写于 作者: S shixiaowei02

Merge branch 'shixiaowei02/calib' into 'incubate/lite'

add calib kernel

See merge request inference/paddlelite!34
...@@ -16,7 +16,7 @@ cc_library(math_arm SRCS ...@@ -16,7 +16,7 @@ cc_library(math_arm SRCS
elementwise.cc elementwise.cc
concat.cc concat.cc
sgemv.cc sgemv.cc
type_trans.cpp type_trans.cc
conv_impl.cc conv_impl.cc
conv_direct_3x3s1.cc conv_direct_3x3s1.cc
conv_direct_3x3s2.cc conv_direct_3x3s2.cc
......
此差异已折叠。
...@@ -96,6 +96,7 @@ KernelRegistry::KernelRegistry() ...@@ -96,6 +96,7 @@ KernelRegistry::KernelRegistry()
INIT_FOR(kX86, kAny, kAny); INIT_FOR(kX86, kAny, kAny);
INIT_FOR(kARM, kFloat, kNCHW); INIT_FOR(kARM, kFloat, kNCHW);
INIT_FOR(kARM, kInt8, kNCHW);
INIT_FOR(kARM, kAny, kNCHW); INIT_FOR(kARM, kAny, kNCHW);
INIT_FOR(kARM, kAny, kAny); INIT_FOR(kARM, kAny, kAny);
#undef INIT_FOR #undef INIT_FOR
......
...@@ -16,6 +16,7 @@ cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_a ...@@ -16,6 +16,7 @@ cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_a
cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(concat_compute_arm SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(concat_compute_arm SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(dropout_compute_arm SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(dropout_compute_arm SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(calib_compute_arm SRCS calib_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(transpose_compute_arm SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(transpose_compute_arm SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm) lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm)
...@@ -30,6 +31,7 @@ lite_cc_test(test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm) ...@@ -30,6 +31,7 @@ lite_cc_test(test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm)
lite_cc_test(test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm) lite_cc_test(test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm)
lite_cc_test(test_concat_compute_arm SRCS concat_compute_test.cc DEPS concat_compute_arm) lite_cc_test(test_concat_compute_arm SRCS concat_compute_test.cc DEPS concat_compute_arm)
lite_cc_test(test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm) lite_cc_test(test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm)
lite_cc_test(test_calib_compute_arm SRCS calib_compute_test.cc DEPS calib_compute_arm)
lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm) lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm)
set(arm_kernels set(arm_kernels
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/calib_compute.h"
#include <vector>
#include "paddle/fluid/lite/arm/math/type_trans.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
void CalibCompute::Run() {
auto& param = this->Param<operators::CalibParam>();
std::vector<float> scale = {param.in_scale};
if (param.in_dtype == PRECISION(kFloat) &&
param.out_dtype == PRECISION(kInt8)) {
const auto* din = param.input->data<float>();
auto* dout = param.output->mutable_data<signed char>();
lite::arm::math::fp32_to_int8(din, dout, scale.data(), 1, 1,
param.input->numel());
return;
}
if (param.in_dtype == PRECISION(kInt8) &&
param.out_dtype == PRECISION(kFloat)) {
const auto* din = param.input->data<signed char>();
auto* dout = param.output->mutable_data<float>();
lite::arm::math::int8_to_fp32(din, dout, scale.data(), 1, 1,
param.input->numel());
return;
}
LOG(FATAL) << "Unsupport Dtype.";
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(calib, kARM, kInt8, kNCHW,
paddle::lite::kernels::arm::CalibCompute, def)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/operators/calib_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
class CalibCompute : public KernelLite<TARGET(kARM), PRECISION(kInt8)> {
public:
using param_t = operators::CalibParam;
void Run() override;
~CalibCompute() override{};
private:
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/calib_compute.h"
#include <gtest/gtest.h>
#include <algorithm>
#include <iostream>
#include <memory>
#include <random>
#include <utility>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
static int get_rand(int start, int end) {
int i = rand(); // NOLINT
i = (i % (end - start)) + start;
return i;
}
static void int8_to_fp32_basic(const int8_t* din, float* dout,
const float* scale, int axis_size,
int64_t outer_size, int64_t inner_size) {
int loop_size = axis_size * outer_size;
for (int i = 0; i < loop_size; ++i) {
float scale_in = scale[i % axis_size];
for (int j = 0; j < inner_size; ++j) {
dout[j] = din[j] * scale_in;
}
dout += inner_size;
din += inner_size;
}
}
static void fp32_to_int8_basic(const float* din, int8_t* dout,
const float* scale, int axis_size,
int64_t outer_size, int64_t inner_size) {
int loop_size = axis_size * outer_size;
for (int i = 0; i < loop_size; ++i) {
float inv_scale = 1.f / scale[i % axis_size];
for (int j = 0; j < inner_size; ++j) {
dout[j] = static_cast<int8_t>(roundf(din[j] * inv_scale));
}
dout += inner_size;
din += inner_size;
}
}
void calib_ref(const operators::CalibParam& param) {
std::vector<float> scale = {param.in_scale};
if (param.in_dtype == PRECISION(kFloat) &&
param.out_dtype == PRECISION(kInt8)) {
const auto* din = param.input->data<float>();
auto* dout = param.output->mutable_data<signed char>();
fp32_to_int8_basic(din, dout, scale.data(), 1, 1, param.input->numel());
return;
}
if (param.in_dtype == PRECISION(kInt8) &&
param.out_dtype == PRECISION(kFloat)) {
const auto* din = param.input->data<signed char>();
auto* dout = param.output->mutable_data<float>();
int8_to_fp32_basic(din, dout, scale.data(), 1, 1, param.input->numel());
return;
}
LOG(FATAL) << "Unsupport Dtype.";
}
TEST(calib_arm, retrive_op) {
auto calib =
KernelRegistry::Global()
.Create<TARGET(kARM), PRECISION(kInt8), DATALAYOUT(kNCHW)>("calib");
ASSERT_FALSE(calib.empty());
ASSERT_TRUE(calib.front());
}
TEST(calib_arm, init) {
CalibCompute calib;
ASSERT_EQ(calib.precision(), PRECISION(kInt8));
ASSERT_EQ(calib.target(), TARGET(kARM));
}
TEST(calib_arm, int8_to_fp32) {
DeviceInfo::Init();
for (auto n : {1, 2}) {
for (auto c : {6, 32 /*, 128*/}) {
for (auto h : {9, 18 /*, 56 , 112, 224, 512*/}) {
for (auto w : {9, 18 /*, 56, 112, 224, 512*/}) {
Tensor x;
Tensor output;
Tensor output_ref;
// set the dims of input, output, ref output tensors
x.Resize({n, c, h, w});
output.Resize({n, c, h, w});
output_ref.Resize({n, c, h, w});
// initialize the data of input tensors
auto* x_data = x.mutable_data<char>();
auto* output_data = output.mutable_data<float>();
for (int i = 0; i < x.dims().production(); i++) {
float sign = i % 3 == 0 ? -1.0f : 1.0f;
x_data[i] = sign * static_cast<float>(i % 128) * 0.013f;
}
// prepare kernel params and run
CalibCompute calib;
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<ARMContext>();
calib.SetContext(std::move(ctx));
operators::CalibParam param;
param.in_scale = get_rand(0, 100) * 0.1f;
param.in_dtype = PRECISION(kInt8);
param.out_dtype = PRECISION(kFloat);
param.input = &x;
param.output = &output;
calib.SetParam(param);
calib.Launch();
// invoking ref implementation and compare results
param.output = &output_ref;
calib_ref(param);
auto* output_ref_data = output_ref.mutable_data<float>();
for (int i = 0; i < output.dims().production(); i++) {
EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
}
}
}
}
}
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, def);
...@@ -21,6 +21,7 @@ cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS}) ...@@ -21,6 +21,7 @@ cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS})
cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite) cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite)
cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS}) cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS})
cc_library(concat_op_lite SRCS concat_op.cc DEPS ${op_DEPS}) cc_library(concat_op_lite SRCS concat_op.cc DEPS ${op_DEPS})
cc_library(calib_op_lite SRCS calib_op.cc DEPS ${op_DEPS})
cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS}) cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS})
cc_library(transpose_op_lite SRCS transpose_op.cc DEPS ${op_DEPS}) cc_library(transpose_op_lite SRCS transpose_op.cc DEPS ${op_DEPS})
cc_library(fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS}) cc_library(fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS})
...@@ -46,6 +47,7 @@ set(ops_lite ...@@ -46,6 +47,7 @@ set(ops_lite
activation_ops_lite activation_ops_lite
dropout_op_lite dropout_op_lite
concat_op_lite concat_op_lite
calib_op_lite
split_op_lite split_op_lite
transpose_op_lite transpose_op_lite
fake_quant fake_quant
...@@ -64,6 +66,7 @@ lite_cc_test(test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite m ...@@ -64,6 +66,7 @@ lite_cc_test(test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite m
lite_cc_test(test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite) lite_cc_test(test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite)
lite_cc_test(test_batch_norm_op_lite SRCS batch_norm_op_test.cc DEPS batch_norm_op_lite memory_lite) lite_cc_test(test_batch_norm_op_lite SRCS batch_norm_op_test.cc DEPS batch_norm_op_lite memory_lite)
lite_cc_test(test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite) lite_cc_test(test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite)
lite_cc_test(test_calib_op_lite SRCS calib_op_test.cc DEPS calib_op_lite memory_lite ARM_DEPS calib_compute_arm)
lite_cc_test(test_fusion_elementwise_activation_ops_lite lite_cc_test(test_fusion_elementwise_activation_ops_lite
SRCS fusion_elementwise_activation_ops_test.cc SRCS fusion_elementwise_activation_ops_test.cc
DEPS fusion_elementwise_activation_ops_lite memory_lite) DEPS fusion_elementwise_activation_ops_lite memory_lite)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/calib_op.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
bool CalibOpLite::CheckShape() const {
CHECK_OR_FALSE(param_.input);
CHECK_OR_FALSE(param_.output);
return true;
}
bool CalibOpLite::InferShape() const {
param_.output->Resize(param_.input->dims());
return true;
}
bool CalibOpLite::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
auto x_var = scope->FindVar(opdesc.Input("Input").front());
auto output_var = scope->FindVar(opdesc.Output("Out").front());
CHECK(x_var);
CHECK(output_var);
param_.input = const_cast<lite::Tensor *>(&(x_var->Get<lite::Tensor>()));
param_.output = output_var->GetMutable<lite::Tensor>();
std::vector<std::string> input_arg_names = opdesc.InputArgumentNames();
param_.in_dtype =
static_cast<lite::PrecisionType>(opdesc.GetAttr<int>("in_dtype"));
param_.out_dtype =
static_cast<lite::PrecisionType>(opdesc.GetAttr<int>("out_dtype"));
if (opdesc.HasAttr("in_scale")) {
param_.in_scale = opdesc.GetAttr<float>("in_scale");
}
CHECK(param_.input) << "Input(X) of CalibOp should not be null.";
CHECK(param_.output) << "Output(Out) of CalibOp should not be null.";
return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(calib, paddle::lite::operators::CalibOpLite);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/scope.h"
#include "paddle/fluid/lite/operators/op_params.h"
#include "paddle/fluid/lite/utils/all.h"
namespace paddle {
namespace lite {
namespace operators {
/*
* The data types used by the two adjacent layers in the model should
* be the same. When the two operators accept different data types,
* we may need to implicitly add a data type conversion operator.
* Currently, this operator only supports mutual conversion of int8
* and float32 types.
*/
class CalibOpLite : public OpLite {
public:
CalibOpLite() {}
explicit CalibOpLite(const std::string &type) : OpLite(type) {}
bool CheckShape() const override;
bool InferShape() const override;
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope);
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "calib"; }
private:
mutable CalibParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/calib_op.h"
#include <gtest/gtest.h>
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
#ifdef LITE_WITH_ARM
TEST(calib_op_lite, TestARM) {
// prepare variables
Scope scope;
auto* x = scope.Var("Input")->GetMutable<Tensor>();
auto* output = scope.Var("output")->GetMutable<Tensor>();
x->Resize(DDim(std::vector<int64_t>({1, 10, 20})));
output->Resize(DDim(std::vector<int64_t>{1, 10, 20}));
// set data
for (int i = 0; i < 10 * 20; i++) {
x->mutable_data<float>()[i] = i;
}
for (int i = 0; i < 10 * 20; i++) {
output->mutable_data<float>()[i] = 0.;
}
// prepare op desc
cpp::OpDesc desc;
desc.SetType("calib");
desc.SetInput("Input", {"Input"});
desc.SetOutput("Out", {"output"});
desc.SetAttr("in_dtype", static_cast<int>(PRECISION(kInt8)));
desc.SetAttr("out_dtype", static_cast<int>(PRECISION(kFloat)));
desc.SetAttr("in_scale", 10.0f);
CalibOpLite calib("calib");
calib.SetValidPlaces({Place{TARGET(kARM), PRECISION(kInt8)}});
calib.Attach(desc, &scope);
auto kernels = calib.CreateKernels({Place{TARGET(kARM), PRECISION(kInt8)}});
ASSERT_FALSE(kernels.empty());
}
#endif
} // namespace operators
} // namespace lite
} // namespace paddle
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, def);
#endif
...@@ -48,6 +48,14 @@ struct IoCopyParam { ...@@ -48,6 +48,14 @@ struct IoCopyParam {
lite::Tensor* y{}; lite::Tensor* y{};
}; };
struct CalibParam {
const lite::Tensor* input{};
lite::Tensor* output{};
float in_scale;
PrecisionType in_dtype;
PrecisionType out_dtype;
};
/// -------------------------- NN operators ------------------------------------ /// -------------------------- NN operators ------------------------------------
struct FcParam { struct FcParam {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册