未验证 提交 e8344178 编写于 作者: Z zhupengyang 提交者: GitHub

get precision from real tensor or tensor_array of base_scope in arena/framework (#3092)

* get the precision from the real tensor or tensor_array of base_scope in arena/framework

* register assign and assign_value to kAny
上级 3efca965
develop _release/v2.6.2 github/fork/AnBaolei1984/baolei/bitmain github/fork/Cambricon/develop github/fork/DannyIsFunny/Android5 github/fork/DannyIsFunny/Update_MemOpt github/fork/DannyIsFunny/fix_pow github/fork/DannyIsFunny/fix_v26_windows github/fork/LDOUBLEV/ocr github/fork/Leonardo-Ding/dwh_dev github/fork/MaxwellDing/develop github/fork/MyPandaShaoxiang/int8 github/fork/PaddleLite-EB/merge1.4 github/fork/Wangzheee/matrix_nms_op github/fork/cathwong/patch-1 github/fork/cclauss/patch-1 github/fork/chenjiaoAngel/cherry_pic github/fork/chenjiaoAngel/conv_dw_5x5 github/fork/chenjiaoAngel/conv_dw_5x5s2 github/fork/edimetia3d/arm_update_elementwise_op github/fork/edimetia3d/host_deformable_conv github/fork/edimetia3d/matrix_nms_host github/fork/edimetia3d/update_pow_op github/fork/edimetia3d/update_yolo_box github/fork/haozech/develop github/fork/haozech/infershape_chz github/fork/haozech/parl-develop github/fork/jackzhang235/develop github/fork/jameswu2014/develop github/fork/jiansowa/jiansowa/img_nna github/fork/jiweibo/stream_manage github/fork/juncaipeng/add_cast github/fork/qili93/update_sup_model_v26 github/fork/qjing666/develop github/fork/sunsetlh/sunsetlh/xpu_multi_test github/fork/wangqunbaidu/develop github/fork/weihaoji/whj_27 github/fork/weihaoji/xpu_res2net_fusion github/fork/weihaoji/xpu_weihaoji_dev github/fork/xiebaiyuan/fix_leak_opencl github/fork/xiebaiyuan/opencl_depthwised1 github/fork/xiebaiyuan/opencl_softmax github/fork/yanghongtian/yanghongtian/add_ascend310_target_place github/fork/yiicy/computelib github/fork/yongqiangma/bm_card github/fork/yongqiangma/calib github/fork/yongqiangma/pass github/fork/yongqiangma/pool github/fork/yongqiangma/priorbox github/fork/yongqiangma/shape github/fork/yongqiangma/trans github/fork/yongqiangma/trans2 github/fork/yongqiangma/workspace github/fork/ysh329/add-cl-kernel-member-for-opencl github/fork/ysh329/cherry-pick-precision-profiler-enhance github/fork/ysh329/fix-opencl-concat 
github/fork/ysh329/support-int64-copy-from-to-cpu github/fork/zhaoyang-star/enable_prifile_in_tiny_publish github/fork/zhaoyang-star/fix_openc_demo github/fork/zhaoyang-star/patch-1 github/fork/zhupengyang/opt release/v2.6 release/v2.6.0 release/v2.7 revert-4368-hongming/test_v26 v2.7-beta v2.6.3-beta2 v2.6.3-beta1 v2.6.2 v2.6.1 v2.6.0
2 合并请求!3210[Opencl] fix opencl bug,!3154[arm]resize nnv12 bug
......@@ -123,6 +123,115 @@ void TestCase::PrepareInputsForInstruction() {
template <typename T>
bool TestCase::CheckTensorPrecision(const Tensor* a_tensor,
const Tensor* b_tensor,
float abs_error) {
CHECK(ShapeEquals(a_tensor->dims(), b_tensor->dims()));
CHECK(a_tensor->lod() == b_tensor->lod()) << "lod not match";
// The baseline should output in host devices.
CHECK(b_tensor->target() == TARGET(kHost) ||
b_tensor->target() == TARGET(kX86) ||
b_tensor->target() == TARGET(kARM));
const T* a_data{};
switch (a_tensor->target()) {
case TARGET(kX86):
case TARGET(kHost):
case TARGET(kARM):
a_data = static_cast<const T*>(a_tensor->raw_data());
// Before compare, need to copy data from `target` device to host.
LOG(FATAL) << "Not supported";
const T* b_data = static_cast<const T*>(b_tensor->raw_data());
bool success = true;
for (int i = 0; i < a_tensor->dims().production(); i++) {
EXPECT_NEAR(a_data[i], b_data[i], abs_error);
if (fabsf(a_data[i] - b_data[i]) > abs_error) {
success = false;
return success;
bool TestCase::CheckPrecision(const Tensor* a_tensor,
const Tensor* b_tensor,
float abs_error,
PrecisionType precision_type) {
PrecisionType precision_type_t = precision_type;
if (precision_type == PRECISION(kAny)) {
precision_type_t = b_tensor->precision();
CHECK(precision_type_t == b_tensor->precision())
<< "arg precision type and base tensor precision type are not matched! "
"arg precision type is: "
<< PrecisionToStr(precision_type) << ", base tensor precision type is: "
<< PrecisionToStr(b_tensor->precision());
CHECK(a_tensor->precision() == b_tensor->precision())
<< "real tensor precision type and base tensor precision type are not "
"matched! real tensor precision type is: "
<< PrecisionToStr(a_tensor->precision())
<< ", base tensor precision type is: "
<< PrecisionToStr(b_tensor->precision());
switch (precision_type_t) {
case PRECISION(kFloat):
return CheckTensorPrecision<float>(a_tensor, b_tensor, abs_error);
case PRECISION(kInt8):
return CheckTensorPrecision<int8_t>(a_tensor, b_tensor, abs_error);
case PRECISION(kInt32):
return CheckTensorPrecision<int32_t>(a_tensor, b_tensor, abs_error);
case PRECISION(kInt64):
return CheckTensorPrecision<int64_t>(a_tensor, b_tensor, abs_error);
case PRECISION(kBool):
return CheckTensorPrecision<bool>(a_tensor, b_tensor, abs_error);
LOG(FATAL) << "not support type: " << PrecisionToStr(precision_type);
return false;
bool TestCase::CheckPrecision(const std::string& var_name,
float abs_error,
PrecisionType precision_type) {
bool success = true;
if (inst_scope_->FindVar(var_name)->IsType<Tensor>()) {
auto a_tensor = inst_scope_->FindTensor(var_name);
auto b_tensor = base_scope_->FindTensor(var_name);
success = success &&
CheckPrecision(a_tensor, b_tensor, abs_error, precision_type);
} else if (inst_scope_->FindVar(var_name)->IsType<std::vector<Tensor>>()) {
auto a_tensor_array =
auto b_tensor_array =
CHECK_EQ(a_tensor_array->size(), b_tensor_array->size());
for (int i = 0; i < a_tensor_array->size(); i++) {
Tensor* a_tensor = &(a_tensor_array->at(i));
Tensor* b_tensor = &(b_tensor_array->at(i));
if (a_tensor->dims().size() == 0 && b_tensor->dims().size() == 0) {
success = success &&
CheckPrecision(a_tensor, b_tensor, abs_error, precision_type);
} else {
LOG(FATAL) << "unsupported var type";
return success;
TestCase::~TestCase() {
if (op_desc_->Type() == "subgraph") {
// Release the subblock desc of Subgraph op
......@@ -66,19 +66,24 @@ class TestCase {
/// output.
virtual void RunBaseline(Scope* scope) = 0;
// checkout the precision of the two tensors. b_tensor is from the baseline
// Check the precision of the two tensors with type T. b_tensor is the baseline.
template <typename T>
bool CheckTensorPrecision(const Tensor* a_tensor,
const Tensor* b_tensor,
float abs_error);
// Check the precision of the two tensors. b_tensor is the baseline.
bool CheckPrecision(const Tensor* a_tensor,
const Tensor* b_tensor,
float abs_error,
PrecisionType precision_type);
/// Check the precision of the output variables. It will compare the same
/// tensor
/// (or all tensors of tensor_array) in two scopes, one of the instruction
/// execution,
/// and the other for the baseline.
template <typename T>
bool CheckPrecision(const std::string& var_name, float abs_error);
/// tensor (or all tensors of the tensor_array) in two scopes, one of the
/// instruction execution, and the other for the baseline.
bool CheckPrecision(const std::string& var_name,
float abs_error,
PrecisionType precision_type);
const cpp::OpDesc& op_desc() { return *op_desc_; }
......@@ -86,20 +91,6 @@ class TestCase {
// kernel registry.
void CheckKernelConsistWithDefinition() {}
// Get the real precision of the output for the precision check. When the
// declared precision obtained from the kernel is kAny, the test case must set
// the precision of the output itself.
bool GetPrecisonType(const std::string& var_name,
PrecisionType* precision_type) {
auto res = precision_type_map_.find(var_name);
if (res == precision_type_map_.end()) {
return false;
} else {
*precision_type = precision_type_map_.at(var_name);
return true;
Scope& scope() { return *scope_; }
Scope* baseline_scope() { return base_scope_; }
......@@ -159,19 +150,6 @@ class TestCase {
// Prepare for the operator.
virtual void PrepareOpDesc(cpp::OpDesc* op_desc) = 0;
// Set the real precision of the output for the precision check. When the
// declared precision obtained from the kernel is kAny, the test case must set
// the precision of the output itself.
void SetPrecisionType(const std::string& var_name,
const PrecisionType& precision_type) {
auto res = precision_type_map_.find(var_name);
if (res == precision_type_map_.end()) {
precision_type_map_.insert({var_name, precision_type});
} else {
precision_type_map_.at(var_name) = precision_type;
const Instruction& instruction() { return *instruction_; }
......@@ -215,7 +193,6 @@ class TestCase {
Scope* base_scope_{};
std::unique_ptr<cpp::OpDesc> op_desc_;
std::unique_ptr<Instruction> instruction_;
std::unordered_map<std::string, PrecisionType> precision_type_map_;
class Arena {
......@@ -272,24 +249,7 @@ class Arena {
const Type* type =
auto precision_type = type->precision();
if (precision_type == PRECISION(kAny)) {
CHECK(tester_->GetPrecisonType(var_name, &precision_type));
switch (precision_type) {
case PRECISION(kFloat):
return tester_->CheckPrecision<float>(var_name, abs_error_);
case PRECISION(kInt8):
return tester_->CheckPrecision<int8_t>(var_name, abs_error_);
case PRECISION(kInt32):
return tester_->CheckPrecision<int32_t>(var_name, abs_error_);
case PRECISION(kInt64):
return tester_->CheckPrecision<int64_t>(var_name, abs_error_);
case PRECISION(kBool):
return tester_->CheckPrecision<bool>(var_name, abs_error_);
LOG(FATAL) << "not support type " << PrecisionToStr(type->precision());
return false;
return tester_->CheckPrecision(var_name, abs_error_, precision_type);
......@@ -298,77 +258,6 @@ class Arena {
float abs_error_;
template <typename T>
bool TestCase::CheckTensorPrecision(const Tensor* a_tensor,
const Tensor* b_tensor,
float abs_error) {
CHECK(ShapeEquals(a_tensor->dims(), b_tensor->dims()));
CHECK(a_tensor->lod() == b_tensor->lod()) << "lod not match";
// The baseline should output in host devices.
CHECK(b_tensor->target() == TARGET(kHost) ||
b_tensor->target() == TARGET(kX86) ||
b_tensor->target() == TARGET(kARM));
const T* a_data{};
switch (a_tensor->target()) {
case TARGET(kX86):
case TARGET(kHost):
case TARGET(kARM):
a_data = static_cast<const T*>(a_tensor->raw_data());
// Before compare, need to copy data from `target` device to host.
LOG(FATAL) << "Not supported";
const T* b_data = static_cast<const T*>(b_tensor->raw_data());
bool success = true;
for (int i = 0; i < a_tensor->dims().production(); i++) {
EXPECT_NEAR(a_data[i], b_data[i], abs_error);
if (fabsf(a_data[i] - b_data[i]) > abs_error) {
success = false;
return success;
template <typename T>
bool TestCase::CheckPrecision(const std::string& var_name, float abs_error) {
bool success = true;
if (inst_scope_->FindVar(var_name)->IsType<Tensor>()) {
auto a_tensor = inst_scope_->FindTensor(var_name);
auto b_tensor = base_scope_->FindTensor(var_name);
success = success && CheckTensorPrecision<T>(a_tensor, b_tensor, abs_error);
} else if (inst_scope_->FindVar(var_name)->IsType<std::vector<Tensor>>()) {
auto a_tensor_array =
auto b_tensor_array =
CHECK_EQ(a_tensor_array->size(), b_tensor_array->size());
for (int i = 0; i < a_tensor_array->size(); i++) {
Tensor* a_tensor = &(a_tensor_array->at(i));
Tensor* b_tensor = &(b_tensor_array->at(i));
if (a_tensor->dims().size() == 0 && b_tensor->dims().size() == 0) {
success =
success && CheckTensorPrecision<T>(a_tensor, b_tensor, abs_error);
} else {
LOG(FATAL) << "unsupported var type";
return success;
} // namespace arena
} // namespace lite
} // namespace paddle
......@@ -23,16 +23,9 @@ namespace lite {
namespace kernels {
namespace arm {
void AssignCompute::PrepareForRun() {
// CHECK_OR_FALSE(param_t.Out);
void AssignCompute::Run() {
// LOG(INFO) << "into kernel compute run";
auto& param = Param<param_t>();
const lite::Tensor* input = param.X;
lite::Tensor* output = param.Out;
} // namespace arm
......@@ -41,7 +34,7 @@ void AssignCompute::Run() {
} // namespace paddle
assign, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::AssignCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
assign, kARM, kAny, kNCHW, paddle::lite::kernels::arm::AssignCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
......@@ -22,10 +22,10 @@ namespace lite {
namespace kernels {
namespace arm {
class AssignCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
class AssignCompute : public KernelLite<TARGET(kARM), PRECISION(kAny)> {
using param_t = operators::AssignParam;
void PrepareForRun() override;
void Run() override;
virtual ~AssignCompute() = default;
......@@ -58,9 +58,9 @@ void AssignValueCompute::Run() {
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
......@@ -22,7 +22,7 @@ namespace lite {
namespace kernels {
namespace arm {
class AssignValueCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
class AssignValueCompute : public KernelLite<TARGET(kARM), PRECISION(kAny)> {
using param_t = operators::AssignValueParam;
......@@ -67,13 +67,14 @@ void TestAssign(const Place& place) {
TEST(Assign, precision) {
#ifdef LITE_WITH_X86
Place place(TARGET(kX86));
Place place;
Place place(TARGET(kARM));
place = {TARGET(kARM), PRECISION(kAny)};
} // namespace lite
......@@ -95,10 +95,12 @@ class AssignValueComputeTester : public arena::TestCase {
TEST(AssignValue, precision) {
LOG(INFO) << "test argmax op";
Place place;
LOG(INFO) << "test argmax arm";
Place place(TARGET(kARM));
place = {TARGET(kARM), PRECISION(kAny)};
for (int dtype : {2, 5}) {
for (int n : {1}) {
......@@ -114,7 +116,6 @@ TEST(AssignValue, precision) {
} // namespace lite
......@@ -119,35 +119,6 @@ class CastComputeTester : public arena::TestCase {
LOG(FATAL) << "unsupported data type: " << in_dtype_;
PrecisionType out_ptype;
switch (out_dtype_) {
case 0:
out_ptype = PRECISION(kBool);
case 21:
out_ptype = PRECISION(kInt8);
case 1:
out_ptype = PRECISION(kInt16);
case 2:
out_ptype = PRECISION(kInt32);
case 3:
out_ptype = PRECISION(kInt64);
case 4:
out_ptype = PRECISION(kFP16);
case 5:
out_ptype = PRECISION(kFloat);
LOG(FATAL) << "unsupported data type: " << out_dtype_;
SetPrecisionType(out_, out_ptype);
......@@ -86,8 +86,6 @@ class FillConstantBatchSizeLikeComputeTester : public arena::TestCase {
std::vector<float> din(in_dims_.production());
fill_data_rand(din.data(), -1.f, 1.f, in_dims_.production());
SetCommonTensor(input_, in_dims_, din.data(), in_lod_);
SetPrecisionType(out_, PRECISION(kFloat));
......@@ -109,7 +109,6 @@ class FillConstantComputeTester : public arena::TestCase {
SetCommonTensor(shape_tensor_list_[i], DDim({1}), dshape_tensor.data());
SetPrecisionType(out_, PRECISION(kFloat));
......@@ -68,8 +68,6 @@ class ReadFromArrayComputeTester : public arena::TestCase {
std::vector<int64_t> didn(1);
didn[0] = id_;
SetCommonTensor(idn_, DDim{{1}}, didn.data());
SetPrecisionType(out_, PRECISION(kFloat));
......@@ -107,7 +107,6 @@ class UnsqueezeComputeTester : public arena::TestCase {
void PrepareData() override {
SetPrecisionType(out_, PRECISION(kFloat));
std::vector<float> in_data(dims_.production());
for (int i = 0; i < dims_.production(); ++i) {
in_data[i] = i;
......@@ -214,7 +213,6 @@ class Unsqueeze2ComputeTester : public arena::TestCase {
void PrepareData() override {
SetPrecisionType(out_, PRECISION(kFloat));
std::vector<float> in_data(dims_.production());
for (int i = 0; i < dims_.production(); ++i) {
in_data[i] = i;
......@@ -66,8 +66,6 @@ class WriteToArrayComputeTester : public arena::TestCase {
std::vector<int64_t> didn(1);
didn[0] = id_;
SetCommonTensor(idn_, DDim{{1}}, didn.data());
SetPrecisionType(out_, PRECISION(kFloat));
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册
客服 返回