diff --git a/lite/core/arena/framework.cc b/lite/core/arena/framework.cc
index 214a63457c06cb5c11e9fc229e91cf66e091da39..1138a3bcc2e3e3f3c77d94bf8128b8231f930550 100644
--- a/lite/core/arena/framework.cc
+++ b/lite/core/arena/framework.cc
@@ -55,7 +55,7 @@ void TestCase::CreateInstruction() {
     op = LiteOpRegistry::Global().Create(op_desc().Type());
   }
   CHECK(op) << "no op for " << op_desc().Type();
-  op->Attach(*op_desc_, inst_scope_);
+  op->Attach(*op_desc_, inst_scope_.get());
   auto kernels = op->CreateKernels({place_});
   // filter out the target kernel
   CHECK(!kernels.empty()) << "No kernel found for place "
@@ -80,54 +80,35 @@ void TestCase::CreateInstruction() {
 void TestCase::PrepareInputsForInstruction() {
   for (auto& arg : op_desc().InputArgumentNames()) {
     for (auto& var : op_desc().Input(arg)) {
-      std::string kernel_key = instruction_->kernel()->key_with_alias();
-      const auto* param_type = ParamTypeRegistry::Global().RetrieveInArgument(
-          place_, kernel_key, arg);
-
-      const Type* inst_type = nullptr;
-      if (param_type->type->IsTensor()) {
-        inst_type = Type::GetTensorTy(TARGET(kHost));
-      } else if (param_type->type->IsTensorList()) {
-        inst_type = Type::GetTensorListTy(TARGET(kHost));
-      } else {
-        LOG(FATAL) << "unsupported param_type";
-      }
-
-      CHECK(scope_->FindVar(var));
-      if (!TargetCompatibleTo(*inst_type, *param_type->type)) {
-        /// Create a tensor or tensor_array in the instruction's scope,
-        /// alloc memory and then copy data there.
-        if (param_type->type->IsTensor()) {
-          const auto* shared_tensor = scope_->FindTensor(var);
-          auto* target_tensor =
-              inst_scope_->LocalVar(var)->GetMutable<Tensor>();
-          CHECK(!shared_tensor->dims().empty()) << "shared_tensor is empty yet";
-          target_tensor->Resize(shared_tensor->dims());
-          TargetCopy(param_type->type->target(),
-                     target_tensor->mutable_data(param_type->type->target(),
-                                                 shared_tensor->memory_size()),
-                     shared_tensor->raw_data(),
-                     shared_tensor->memory_size());
-        } else if (param_type->type->IsTensorList()) {
-          const auto* shared_tensor_array =
-              scope_->FindVar(var)->GetMutable<std::vector<Tensor>>();
-          auto* target_tensor_array =
-              inst_scope_->LocalVar(var)->GetMutable<std::vector<Tensor>>();
-          CHECK(!shared_tensor_array->empty())
-              << "shared_tensor_array is empty yet";
-          target_tensor_array->resize(shared_tensor_array->size());
-          for (size_t i = 0; i < shared_tensor_array->size(); i++) {
-            target_tensor_array->at(i).Resize(
-                shared_tensor_array->at(i).dims());
-            TargetCopy(param_type->type->target(),
-                       target_tensor_array->at(i).mutable_data(
-                           param_type->type->target(),
-                           shared_tensor_array->at(i).memory_size()),
-                       shared_tensor_array->at(i).raw_data(),
-                       shared_tensor_array->at(i).memory_size());
-          }
-        } else {
-          LOG(FATAL) << "not support";
+      const auto* type = instruction_->kernel()->GetInputDeclType(arg);
+      CHECK(base_scope_->FindVar(var));
+      /// Create a tensor or tensor_array in the instruction's scope,
+      /// alloc memory and then copy data there.
+      if (type->IsTensor() &&
+          !TargetCompatibleTo(*Type::GetTensorTy(TARGET(kHost)), *type)) {
+        const auto* base_tensor = base_scope_->FindTensor(var);
+        auto* inst_tensor = inst_scope_->FindMutableTensor(var);
+        CHECK(!base_tensor->dims().empty())
+            << "The dims of input tensor is empty yet";
+        TargetCopy(type->target(),
+                   inst_tensor->mutable_data(type->target(),
+                                             base_tensor->memory_size()),
+                   base_tensor->raw_data(),
+                   base_tensor->memory_size());
+      } else if (type->IsTensorList() &&
+                 !TargetCompatibleTo(*Type::GetTensorListTy(TARGET(kHost)),
+                                     *type)) {
+        const auto* base_tensor_list = base_scope_->FindTensorList(var);
+        auto* inst_tensor_list = inst_scope_->FindMutableTensorList(var);
+        CHECK_EQ(base_tensor_list->size(), inst_tensor_list->size());
+        for (size_t i = 0; i < base_tensor_list->size(); i++) {
+          CHECK(!base_tensor_list->at(i).dims().empty())
+              << "The dims of input tensor[" << i << "] is empty yet";
+          TargetCopy(type->target(),
+                     inst_tensor_list->at(i).mutable_data(
+                         type->target(), base_tensor_list->at(i).memory_size()),
+                     inst_tensor_list->at(i).raw_data(),
+                     inst_tensor_list->at(i).memory_size());
         }
       }
     }
@@ -135,37 +116,36 @@ void TestCase::PrepareInputsForInstruction() {
 }
 
 template <typename T>
-bool TestCase::CheckTensorPrecision(const Tensor* a_tensor,
-                                    const Tensor* b_tensor,
+bool TestCase::CheckTensorPrecision(const Tensor* inst_tensor,
+                                    const Tensor* base_tensor,
                                     float abs_error) {
-  CHECK(a_tensor);
-  CHECK(b_tensor);
+  CHECK(inst_tensor);
+  CHECK(base_tensor);
 
-  CHECK(ShapeEquals(a_tensor->dims(), b_tensor->dims()));
+  CHECK(ShapeEquals(inst_tensor->dims(), base_tensor->dims()));
 
-  CHECK(a_tensor->lod() == b_tensor->lod()) << "lod not match";
+  CHECK(inst_tensor->lod() == base_tensor->lod()) << "lod not match";
 
   // The baseline should output in host devices.
-  CHECK(b_tensor->target() == TARGET(kHost) ||
-        b_tensor->target() == TARGET(kX86) ||
-        b_tensor->target() == TARGET(kARM));
-
-  const T* a_data{};
-  Tensor a_host_tensor;
-  a_host_tensor.Resize(a_tensor->dims());
-  switch (a_tensor->target()) {
+  CHECK(base_tensor->target() == TARGET(kHost) ||
+        base_tensor->target() == TARGET(kX86) ||
+        base_tensor->target() == TARGET(kARM));
+  const T* inst_data{};
+  Tensor inst_host_tensor;
+  inst_host_tensor.Resize(inst_tensor->dims());
+  switch (inst_tensor->target()) {
     case TARGET(kX86):
     case TARGET(kHost):
    case TARGET(kARM):
-      a_data = static_cast<const T*>(a_tensor->raw_data());
+      inst_data = static_cast<const T*>(inst_tensor->raw_data());
       break;
 #ifdef LITE_WITH_XPU
     case TARGET(kXPU):
-      CopySync<TARGET(kXPU)>(a_host_tensor.mutable_data<T>(),
-                             a_tensor->raw_data(),
-                             sizeof(T) * a_tensor->dims().production(),
-                             IoDirection::DtoH);
-      a_data = a_host_tensor.data<T>();
+      CopySync<TARGET(kXPU)>(inst_host_tensor.mutable_data<T>(),
+                             inst_tensor->raw_data(),
+                             sizeof(T) * inst_tensor->dims().production(),
+                             IoDirection::DtoH);
+      inst_data = inst_host_tensor.data<T>();
       break;
 #endif
 
@@ -174,50 +154,50 @@ bool TestCase::CheckTensorPrecision(const Tensor* a_tensor,
       LOG(FATAL) << "Not supported";
   }
 
-  CHECK(a_data);
+  CHECK(inst_data);
 
-  const T* b_data = static_cast<const T*>(b_tensor->raw_data());
+  const T* base_data = static_cast<const T*>(base_tensor->raw_data());
 
   bool success = true;
-  for (int i = 0; i < a_tensor->dims().production(); i++) {
-    EXPECT_NEAR(a_data[i], b_data[i], abs_error);
-    if (fabsf(a_data[i] - b_data[i]) > abs_error) {
+  for (int i = 0; i < inst_tensor->dims().production(); i++) {
+    EXPECT_NEAR(inst_data[i], base_data[i], abs_error);
+    if (fabsf(inst_data[i] - base_data[i]) > abs_error) {
       success = false;
     }
   }
   return success;
 }
 
-bool TestCase::CheckPrecision(const Tensor* a_tensor,
-                              const Tensor* b_tensor,
+bool TestCase::CheckPrecision(const Tensor* inst_tensor,
+                              const Tensor* base_tensor,
                               float abs_error,
                               PrecisionType precision_type) {
   PrecisionType precision_type_t = precision_type;
   if (precision_type == PRECISION(kAny)) {
-    precision_type_t = b_tensor->precision();
+    precision_type_t = base_tensor->precision();
   }
-  CHECK(precision_type_t == b_tensor->precision())
+  CHECK(precision_type_t == base_tensor->precision())
       << "arg precision type and base tensor precision type are not matched! "
         "arg precision type is: "
      << PrecisionToStr(precision_type) << ", base tensor precision type is: "
-     << PrecisionToStr(b_tensor->precision());
-  CHECK(a_tensor->precision() == b_tensor->precision())
+     << PrecisionToStr(base_tensor->precision());
+  CHECK(inst_tensor->precision() == base_tensor->precision())
       << "real tensor precision type and base tensor precision type are not "
        "matched! real tensor precision type is: "
-     << PrecisionToStr(a_tensor->precision())
+     << PrecisionToStr(inst_tensor->precision())
      << ", base tensor precision type is: "
-     << PrecisionToStr(b_tensor->precision());
+     << PrecisionToStr(base_tensor->precision());
   switch (precision_type_t) {
     case PRECISION(kFloat):
-      return CheckTensorPrecision<float>(a_tensor, b_tensor, abs_error);
+      return CheckTensorPrecision<float>(inst_tensor, base_tensor, abs_error);
     case PRECISION(kInt8):
-      return CheckTensorPrecision<int8_t>(a_tensor, b_tensor, abs_error);
+      return CheckTensorPrecision<int8_t>(inst_tensor, base_tensor, abs_error);
     case PRECISION(kInt32):
-      return CheckTensorPrecision<int32_t>(a_tensor, b_tensor, abs_error);
+      return CheckTensorPrecision<int32_t>(inst_tensor, base_tensor, abs_error);
     case PRECISION(kInt64):
-      return CheckTensorPrecision<int64_t>(a_tensor, b_tensor, abs_error);
+      return CheckTensorPrecision<int64_t>(inst_tensor, base_tensor, abs_error);
     case PRECISION(kBool):
-      return CheckTensorPrecision<bool>(a_tensor, b_tensor, abs_error);
+      return CheckTensorPrecision<bool>(inst_tensor, base_tensor, abs_error);
     default:
       LOG(FATAL) << "not support type: " << PrecisionToStr(precision_type);
       return false;
@@ -229,24 +209,24 @@ bool TestCase::CheckPrecision(const std::string& var_name,
                               PrecisionType precision_type) {
   bool success = true;
   if (inst_scope_->FindVar(var_name)->IsType<Tensor>()) {
-    auto a_tensor = inst_scope_->FindTensor(var_name);
-    auto b_tensor = base_scope_->FindTensor(var_name);
-    success = success &&
-              CheckPrecision(a_tensor, b_tensor, abs_error, precision_type);
+    auto inst_tensor = inst_scope_->FindTensor(var_name);
+    auto base_tensor = base_scope_->FindTensor(var_name);
+    success =
+        success &&
+        CheckPrecision(inst_tensor, base_tensor, abs_error, precision_type);
   } else if (inst_scope_->FindVar(var_name)->IsType<std::vector<Tensor>>()) {
-    auto a_tensor_array =
-        inst_scope_->FindVar(var_name)->GetMutable<std::vector<Tensor>>();
-    auto b_tensor_array =
-        base_scope_->FindVar(var_name)->GetMutable<std::vector<Tensor>>();
-    CHECK_EQ(a_tensor_array->size(), b_tensor_array->size());
-    for (size_t i = 0; i < a_tensor_array->size(); i++) {
-      Tensor* a_tensor = &(a_tensor_array->at(i));
-      Tensor* b_tensor = &(b_tensor_array->at(i));
-      if (a_tensor->dims().size() == 0 && b_tensor->dims().size() == 0) {
+    auto inst_tensor_list = inst_scope_->FindMutableTensorList(var_name);
+    auto base_tensor_list = base_scope_->FindMutableTensorList(var_name);
+    CHECK_EQ(inst_tensor_list->size(), base_tensor_list->size());
+    for (size_t i = 0; i < inst_tensor_list->size(); i++) {
+      Tensor* inst_tensor = &(inst_tensor_list->at(i));
+      Tensor* base_tensor = &(base_tensor_list->at(i));
+      if (inst_tensor->dims().size() == 0 && base_tensor->dims().size() == 0) {
         continue;
       }
-      success = success &&
-                CheckPrecision(a_tensor, b_tensor, abs_error, precision_type);
+      success =
+          success &&
+          CheckPrecision(inst_tensor, base_tensor, abs_error, precision_type);
     }
   } else {
     LOG(FATAL) << "unsupported var type";
diff --git a/lite/core/arena/framework.h b/lite/core/arena/framework.h
index cf864a32044e3dfd03ecd03327a0db69275ef586..450be521540eae9a3eee5e5cf7fb47c95af929e0 100644
--- a/lite/core/arena/framework.h
+++ b/lite/core/arena/framework.h
@@ -40,13 +40,15 @@ namespace arena {
 class TestCase {
  public:
   explicit TestCase(const Place& place, const std::string& alias)
-      : place_(place), scope_(new Scope), alias_(alias) {
+      : place_(place),
+        alias_(alias),
+        inst_scope_(new Scope),
+        base_scope_(new Scope) {
     ctx_ = ContextScheduler::Global().NewContext(place_.target);
   }
   virtual ~TestCase();
 
   void Prepare() {
-    PrepareScopes();
     PrepareData();
     op_desc_.reset(new cpp::OpDesc);
     PrepareOpDesc(op_desc_.get());
@@ -91,16 +93,15 @@ class TestCase {
   // kernel registry.
   void CheckKernelConsistWithDefinition() {}
 
-  Scope& scope() { return *scope_; }
-
-  Scope* baseline_scope() { return base_scope_; }
-  Scope* inst_scope() { return inst_scope_; }
+  Scope* baseline_scope() { return base_scope_.get(); }
+  Scope* inst_scope() { return inst_scope_.get(); }
 
  protected:
   // Prepare inputs in scope() for Tester.
   virtual void PrepareData() = 0;
 
-  /// Prepare a tensor in host. The tensors will be created in scope_.
+  /// Prepare a tensor in host. The tensors will be created both in base_scope_
+  /// and inst_scope_.
   /// Need to specify the targets other than X86 or ARM.
   template <typename T>
   void SetCommonTensor(const std::string& var_name,
@@ -108,42 +109,47 @@ class TestCase {
                        const T* data,
                        const LoD& lod = {},
                        bool is_persistable = false) {
-    auto* tensor = scope_->NewTensor(var_name);
-    tensor->Resize(ddim);
-    auto* d = tensor->mutable_data<T>();
-    memcpy(d, data, ddim.production() * sizeof(T));
+    // Create and fill a input tensor with the given data for baseline
+    auto* base_tensor = base_scope_->NewTensor(var_name);
+    base_tensor->Resize(ddim);
+    memcpy(base_tensor->mutable_data<T>(), data, ddim.production() * sizeof(T));
     // set lod
-    if (!lod.empty()) *tensor->mutable_lod() = lod;
+    if (!lod.empty()) *base_tensor->mutable_lod() = lod;
     // set persistable
-    tensor->set_persistable(is_persistable);
+    base_tensor->set_persistable(is_persistable);
+
+    // Create a copy for instruction
+    auto* inst_tensor = inst_scope_->NewTensor(var_name);
+    inst_tensor->CopyDataFrom(*base_tensor);
   }
 
   /// Prepare a tensor_array in host. The tensors will be created in scope_.
   /// Need to specify the targets other than X86 or ARM.
   template <typename T>
   void SetCommonTensorList(const std::string& var_name,
-                           const std::vector<DDim>& array_tensor_dims,
+                           const std::vector<DDim>& ddims,
                            const std::vector<std::vector<T>>& datas,
                            const std::vector<LoD>& lods = {}) {
-    CHECK_EQ(array_tensor_dims.size(), datas.size());
+    // Create a tensor array for baseline, and a copy for instruction
+    CHECK_EQ(ddims.size(), datas.size());
     if (!lods.empty()) {
-      CHECK_EQ(array_tensor_dims.size(), lods.size());
+      CHECK_EQ(ddims.size(), lods.size());
     }
-    auto* tensor_array =
-        scope_->Var(var_name)->GetMutable<std::vector<Tensor>>();
-    for (int i = 0; i < array_tensor_dims.size(); i++) {
-      Tensor tmp;
-      tmp.Resize(array_tensor_dims[i]);
-      auto* tmp_data = tmp.mutable_data<T>();
-      memcpy(tmp_data,
+
+    auto* base_tensor_list = base_scope_->NewTensorList(var_name);
+    auto* inst_tensor_list = inst_scope_->NewTensorList(var_name);
+    for (int i = 0; i < ddims.size(); i++) {
+      Tensor item;
+      item.Resize(ddims[i]);
+      memcpy(item.mutable_data<T>(),
             datas[i].data(),
-             array_tensor_dims[i].production() * sizeof(T));
+             ddims[i].production() * sizeof(T));
       if (!lods.empty()) {
-        tmp.set_lod(lods[i]);
+        item.set_lod(lods[i]);
       }
-      tensor_array->push_back(tmp);
+      base_tensor_list->push_back(item);
+      inst_tensor_list->push_back(item);
     }
   }
@@ -157,11 +163,6 @@ class TestCase {
   std::unique_ptr<KernelContext> ctx_;
   void CreateInstruction();
 
-  void PrepareScopes() {
-    inst_scope_ = &scope_->NewScope();
-    base_scope_ = &scope_->NewScope();
-  }
-
   // Check shape
   // TODO(Superjomn) Move this method to utils or DDim?
   bool ShapeEquals(const DDim& a, const DDim& b) {
@@ -172,25 +173,23 @@ class TestCase {
     return true;
   }
 
-  /// Copy the input tensors to target devices needed by the instruction.
+  // Copy the host tensors to the device tensors if needed by the instruction.
   void PrepareInputsForInstruction();
 
   // Create output tensors and variables.
   void PrepareOutputsForInstruction() {
     for (auto x : op_desc().output_vars()) {
-      inst_scope_->NewTensor(x);
-      base_scope_->NewTensor(x);
+      inst_scope_->Var(x);
     }
   }
 
  private:
   Place place_;
-  std::shared_ptr<Scope> scope_;
   std::string alias_;
   // The workspace for the Instruction.
-  Scope* inst_scope_{};
+  std::shared_ptr<Scope> inst_scope_;
   // The workspace for the baseline implementation.
-  Scope* base_scope_{};
+  std::shared_ptr<Scope> base_scope_;
   std::unique_ptr<cpp::OpDesc> op_desc_;
   std::unique_ptr<Instruction> instruction_;
 };
diff --git a/lite/core/scope.h b/lite/core/scope.h
index 57e4e3a5e058000f963ff369cbd25e69b9c981c6..41d6ee8f4f55268e3389cd4cada7e48fb8f922d7 100644
--- a/lite/core/scope.h
+++ b/lite/core/scope.h
@@ -62,19 +62,36 @@ class Scope final {
   // Create a Tensor variable. This will create a new Variable called `name`.
   Tensor* NewTensor(const std::string& name) {
     auto* var = Var(name);
-    return var->GetMutable<TensorLite>();
+    return var->GetMutable<Tensor>();
   }
 
   const Tensor* FindTensor(const std::string& name) {
     auto* var = FindVar(name);
     if (!var) return nullptr;
-    return &var->Get<TensorLite>();
+    return &var->Get<Tensor>();
   }
 
   Tensor* FindMutableTensor(const std::string& name) {
     auto* var = FindVar(name);
     if (!var) return nullptr;
-    return var->GetMutable<TensorLite>();
+    return var->GetMutable<Tensor>();
+  }
+
+  std::vector<Tensor>* NewTensorList(const std::string& name) {
+    auto* var = Var(name);
+    return var->GetMutable<std::vector<Tensor>>();
+  }
+
+  const std::vector<Tensor>* FindTensorList(const std::string& name) {
+    auto* var = FindVar(name);
+    if (!var) return nullptr;
+    return &var->Get<std::vector<Tensor>>();
+  }
+
+  std::vector<Tensor>* FindMutableTensorList(const std::string& name) {
+    auto* var = FindVar(name);
+    if (!var) return nullptr;
+    return var->GetMutable<std::vector<Tensor>>();
   }
 
  private:
diff --git a/lite/tests/kernels/box_clip_compute_test.cc b/lite/tests/kernels/box_clip_compute_test.cc
index 72947fa4b258a894e5a73c5e8fe8cce12ef9a02c..c599e64214d3fb15a52cb14fe48de7a7d75b2868 100644
--- a/lite/tests/kernels/box_clip_compute_test.cc
+++ b/lite/tests/kernels/box_clip_compute_test.cc
@@ -70,9 +70,7 @@ class BoxClipComputeTester : public arena::TestCase {
       float sign = i % 3 == 0 ? -1.0f : 1.0f;
       input_data[i] = sign * static_cast<float>((i * 7) % 20);
     }
-    SetCommonTensor(input_, input_dims_, input_data.data());
-    auto input_tensor = baseline_scope()->FindMutableTensor(input_);
-    input_tensor->set_lod(input_lod_);
+    SetCommonTensor(input_, input_dims_, input_data.data(), input_lod_);
 
     std::vector<float> im_info_data{10, 10, 1, 15, 15, 1};
     SetCommonTensor(im_info_, im_info_dim_, im_info_data.data());
diff --git a/lite/tests/kernels/roi_align_compute_test.cc b/lite/tests/kernels/roi_align_compute_test.cc
index 8eb84dd0337d0635dc360e2e04aa1ad047e912c0..2bbfdcd81da951bd769ab03094a0df48f3a6e13b 100644
--- a/lite/tests/kernels/roi_align_compute_test.cc
+++ b/lite/tests/kernels/roi_align_compute_test.cc
@@ -106,13 +106,11 @@ class RoiAlignComputeTester : public arena::TestCase {
     }
     LOG(INFO) << "Read rois data. " << datas[0] << " " << datas.back();
     reader.close();
-    SetCommonTensor(rois_, dims, datas.data());
-    auto rois_tensor = baseline_scope()->FindMutableTensor(rois_);
 
     std::vector<uint64_t> lod0({0, 152, 304});
     LoD lod;
     lod.push_back(lod0);
-    rois_tensor->set_lod(lod);
+    SetCommonTensor(rois_, dims, datas.data(), lod);
   }
 };
 
diff --git a/lite/tests/kernels/slice_compute_test.cc b/lite/tests/kernels/slice_compute_test.cc
index 8d5d2c089ec7eb0a2a146ff635e2e9afc18fbcd3..b566bfa3e86cf6067f9914b5fc3932458a6ee186 100644
--- a/lite/tests/kernels/slice_compute_test.cc
+++ b/lite/tests/kernels/slice_compute_test.cc
@@ -202,20 +202,15 @@ class SliceComputeTester : public arena::TestCase {
                       DDim({static_cast<int64_t>(ends_.size())}),
                       ends_.data());
     } else if (use_tensor_list_) {
-      Scope& scope_ = this->scope();
       for (int i = 0; i < starts_.size(); ++i) {
-        auto* tensor = scope_.NewTensor("starts_tensor_list_" +
-                                        paddle::lite::to_string(i));
-        tensor->Resize(DDim({1}));
-        auto* d = tensor->mutable_data<int>();
-        d[0] = starts_[i];
+        SetCommonTensor("starts_tensor_list_" + paddle::lite::to_string(i),
+                        DDim({1}),
+                        &starts_[i]);
       }
       for (int i = 0; i < ends_.size(); ++i) {
-        auto* tensor =
-            scope_.NewTensor("ends_tensor_list_" + paddle::lite::to_string(i));
-        tensor->Resize(DDim({1}));
-        auto* d = tensor->mutable_data<int>();
-        d[0] = ends_[i];
+        SetCommonTensor("ends_tensor_list_" + paddle::lite::to_string(i),
+                        DDim({1}),
+                        &ends_[i]);
       }
     }
   }
diff --git a/lite/tests/kernels/softmax_compute_test.cc b/lite/tests/kernels/softmax_compute_test.cc
index e0f708f3a78f7fbacb834631142d6e09afa9a607..87a94aba184a055081446b4df830b72146834ed2 100644
--- a/lite/tests/kernels/softmax_compute_test.cc
+++ b/lite/tests/kernels/softmax_compute_test.cc
@@ -103,7 +103,7 @@ TEST(Softmax, precision) {
 #if defined(LITE_WITH_NPU)
   place = TARGET(kNPU);
   abs_error = 4e-3;  // Using fp16 in NPU
-#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
+#elif defined(LITE_WITH_XPU)
   place = TARGET(kXPU);
 #else
   return;
@@ -111,8 +111,12 @@ TEST(Softmax, precision) {
 
   for (auto x_dims :
        std::vector<std::vector<int64_t>>{{1, 2, 3, 4}, {2, 3, 4}, {3, 4}}) {
-    for (auto axis : {-1, 0, 1, 2, 3}) {
-      if (axis >= x_dims.size()) continue;
+    int ndims = x_dims.size();
+    for (int axis = -1; axis < ndims; axis++) {
+#if defined(LITE_WITH_XPU)
+      if (axis != -1 && axis != ndims - 1)
+        continue;  // -1 and dims.size() - 1 are only supported by XPU
+#endif
       std::unique_ptr<arena::TestCase> tester(
           new SoftmaxComputeTest(place, "def", DDim(x_dims), axis));
       arena::Arena arena(std::move(tester), place, abs_error);