// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/arena/framework.h"
#include <set>
#include "lite/core/context.h"
#include "lite/operators/subgraph_op.h"

namespace paddle {
namespace lite {
namespace arena {

void TestCase::CreateInstruction() {
  std::shared_ptr<lite::OpLite> op = nullptr;
  static const std::set<TargetType> subgraph_op_supported_targets(
      {TARGET(kNPU), TARGET(kXPU)});
  bool enable_subgraph_op = subgraph_op_supported_targets.find(place_.target) !=
                            subgraph_op_supported_targets.end();
#if defined(LITE_WITH_XPU) && !defined(LITE_WITH_XTCL)
  enable_subgraph_op = false;  // Use the XPU kernel directly if XTCL is
                               // disabled.
#endif
  if (enable_subgraph_op) {
    // Create a new block desc to wrap the original op desc.
    auto sub_program_desc = std::make_shared<cpp::ProgramDesc>();
    int sub_block_idx = 0;
    auto sub_block_desc = sub_program_desc->AddBlock<cpp::BlockDesc>();
    sub_block_desc->ClearOps();
    sub_block_desc->ClearVars();
    auto sub_op_desc = sub_block_desc->AddOp<cpp::OpDesc>();
    *sub_op_desc = *op_desc_;
    // Add the block desc into the subgraph op, which replaces the
    // original op.
    op_desc_.reset(new cpp::OpDesc());
    op_desc_->SetType("subgraph");
    op_desc_->SetAttr("sub_block", sub_block_idx);
    auto in_names = sub_op_desc->input_vars();
    auto out_names = sub_op_desc->output_vars();
    op_desc_->SetInput("Inputs", in_names);
    op_desc_->SetOutput("Outputs", out_names);
    op_desc_->SetAttr<std::vector<std::string>>("input_data_names", in_names);
    op_desc_->SetAttr<std::vector<std::string>>("output_data_names",
                                                out_names);
    op = LiteOpRegistry::Global().Create(op_desc().Type());
    static_cast<operators::SubgraphOp*>(op.get())->SetProgramDesc(
        sub_program_desc);
  } else {
    op = LiteOpRegistry::Global().Create(op_desc().Type());
  }
  CHECK(op) << "no op found for " << op_desc().Type();
  op->Attach(*op_desc_, inst_scope_.get());

  auto kernels = op->CreateKernels({place_});
  // Filter out the target kernel by its alias.
  CHECK(!kernels.empty()) << "No kernel found for place "
                          << place_.DebugString();
  auto it = std::find_if(
      kernels.begin(), kernels.end(), [&](std::unique_ptr<KernelBase>& k) {
        return k->alias() == alias_;
      });
  CHECK(it != kernels.end()) << "failed to create the kernel in "
                             << place_.DebugString()
                             << " with alias: " << alias_;
  // Reset the final place.
  place_ = (*it)->place();
  // Prepare the context.
  (*it)->SetContext(std::move(ctx_));
  instruction_.reset(new Instruction(op, std::move(*it)));
#ifdef LITE_WITH_PROFILE
  instruction_->set_profiler(new profile::Profiler());
#endif
}

void TestCase::PrepareInputsForInstruction() {
  for (auto& arg : op_desc().InputArgumentNames()) {
    for (auto& var : op_desc().Input(arg)) {
      const auto* type = instruction_->kernel()->GetInputDeclType(arg);
      CHECK(base_scope_->FindVar(var));
      /// Create a tensor or tensor_array in the instruction's scope,
      /// alloc memory and then copy data there.
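      // Only inputs whose kernel-declared type lives on a non-host target
      // need the explicit device copy below; host-compatible inputs are
      // already usable from the instruction's scope as-is.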
      if (type->IsTensor() &&
          !TargetCompatibleTo(*Type::GetTensorTy(TARGET(kHost)), *type)) {
        const auto* base_tensor = base_scope_->FindTensor(var);
        auto* inst_tensor = inst_scope_->FindMutableTensor(var);
        CHECK(!base_tensor->dims().empty())
            << "The dims of the input tensor are empty yet";
        TargetCopy(type->target(),
                   inst_tensor->mutable_data(type->target(),
                                             base_tensor->memory_size()),
                   base_tensor->raw_data(),
                   base_tensor->memory_size());
      } else if (type->IsTensorList() &&
                 !TargetCompatibleTo(*Type::GetTensorListTy(TARGET(kHost)),
                                     *type)) {
        const auto* base_tensor_list = base_scope_->FindTensorList(var);
        auto* inst_tensor_list = inst_scope_->FindMutableTensorList(var);
        CHECK_EQ(base_tensor_list->size(), inst_tensor_list->size());
        for (size_t i = 0; i < base_tensor_list->size(); i++) {
          CHECK(!base_tensor_list->at(i).dims().empty())
              << "The dims of input tensor[" << i << "] are empty yet";
          // Copy from the baseline tensor, not from the destination tensor.
          TargetCopy(
              type->target(),
              inst_tensor_list->at(i).mutable_data(
                  type->target(), base_tensor_list->at(i).memory_size()),
              base_tensor_list->at(i).raw_data(),
              base_tensor_list->at(i).memory_size());
        }
      }
    }
  }
}

template <typename T>
bool TestCase::CheckTensorPrecision(const Tensor* inst_tensor,
                                    const Tensor* base_tensor,
                                    float abs_error) {
  CHECK(inst_tensor);
  CHECK(base_tensor);
  CHECK(ShapeEquals(inst_tensor->dims(), base_tensor->dims()));
  CHECK(inst_tensor->lod() == base_tensor->lod()) << "lod does not match";

  // The baseline should produce its outputs on a host device.
  CHECK(base_tensor->target() == TARGET(kHost) ||
        base_tensor->target() == TARGET(kX86) ||
        base_tensor->target() == TARGET(kARM));

  const T* inst_data{};
  Tensor inst_host_tensor;
  inst_host_tensor.Resize(inst_tensor->dims());
  switch (inst_tensor->target()) {
    case TARGET(kX86):
    case TARGET(kHost):
    case TARGET(kARM):
      inst_data = static_cast<const T*>(inst_tensor->raw_data());
      break;
#ifdef LITE_WITH_XPU
    case TARGET(kXPU):
      CopySync<TARGET(kXPU)>(inst_host_tensor.mutable_data<T>(),
                             inst_tensor->raw_data(),
                             sizeof(T) * inst_tensor->dims().production(),
                             IoDirection::DtoH);
      inst_data = inst_host_tensor.data<T>();
      break;
#endif
    default:
      // Before comparing, data must be copied from the `target` device to
      // the host.
      LOG(FATAL) << "Not supported";
  }

  CHECK(inst_data);

  const T* base_data = static_cast<const T*>(base_tensor->raw_data());

  bool success = true;
  for (int i = 0; i < inst_tensor->dims().production(); i++) {
    EXPECT_NEAR(inst_data[i], base_data[i], abs_error);
    if (fabsf(inst_data[i] - base_data[i]) > abs_error) {
      success = false;
    }
  }
  return success;
}

bool TestCase::CheckPrecision(const Tensor* inst_tensor,
                              const Tensor* base_tensor,
                              float abs_error,
                              PrecisionType precision_type) {
  PrecisionType precision_type_t = precision_type;
  if (precision_type == PRECISION(kAny)) {
    precision_type_t = base_tensor->precision();
  }
  CHECK(precision_type_t == base_tensor->precision())
      << "arg precision type and base tensor precision type are not matched! "
         "arg precision type is: "
      << PrecisionToStr(precision_type) << ", base tensor precision type is: "
      << PrecisionToStr(base_tensor->precision());
  CHECK(inst_tensor->precision() == base_tensor->precision())
      << "real tensor precision type and base tensor precision type are not "
         "matched! real tensor precision type is: "
      << PrecisionToStr(inst_tensor->precision())
      << ", base tensor precision type is: "
      << PrecisionToStr(base_tensor->precision());
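  // Dispatch on the resolved precision so CheckTensorPrecision<T> reads the
  // raw buffers with the matching element type.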
  switch (precision_type_t) {
    case PRECISION(kFloat):
      return CheckTensorPrecision<float>(inst_tensor, base_tensor, abs_error);
    case PRECISION(kInt8):
      return CheckTensorPrecision<int8_t>(inst_tensor, base_tensor, abs_error);
    case PRECISION(kInt32):
      return CheckTensorPrecision<int32_t>(
          inst_tensor, base_tensor, abs_error);
    case PRECISION(kInt64):
      return CheckTensorPrecision<int64_t>(
          inst_tensor, base_tensor, abs_error);
    case PRECISION(kBool):
      return CheckTensorPrecision<bool>(inst_tensor, base_tensor, abs_error);
    default:
      LOG(FATAL) << "unsupported precision type: "
                 << PrecisionToStr(precision_type);
      return false;
  }
}

bool TestCase::CheckPrecision(const std::string& var_name,
                              float abs_error,
                              PrecisionType precision_type) {
  bool success = true;
  if (inst_scope_->FindVar(var_name)->IsType<Tensor>()) {
    auto inst_tensor = inst_scope_->FindTensor(var_name);
    auto base_tensor = base_scope_->FindTensor(var_name);
    success =
        success &&
        CheckPrecision(inst_tensor, base_tensor, abs_error, precision_type);
  } else if (inst_scope_->FindVar(var_name)->IsType<std::vector<Tensor>>()) {
    auto inst_tensor_list = inst_scope_->FindMutableTensorList(var_name);
    auto base_tensor_list = base_scope_->FindMutableTensorList(var_name);
    CHECK_EQ(inst_tensor_list->size(), base_tensor_list->size());
    for (size_t i = 0; i < inst_tensor_list->size(); i++) {
      Tensor* inst_tensor = &(inst_tensor_list->at(i));
      Tensor* base_tensor = &(base_tensor_list->at(i));
      if (inst_tensor->dims().size() == 0 && base_tensor->dims().size() == 0) {
        // Skip entries that both sides left unset.
        continue;
      }
      success =
          success &&
          CheckPrecision(inst_tensor, base_tensor, abs_error, precision_type);
    }
  } else {
    LOG(FATAL) << "unsupported var type";
  }
  return success;
}

}  // namespace arena
}  // namespace lite
}  // namespace paddle
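// A minimal usage sketch (commented out, not part of this file): a typical
// arena test subclasses TestCase, prepares the op desc and input data,
// computes the baseline on the host, and drives the comparison through
// Arena::TestPrecision(). The op name "relu", the tester class, and the
// tensor names below are illustrative assumptions, not code from this file.
//
//   class ReluComputeTester : public arena::TestCase {
//    public:
//     ReluComputeTester(const Place& place, const std::string& alias)
//         : TestCase(place, alias) {}
//     void RunBaseline(Scope* scope) override {
//       // Fill the expected output tensor in `scope` on the host.
//     }
//     void PrepareOpDesc(cpp::OpDesc* op_desc) override {
//       op_desc->SetType("relu");
//       op_desc->SetInput("X", {"x"});
//       op_desc->SetOutput("Out", {"out"});
//     }
//     void PrepareData() override {
//       // e.g. SetCommonTensor("x", DDim({1, 16}), input_data.data());
//     }
//   };
//
//   TEST(Relu, precision) {
//     Place place(TARGET(kARM));
//     std::unique_ptr<arena::TestCase> tester(
//         new ReluComputeTester(place, "def"));
//     arena::Arena arena(std::move(tester), place, 2e-5);
//     arena.TestPrecision();
//   }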