// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/arena/framework.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <set>

#include "lite/core/context.h"
#include "lite/operators/subgraph_op.h"

namespace paddle {
namespace lite {
namespace arena {

void TestCase::CreateInstruction() {
25
  std::shared_ptr<lite::OpLite> op = nullptr;
26 27 28 29 30 31 32 33
  static const std::set<TargetType> subgraph_op_supported_targets(
      {TARGET(kNPU), TARGET(kXPU)});
  bool enable_subgraph_op = subgraph_op_supported_targets.find(place_.target) !=
                            subgraph_op_supported_targets.end();
#if defined(LITE_WITH_XPU) && !defined(LITE_WITH_XTCL)
  enable_subgraph_op = false;  // Use XPU kernel directly if XTCL is disabled.
#endif
  if (enable_subgraph_op) {
34 35 36 37 38 39 40 41 42 43 44 45
    // Create a new block desc to wrap the original op desc
    int sub_block_idx = 0;
    auto sub_block_desc = new cpp::BlockDesc();
    sub_block_desc->ClearOps();
    sub_block_desc->ClearVars();
    auto sub_block_op_desc = sub_block_desc->AddOp<cpp::OpDesc>();
    *sub_block_op_desc = *op_desc_;
    // Add the block desc into the subgraph op which used to replace the
    // original op
    op_desc_.reset(new cpp::OpDesc());
    op_desc_->SetType("subgraph");
    op_desc_->SetAttr<int32_t>("sub_block", sub_block_idx);
46 47 48 49 50 51
    auto in_names = sub_block_op_desc->input_vars();
    auto out_names = sub_block_op_desc->output_vars();
    op_desc_->SetInput("Inputs", in_names);
    op_desc_->SetOutput("Outputs", out_names);
    op_desc_->SetAttr<std::vector<std::string>>("input_data_names", in_names);
    op_desc_->SetAttr<std::vector<std::string>>("output_data_names", out_names);
52 53 54 55 56
    op = LiteOpRegistry::Global().Create(op_desc().Type());
    static_cast<operators::SubgraphOp*>(op.get())->SetSubBlock(sub_block_desc);
  } else {
    op = LiteOpRegistry::Global().Create(op_desc().Type());
  }
Y
Yan Chunwei 已提交
57
  CHECK(op) << "no op for " << op_desc().Type();
58
  op->Attach(*op_desc_, inst_scope_.get());
Y
Yan Chunwei 已提交
59 60 61 62 63 64 65 66 67 68 69
  auto kernels = op->CreateKernels({place_});
  // filter out the target kernel
  CHECK(!kernels.empty()) << "No kernel found for place "
                          << place_.DebugString();
  auto it = std::remove_if(
      kernels.begin(), kernels.end(), [&](std::unique_ptr<KernelBase>& k) {
        return k->alias() == alias_;
      });
  CHECK(it != kernels.end()) << "failed to create the kernel in "
                             << place_.DebugString()
                             << " with alias: " << alias_;
70 71
  // reset final place
  place_ = (*it)->place();
Y
Yan Chunwei 已提交
72 73 74
  // prepare context
  (*it)->SetContext(std::move(ctx_));
  instruction_.reset(new Instruction(op, std::move(*it)));
75 76 77
#ifdef LITE_WITH_PROFILE
  instruction_->set_profiler(new profile::Profiler());
#endif
Y
Yan Chunwei 已提交
78 79 80 81 82
}

void TestCase::PrepareInputsForInstruction() {
  for (auto& arg : op_desc().InputArgumentNames()) {
    for (auto& var : op_desc().Input(arg)) {
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
      const auto* type = instruction_->kernel()->GetInputDeclType(arg);
      CHECK(base_scope_->FindVar(var));
      /// Create a tensor or tensor_array in the instruction's scope,
      /// alloc memory and then copy data there.
      if (type->IsTensor() &&
          !TargetCompatibleTo(*Type::GetTensorTy(TARGET(kHost)), *type)) {
        const auto* base_tensor = base_scope_->FindTensor(var);
        auto* inst_tensor = inst_scope_->FindMutableTensor(var);
        CHECK(!base_tensor->dims().empty())
            << "The dims of input tensor is empty yet";
        TargetCopy(type->target(),
                   inst_tensor->mutable_data(type->target(),
                                             base_tensor->memory_size()),
                   base_tensor->raw_data(),
                   base_tensor->memory_size());
      } else if (type->IsTensorList() &&
                 !TargetCompatibleTo(*Type::GetTensorListTy(TARGET(kHost)),
                                     *type)) {
        const auto* base_tensor_list = base_scope_->FindTensorList(var);
        auto* inst_tensor_list = inst_scope_->FindMutableTensorList(var);
        CHECK_EQ(base_tensor_list->size(), inst_tensor_list->size());
        for (size_t i = 0; i < base_tensor_list->size(); i++) {
          CHECK(!base_tensor_list->at(i).dims().empty())
              << "The dims of input tensor[" << i << "] is empty yet";
          TargetCopy(type->target(),
                     inst_tensor_list->at(i).mutable_data(
                         type->target(), base_tensor_list->at(i).memory_size()),
                     inst_tensor_list->at(i).raw_data(),
                     inst_tensor_list->at(i).memory_size());
112
        }
Y
Yan Chunwei 已提交
113 114 115 116 117
      }
    }
  }
}

template <typename T>
119 120
bool TestCase::CheckTensorPrecision(const Tensor* inst_tensor,
                                    const Tensor* base_tensor,
121
                                    float abs_error) {
122 123
  CHECK(inst_tensor);
  CHECK(base_tensor);
124

125
  CHECK(ShapeEquals(inst_tensor->dims(), base_tensor->dims()));
126

127
  CHECK(inst_tensor->lod() == base_tensor->lod()) << "lod not match";
128 129

  // The baseline should output in host devices.
130 131 132 133 134 135 136
  CHECK(base_tensor->target() == TARGET(kHost) ||
        base_tensor->target() == TARGET(kX86) ||
        base_tensor->target() == TARGET(kARM));
  const T* inst_data{};
  Tensor inst_host_tensor;
  inst_host_tensor.Resize(inst_tensor->dims());
  switch (inst_tensor->target()) {
137 138 139
    case TARGET(kX86):
    case TARGET(kHost):
    case TARGET(kARM):
140
      inst_data = static_cast<const T*>(inst_tensor->raw_data());
141
      break;
142 143
#ifdef LITE_WITH_XPU
    case TARGET(kXPU):
144 145 146
      CopySync<TARGET(kXPU)>(inst_host_tensor.mutable_data<T>(),
                             inst_tensor->raw_data(),
                             sizeof(T) * inst_tensor->dims().production(),
147
                             IoDirection::DtoH);
148
      inst_data = inst_host_tensor.data<T>();
149 150
      break;
#endif
151 152 153 154 155 156

    default:
      // Before compare, need to copy data from `target` device to host.
      LOG(FATAL) << "Not supported";
  }

157
  CHECK(inst_data);
158

159
  const T* base_data = static_cast<const T*>(base_tensor->raw_data());
160 161

  bool success = true;
162 163 164
  for (int i = 0; i < inst_tensor->dims().production(); i++) {
    EXPECT_NEAR(inst_data[i], base_data[i], abs_error);
    if (fabsf(inst_data[i] - base_data[i]) > abs_error) {
165 166 167 168 169 170
      success = false;
    }
  }
  return success;
}

bool TestCase::CheckPrecision(const Tensor* inst_tensor,
                              const Tensor* base_tensor,
173 174 175 176
                              float abs_error,
                              PrecisionType precision_type) {
  PrecisionType precision_type_t = precision_type;
  if (precision_type == PRECISION(kAny)) {
177
    precision_type_t = base_tensor->precision();
178
  }
179
  CHECK(precision_type_t == base_tensor->precision())
180 181 182
      << "arg precision type and base tensor precision type are not matched! "
         "arg precision type is: "
      << PrecisionToStr(precision_type) << ", base tensor precision type is: "
183 184
      << PrecisionToStr(base_tensor->precision());
  CHECK(inst_tensor->precision() == base_tensor->precision())
185 186
      << "real tensor precision type and base tensor precision type are not "
         "matched! real tensor precision type is: "
187
      << PrecisionToStr(inst_tensor->precision())
188
      << ", base tensor precision type is: "
189
      << PrecisionToStr(base_tensor->precision());
190 191
  switch (precision_type_t) {
    case PRECISION(kFloat):
192
      return CheckTensorPrecision<float>(inst_tensor, base_tensor, abs_error);
193
    case PRECISION(kInt8):
194
      return CheckTensorPrecision<int8_t>(inst_tensor, base_tensor, abs_error);
195
    case PRECISION(kInt32):
196
      return CheckTensorPrecision<int32_t>(inst_tensor, base_tensor, abs_error);
197
    case PRECISION(kInt64):
198
      return CheckTensorPrecision<int64_t>(inst_tensor, base_tensor, abs_error);
199
    case PRECISION(kBool):
200
      return CheckTensorPrecision<bool>(inst_tensor, base_tensor, abs_error);
201 202 203 204 205 206 207 208 209 210 211
    default:
      LOG(FATAL) << "not support type: " << PrecisionToStr(precision_type);
      return false;
  }
}

// Check the variable `var_name` (a Tensor or a vector<Tensor>) in the
// instruction scope against its baseline counterpart. Returns true only if
// every compared tensor matches within `abs_error`.
bool TestCase::CheckPrecision(const std::string& var_name,
                              float abs_error,
                              PrecisionType precision_type) {
  auto* inst_var = inst_scope_->FindVar(var_name);
  if (inst_var->IsType<Tensor>()) {
    return CheckPrecision(inst_scope_->FindTensor(var_name),
                          base_scope_->FindTensor(var_name),
                          abs_error,
                          precision_type);
  }
  if (inst_var->IsType<std::vector<Tensor>>()) {
    auto* inst_tensor_list = inst_scope_->FindMutableTensorList(var_name);
    auto* base_tensor_list = base_scope_->FindMutableTensorList(var_name);
    CHECK_EQ(inst_tensor_list->size(), base_tensor_list->size());
    bool success = true;
    for (size_t i = 0; i < inst_tensor_list->size(); i++) {
      Tensor* inst_tensor = &(inst_tensor_list->at(i));
      Tensor* base_tensor = &(base_tensor_list->at(i));
      // Skip empty placeholder entries that carry no data on either side.
      if (inst_tensor->dims().size() == 0 && base_tensor->dims().size() == 0) {
        continue;
      }
      // Short-circuit: once a mismatch is found, later entries are skipped
      // (same evaluation order as the original `success && Check...`).
      success =
          success &&
          CheckPrecision(inst_tensor, base_tensor, abs_error, precision_type);
    }
    return success;
  }
  LOG(FATAL) << "unsupported var type";
  return true;
}

TestCase::~TestCase() {
  // Only the subgraph path allocates extra state: CreateInstruction() wraps
  // the original op desc in a raw-new'd BlockDesc owned by the subgraph op.
  if (op_desc_->Type() != "subgraph") {
    return;
  }
  auto subgraph_op = const_cast<operators::SubgraphOp*>(
      static_cast<const operators::SubgraphOp*>(instruction_->op()));
  CHECK(subgraph_op);
  // delete on a null pointer is a no-op, so no explicit null check is needed.
  delete subgraph_op->GetSubBlock();
}

}  // namespace arena
}  // namespace lite
}  // namespace paddle