framework.cc 9.7 KB
Newer Older
Y
Yan Chunwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/arena/framework.h"
16
#include <set>
Y
Yan Chunwei 已提交
17
#include "lite/core/context.h"
18
#include "lite/operators/subgraph_op.h"
Y
Yan Chunwei 已提交
19 20 21 22 23 24

namespace paddle {
namespace lite {
namespace arena {

void TestCase::CreateInstruction() {
25
  std::shared_ptr<lite::OpLite> op = nullptr;
26 27 28 29 30 31 32 33
  static const std::set<TargetType> subgraph_op_supported_targets(
      {TARGET(kNPU), TARGET(kXPU)});
  bool enable_subgraph_op = subgraph_op_supported_targets.find(place_.target) !=
                            subgraph_op_supported_targets.end();
#if defined(LITE_WITH_XPU) && !defined(LITE_WITH_XTCL)
  enable_subgraph_op = false;  // Use XPU kernel directly if XTCL is disabled.
#endif
  if (enable_subgraph_op) {
34
    // Create a new block desc to wrap the original op desc
35
    auto sub_program_desc = std::make_shared<cpp::ProgramDesc>();
36
    int sub_block_idx = 0;
37
    auto sub_block_desc = sub_program_desc->AddBlock<cpp::BlockDesc>();
38 39
    sub_block_desc->ClearOps();
    sub_block_desc->ClearVars();
40 41
    auto sub_op_desc = sub_block_desc->AddOp<cpp::OpDesc>();
    *sub_op_desc = *op_desc_;
42 43 44 45 46
    // Add the block desc into the subgraph op which used to replace the
    // original op
    op_desc_.reset(new cpp::OpDesc());
    op_desc_->SetType("subgraph");
    op_desc_->SetAttr<int32_t>("sub_block", sub_block_idx);
47 48
    auto in_names = sub_op_desc->input_vars();
    auto out_names = sub_op_desc->output_vars();
49 50 51 52
    op_desc_->SetInput("Inputs", in_names);
    op_desc_->SetOutput("Outputs", out_names);
    op_desc_->SetAttr<std::vector<std::string>>("input_data_names", in_names);
    op_desc_->SetAttr<std::vector<std::string>>("output_data_names", out_names);
53
    op = LiteOpRegistry::Global().Create(op_desc().Type());
54 55
    static_cast<operators::SubgraphOp*>(op.get())->SetProgramDesc(
        sub_program_desc);
56 57 58
  } else {
    op = LiteOpRegistry::Global().Create(op_desc().Type());
  }
Y
Yan Chunwei 已提交
59
  CHECK(op) << "no op for " << op_desc().Type();
60
  op->Attach(*op_desc_, inst_scope_.get());
Y
Yan Chunwei 已提交
61 62 63 64
  auto kernels = op->CreateKernels({place_});
  // filter out the target kernel
  CHECK(!kernels.empty()) << "No kernel found for place "
                          << place_.DebugString();
65
  auto it = std::find_if(
Y
Yan Chunwei 已提交
66 67 68 69 70 71
      kernels.begin(), kernels.end(), [&](std::unique_ptr<KernelBase>& k) {
        return k->alias() == alias_;
      });
  CHECK(it != kernels.end()) << "failed to create the kernel in "
                             << place_.DebugString()
                             << " with alias: " << alias_;
72 73
  // reset final place
  place_ = (*it)->place();
Y
Yan Chunwei 已提交
74 75 76
  // prepare context
  (*it)->SetContext(std::move(ctx_));
  instruction_.reset(new Instruction(op, std::move(*it)));
77 78 79
#ifdef LITE_WITH_PROFILE
  instruction_->set_profiler(new profile::Profiler());
#endif
Y
Yan Chunwei 已提交
80 81 82 83 84
}

void TestCase::PrepareInputsForInstruction() {
  for (auto& arg : op_desc().InputArgumentNames()) {
    for (auto& var : op_desc().Input(arg)) {
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
      const auto* type = instruction_->kernel()->GetInputDeclType(arg);
      CHECK(base_scope_->FindVar(var));
      /// Create a tensor or tensor_array in the instruction's scope,
      /// alloc memory and then copy data there.
      if (type->IsTensor() &&
          !TargetCompatibleTo(*Type::GetTensorTy(TARGET(kHost)), *type)) {
        const auto* base_tensor = base_scope_->FindTensor(var);
        auto* inst_tensor = inst_scope_->FindMutableTensor(var);
        CHECK(!base_tensor->dims().empty())
            << "The dims of input tensor is empty yet";
        TargetCopy(type->target(),
                   inst_tensor->mutable_data(type->target(),
                                             base_tensor->memory_size()),
                   base_tensor->raw_data(),
                   base_tensor->memory_size());
      } else if (type->IsTensorList() &&
                 !TargetCompatibleTo(*Type::GetTensorListTy(TARGET(kHost)),
                                     *type)) {
        const auto* base_tensor_list = base_scope_->FindTensorList(var);
        auto* inst_tensor_list = inst_scope_->FindMutableTensorList(var);
        CHECK_EQ(base_tensor_list->size(), inst_tensor_list->size());
        for (size_t i = 0; i < base_tensor_list->size(); i++) {
          CHECK(!base_tensor_list->at(i).dims().empty())
              << "The dims of input tensor[" << i << "] is empty yet";
          TargetCopy(type->target(),
                     inst_tensor_list->at(i).mutable_data(
                         type->target(), base_tensor_list->at(i).memory_size()),
                     inst_tensor_list->at(i).raw_data(),
                     inst_tensor_list->at(i).memory_size());
114
        }
Y
Yan Chunwei 已提交
115 116 117 118 119
      }
    }
  }
}

120
template <typename T>
121 122
bool TestCase::CheckTensorPrecision(const Tensor* inst_tensor,
                                    const Tensor* base_tensor,
123
                                    float abs_error) {
124 125
  CHECK(inst_tensor);
  CHECK(base_tensor);
126

127
  CHECK(ShapeEquals(inst_tensor->dims(), base_tensor->dims()));
128

129
  CHECK(inst_tensor->lod() == base_tensor->lod()) << "lod not match";
130 131

  // The baseline should output in host devices.
132 133 134 135 136 137 138
  CHECK(base_tensor->target() == TARGET(kHost) ||
        base_tensor->target() == TARGET(kX86) ||
        base_tensor->target() == TARGET(kARM));
  const T* inst_data{};
  Tensor inst_host_tensor;
  inst_host_tensor.Resize(inst_tensor->dims());
  switch (inst_tensor->target()) {
139 140 141
    case TARGET(kX86):
    case TARGET(kHost):
    case TARGET(kARM):
142
      inst_data = static_cast<const T*>(inst_tensor->raw_data());
143
      break;
144 145
#ifdef LITE_WITH_XPU
    case TARGET(kXPU):
146 147 148
      CopySync<TARGET(kXPU)>(inst_host_tensor.mutable_data<T>(),
                             inst_tensor->raw_data(),
                             sizeof(T) * inst_tensor->dims().production(),
149
                             IoDirection::DtoH);
150
      inst_data = inst_host_tensor.data<T>();
151 152
      break;
#endif
153 154 155 156 157 158

    default:
      // Before compare, need to copy data from `target` device to host.
      LOG(FATAL) << "Not supported";
  }

159
  CHECK(inst_data);
160

161
  const T* base_data = static_cast<const T*>(base_tensor->raw_data());
162 163

  bool success = true;
164 165 166
  for (int i = 0; i < inst_tensor->dims().production(); i++) {
    EXPECT_NEAR(inst_data[i], base_data[i], abs_error);
    if (fabsf(inst_data[i] - base_data[i]) > abs_error) {
167 168 169 170 171 172
      success = false;
    }
  }
  return success;
}

173 174
bool TestCase::CheckPrecision(const Tensor* inst_tensor,
                              const Tensor* base_tensor,
175 176 177 178
                              float abs_error,
                              PrecisionType precision_type) {
  PrecisionType precision_type_t = precision_type;
  if (precision_type == PRECISION(kAny)) {
179
    precision_type_t = base_tensor->precision();
180
  }
181
  CHECK(precision_type_t == base_tensor->precision())
182 183 184
      << "arg precision type and base tensor precision type are not matched! "
         "arg precision type is: "
      << PrecisionToStr(precision_type) << ", base tensor precision type is: "
185 186
      << PrecisionToStr(base_tensor->precision());
  CHECK(inst_tensor->precision() == base_tensor->precision())
187 188
      << "real tensor precision type and base tensor precision type are not "
         "matched! real tensor precision type is: "
189
      << PrecisionToStr(inst_tensor->precision())
190
      << ", base tensor precision type is: "
191
      << PrecisionToStr(base_tensor->precision());
192 193
  switch (precision_type_t) {
    case PRECISION(kFloat):
194
      return CheckTensorPrecision<float>(inst_tensor, base_tensor, abs_error);
195
    case PRECISION(kInt8):
196
      return CheckTensorPrecision<int8_t>(inst_tensor, base_tensor, abs_error);
197
    case PRECISION(kInt32):
198
      return CheckTensorPrecision<int32_t>(inst_tensor, base_tensor, abs_error);
199
    case PRECISION(kInt64):
200
      return CheckTensorPrecision<int64_t>(inst_tensor, base_tensor, abs_error);
201
    case PRECISION(kBool):
202
      return CheckTensorPrecision<bool>(inst_tensor, base_tensor, abs_error);
203 204 205 206 207 208 209 210 211 212 213
    default:
      LOG(FATAL) << "not support type: " << PrecisionToStr(precision_type);
      return false;
  }
}

bool TestCase::CheckPrecision(const std::string& var_name,
                              float abs_error,
                              PrecisionType precision_type) {
  bool success = true;
  if (inst_scope_->FindVar(var_name)->IsType<Tensor>()) {
214 215 216 217 218
    auto inst_tensor = inst_scope_->FindTensor(var_name);
    auto base_tensor = base_scope_->FindTensor(var_name);
    success =
        success &&
        CheckPrecision(inst_tensor, base_tensor, abs_error, precision_type);
219
  } else if (inst_scope_->FindVar(var_name)->IsType<std::vector<Tensor>>()) {
220 221 222 223 224 225 226
    auto inst_tensor_list = inst_scope_->FindMutableTensorList(var_name);
    auto base_tensor_list = base_scope_->FindMutableTensorList(var_name);
    CHECK_EQ(inst_tensor_list->size(), base_tensor_list->size());
    for (size_t i = 0; i < inst_tensor_list->size(); i++) {
      Tensor* inst_tensor = &(inst_tensor_list->at(i));
      Tensor* base_tensor = &(base_tensor_list->at(i));
      if (inst_tensor->dims().size() == 0 && base_tensor->dims().size() == 0) {
227 228
        continue;
      }
229 230 231
      success =
          success &&
          CheckPrecision(inst_tensor, base_tensor, abs_error, precision_type);
232 233 234 235 236 237 238
    }
  } else {
    LOG(FATAL) << "unsupported var type";
  }
  return success;
}

Y
Yan Chunwei 已提交
239 240 241
}  // namespace arena
}  // namespace lite
}  // namespace paddle