fc_compute_test.cc
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"
#include "lite/tests/utils/naive_math_impl.h"
#ifdef LITE_WITH_X86
#include "lite/backends/x86/parallel.h"
#endif

namespace paddle {
namespace lite {

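// Adds the bias vector (length `channel`) to each of the `num` rows of `out`.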
void AddBias(float* out, const float* bias, int num, int channel) {
  int remain = channel;
  for (int j = 0; j < num; ++j) {
    const float* ptr_bias = bias;
    float* ptr_out = out + j * channel;
    for (int i = 0; i < remain; ++i) {
      *(ptr_out++) += *(ptr_bias++);
    }
  }
}

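// Applies ReLU in place over the whole (num x channel) buffer.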
void Relu(float* out, int num, int channel) {
  for (int i = 0; i < num * channel; ++i) {
    if (out[i] < 0) {
      out[i] = 0;
    }
  }
}

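// The FC output shape keeps the first `in_num_col_dim` dims of the input and
// appends the weight's output dimension wdim[1].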
DDim ComputeOutDim(const DDim& dim_in, const DDim& wdim, int in_num_col_dim) {
  std::vector<int64_t> out_dim;
  out_dim.resize(in_num_col_dim + 1);
  for (int i = 0; i < in_num_col_dim; ++i) {
    out_dim[i] = dim_in[i];
  }
  out_dim[in_num_col_dim] = wdim[1];
  return DDim(out_dim);
}

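// Test case that checks the fc op against a naive GEMM/GEMV reference.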
class FcOPTest : public arena::TestCase {
 protected:
  // common attributes for this op.
  std::string input_ = "x";
  std::string weight_ = "w";
  std::string weight_padding_ = "w_padding";
  std::string bias_ = "b";
  std::string out_ = "out";
  DDim dims_{{1, 128}};
  DDim wdims_{{128, 4}};
  DDim wdims_padding_;
  DDim bdims_{{4}};
  int in_num_col_dims_{1};
  bool with_relu_{false};
  bool padding_weights_{false};

 public:
  FcOPTest(const Place& place,
           const std::string& alias,
           DDim dim_in,
           DDim dim_w,
           DDim dim_b,
           int in_num_col_dims,
           bool with_relu,
           bool padding)
      : TestCase(place, alias),
        dims_(std::move(dim_in)),
        wdims_(std::move(dim_w)),
        bdims_(dim_b),
        in_num_col_dims_(in_num_col_dims),
        with_relu_(with_relu) {
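    // Weight padding is only exercised on x86, and only when both weight dims
    // are multiples of 128; the padded tensor gets 4 extra rows and columns.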
#ifdef LITE_WITH_X86
    if (padding && wdims_[0] % 128 == 0 && wdims_[1] % 128 == 0) {
      padding_weights_ = true;
      wdims_padding_ = DDim({wdims_[0] + 4, wdims_[1] + 4});
    }
#endif
  }

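  // Computes the reference result into `out_` using the naive math helpers.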
  void RunBaseline(Scope* scope) override {
    auto x = scope->FindTensor(input_);
    auto w = scope->FindTensor(weight_);
    auto b = scope->FindTensor(bias_);
    bool flag_bias = (b != nullptr);  // bias tensor is absent when bdims_ is empty
    auto out = scope->NewTensor(out_);
    CHECK(out);
    DDim out_dim = ComputeOutDim(x->dims(), w->dims(), in_num_col_dims_);
    out->Resize(out_dim);

    auto x_data = x->data<float>();
    auto w_data = w->data<float>();
    const float* b_data = nullptr;
    if (flag_bias) {
      b_data = b->data<float>();
    }
    auto out_data = out->mutable_data<float>();

    int m = x->dims().count(0, in_num_col_dims_);
    CHECK_EQ(wdims_[0], x->dims().count(in_num_col_dims_, x->dims().size()));
    int k = wdims_[0];
    int n = wdims_[1];

    LOG(INFO) << "M=" << m << ", N=" << n << ", K=" << k
              << ", bias=" << flag_bias << ", with_relu=" << with_relu_
              << ", padding_weights=" << padding_weights_;

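    // A single input row reduces the FC to a GEMV; otherwise run a full GEMM
    // and add the bias afterwards.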
    if (m == 1) {
      basic_gemv(n,
                 k,
                 w_data,
                 x_data,
                 b_data,
                 out_data,
                 1.f,
                 0.f,
                 true,
                 static_cast<int>(flag_bias),
                 false);
    } else {
      basic_gemm(false,
                 false,
                 m,
                 n,
                 k,
                 1.f,
                 x_data,
                 k,
                 w_data,
                 n,
                 0.f,
                 out_data,
                 n,
                 b_data,
                 false,
                 false);
      if (flag_bias) {
        AddBias(out_data, b_data, m, n);
      }
    }
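    // The fused ReLU is only requested on x86 (see PrepareOpDesc), so the
    // baseline applies it only there as well.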
#ifdef LITE_WITH_X86
    if (flag_bias && with_relu_) {
      Relu(out_data, m, n);
    }
#endif
  }

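  // Builds the op description for the fc kernel under test.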
  void PrepareOpDesc(cpp::OpDesc* op_desc) {
    op_desc->SetType("fc");
    op_desc->SetInput("Input", {input_});
    if (padding_weights_) {
      op_desc->SetInput("W", {weight_padding_});
    } else {
      op_desc->SetInput("W", {weight_});
    }
    if (bdims_.production() > 0) {
      op_desc->SetInput("Bias", {bias_});
    }
    op_desc->SetOutput("Out", {out_});
    op_desc->SetAttr<int>("in_num_col_dims", in_num_col_dims_);
#ifdef LITE_WITH_X86
    std::string activation_type = with_relu_ ? "relu" : "";
    op_desc->SetAttr<std::string>("activation_type", activation_type);
    op_desc->SetAttr<bool>("padding_weights", padding_weights_);
#endif
  }

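  // Fills input, weight, and (optional) bias tensors with random data.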
  void PrepareData() override {
    std::vector<float> din(dims_.production());
    fill_data_rand(din.data(), -1.f, 1.f, dims_.production());

    std::vector<float> win(wdims_.production());
    fill_data_rand(win.data(), -1.f, 1.f, wdims_.production());

    bool flag_bias = bdims_.production() > 0;
    std::vector<float> bin(bdims_.production());
    fill_data_rand(bin.data(), -1.f, 1.f, bdims_.production());

    SetCommonTensor(input_, dims_, din.data());
    SetCommonTensor(weight_, wdims_, win.data());
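    // For the padding path, also register a padded weight tensor whose
    // top-left wdims_ block holds the real data (the rest stays zero).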
    if (padding_weights_) {
      std::vector<float> win_padding(wdims_padding_.production());
      for (int64_t i = 0; i < wdims_[0]; ++i) {
        memcpy(&(win_padding[i * wdims_padding_[1]]),
               &(win[i * wdims_[1]]),
               wdims_[1] * sizeof(float));
      }
      SetCommonTensor(weight_padding_, wdims_padding_, win_padding.data());
    }
    if (flag_bias) {
      SetCommonTensor(bias_, bdims_, bin.data());
    }
  }
};

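// Sweeps (m, n, k) shapes with and without bias and checks precision against
// the baseline; ReLU cases without bias are skipped.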
void TestFCMain(Place place,
                float abs_error,
                bool with_relu = false,
                bool padding = false) {
  for (auto& m : {1, 3, 16}) {
    for (auto& n : {1, 4, 16, 128, 256, 1024}) {
      for (auto& k : {1, 16, 128, 1024}) {
        for (auto& bflag : {false, true}) {
          if (!bflag && with_relu) {
            continue;
          }
          DDim dim_in{{m, k}};
          DDim wdim{{k, n}};
          DDim bdim{{bflag ? n : 0}};
          std::unique_ptr<arena::TestCase> tester(new FcOPTest(
              place, "def", dim_in, wdim, bdim, 1, with_relu, padding));
#ifdef LITE_WITH_ARM
          if (place == TARGET(kARM)) {
            auto& ctx = tester->context()->As<ARMContext>();
            ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 1);
          }
#endif
          arena::Arena arena(std::move(tester), place, abs_error);
          if (!arena.TestPrecision()) {
            LOG(ERROR) << "run m: " << m << ", n: " << n << ", k: " << k
                       << ", bias: " << (bflag ? "true" : "false") << " failed";
            return;
          }
        }
      }
    }
  }
}

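// Basic precision test; target and tolerance depend on the enabled backend.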
TEST(FcOP, precision) {
  Place place;
  float abs_error = 6e-5;
#if defined(LITE_WITH_NPU)
  place = TARGET(kNPU);
  abs_error = 2e-1;  // Using fp16 in NPU
#elif defined(LITE_WITH_X86)
  place = TARGET(kX86);
  abs_error = 1e-4;
#elif defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#else
  return;
#endif
  TestFCMain(place, abs_error);
}

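// x86 only: exercises the fused ReLU, padded weights, and a 4-thread run.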
#ifdef LITE_WITH_X86
TEST(FcOP, padding_and_parallel) {
  Place place(TARGET(kX86));
  float abs_error = 1e-4;
  x86::SetNumThreads(4);
  TestFCMain(place, abs_error, true, true);
}
#endif

}  // namespace lite
}  // namespace paddle