conv_2d_test.cc 26.7 KB
Newer Older
L
Liangliang He 已提交
1 2 3 4
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

5
#include <fstream>
L
Liangliang He 已提交
6
#include "mace/ops/conv_2d.h"
7
#include "mace/ops/ops_test_util.h"
L
Liangliang He 已提交
8 9 10 11 12

using namespace mace;

// Fixture used by every TEST_F in this file; it adds no state or helpers on
// top of OpsTestBase.
class Conv2dOpTest : public OpsTestBase {
};

13
template <DeviceType D>
L
liuqi 已提交
14 15
void TestSimple3x3VALID() {
  OpsTestNet net;
李寅 已提交
16
  OpDefBuilder("Conv2D", "Conv2dTest")
L
Liangliang He 已提交
17 18 19 20
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
L
Liangliang He 已提交
21 22 23
      .AddIntsArg("strides", {1, 1})
      .AddIntArg("padding", Padding::VALID)
      .AddIntsArg("dilations", {1, 1})
24
      .Finalize(net.NewOperatorDef());
L
Liangliang He 已提交
25 26 27 28

  // Add args

  // Add input data
L
liuqi 已提交
29
  net.AddInputFromArray<D, float>(
L
Liangliang He 已提交
30 31
      "Input", {1, 2, 3, 3},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
L
liuqi 已提交
32
  net.AddInputFromArray<D, float>(
L
Liangliang He 已提交
33 34 35
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
L
liuqi 已提交
36
  net.AddInputFromArray<D, float>("Bias", {1}, {0.1f});
L
Liangliang He 已提交
37 38

  // Run
L
liuqi 已提交
39
  net.RunOp(D);
L
Liangliang He 已提交
40 41

  // Check
李寅 已提交
42
  auto expected = CreateTensor<float>({1, 1, 1, 1}, {18.1f});
L
Liangliang He 已提交
43

李寅 已提交
44
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
L
Liangliang He 已提交
45 46
}

47
template <DeviceType D>
L
liuqi 已提交
48 49
void TestSimple3x3SAME() {
  OpsTestNet net;
李寅 已提交
50
  OpDefBuilder("Conv2D", "Conv2dTest")
L
Liangliang He 已提交
51 52 53 54
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
L
Liangliang He 已提交
55 56 57
      .AddIntsArg("strides", {1, 1})
      .AddIntArg("padding", Padding::SAME)
      .AddIntsArg("dilations", {1, 1})
58
      .Finalize(net.NewOperatorDef());
L
Liangliang He 已提交
59 60

  // Add input data
L
liuqi 已提交
61
  net.AddInputFromArray<D, float>(
L
Liangliang He 已提交
62 63
      "Input", {1, 2, 3, 3},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
L
liuqi 已提交
64
  net.AddInputFromArray<D, float>(
L
Liangliang He 已提交
65 66 67
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
L
liuqi 已提交
68
  net.AddInputFromArray<D, float>("Bias", {1}, {0.1f});
L
Liangliang He 已提交
69 70

  // Run
L
liuqi 已提交
71
  net.RunOp(D);
L
Liangliang He 已提交
72 73

  // Check
L
Liangliang He 已提交
74 75 76
  auto expected = CreateTensor<float>(
      {1, 1, 3, 3},
      {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f});
L
Liangliang He 已提交
77

李寅 已提交
78
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
L
Liangliang He 已提交
79 80
}

81
// The NEON variants are only compiled when the toolchain defines __ARM_NEON.
// `#ifdef` is used (rather than `#if`) to match the other NEON guards in this
// file and to avoid evaluating an undefined macro.
#ifdef __ARM_NEON
// Runs the simple 3x3 VALID and SAME convolution checks on the NEON device.
TEST_F(Conv2dOpTest, NEONSimple) {
  TestSimple3x3VALID<DeviceType::NEON>();
  TestSimple3x3SAME<DeviceType::NEON>();
}
#endif
L
liuqi 已提交
87

88
template <DeviceType D, typename T>
L
liuqi 已提交
89 90 91
void TestNHWCSimple3x3VALID() {
  OpsTestNet net;
  // Add input data
L
liuqi 已提交
92
  net.AddInputFromArray<D, T>(
L
liuqi 已提交
93 94
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
L
liuqi 已提交
95
  net.AddInputFromArray<D, T>(
L
liuqi 已提交
96 97 98
      "Filter", {3, 3, 2, 1},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
L
liuqi 已提交
99
  net.AddInputFromArray<D, T>("Bias", {1}, {0.1f});
L
liuqi 已提交
100 101

  if (D == DeviceType::OPENCL) {
102 103 104 105 106 107
    BufferToImage<D, T>(net, "Input", "InputImage",
                        kernels::BufferType::IN_OUT);
    BufferToImage<D, T>(net, "Filter", "FilterImage",
                        kernels::BufferType::FILTER);
    BufferToImage<D, T>(net, "Bias", "BiasImage",
                        kernels::BufferType::ARGUMENT);
L
liuqi 已提交
108 109 110 111 112 113 114 115
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
116
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
L
liuqi 已提交
117 118 119 120 121
        .Finalize(net.NewOperatorDef());

    net.RunOp(D);

    // Transfer output
122 123
    ImageToBuffer<D, T>(net, "OutputImage", "Output",
                        kernels::BufferType::IN_OUT);
L
liuqi 已提交
124 125 126 127 128 129 130 131 132 133

  } else {
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
134
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
L
liuqi 已提交
135 136 137 138 139
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  }

140 141
  auto expected = CreateTensor<float>({1, 1, 1, 1}, {18.1f});
  ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
L
liuqi 已提交
142 143
}

144
template <DeviceType D, typename T>
L
liuqi 已提交
145 146 147 148
void TestNHWCSimple3x3SAME() {
  OpsTestNet net;

  // Add input data
L
liuqi 已提交
149
  net.AddInputFromArray<D, T>(
L
liuqi 已提交
150 151
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
L
liuqi 已提交
152
  net.AddInputFromArray<D, T>(
L
liuqi 已提交
153 154 155
      "Filter", {3, 3, 2, 1},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
L
liuqi 已提交
156
  net.AddInputFromArray<D, T>("Bias", {1}, {0.1f});
L
liuqi 已提交
157 158

  if (D == DeviceType::OPENCL) {
159 160 161 162 163 164
    BufferToImage<D, T>(net, "Input", "InputImage",
                        kernels::BufferType::IN_OUT);
    BufferToImage<D, T>(net, "Filter", "FilterImage",
                        kernels::BufferType::FILTER);
    BufferToImage<D, T>(net, "Bias", "BiasImage",
                        kernels::BufferType::ARGUMENT);
L
liuqi 已提交
165 166 167 168 169 170 171 172
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
173
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
L
liuqi 已提交
174 175 176 177 178
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);

    // Transfer output
179 180
    ImageToBuffer<D, T>(net, "OutputImage", "Output",
                        kernels::BufferType::IN_OUT);
L
liuqi 已提交
181 182 183 184 185 186 187 188 189 190

  } else {
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
191
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
L
liuqi 已提交
192 193 194 195 196
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  }

197
  auto expected = CreateTensor<float>(
L
liuqi 已提交
198 199 200
      {1, 3, 3, 1},
      {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f});

201
  ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
L
liuqi 已提交
202 203 204
}

// Simple 3x3 VALID and SAME convolutions on the CPU device with float data.
TEST_F(Conv2dOpTest, CPUSimple) {
  TestNHWCSimple3x3VALID<DeviceType::CPU, float>();
  TestNHWCSimple3x3SAME<DeviceType::CPU, float>();
}

L
liuqi 已提交
209
// Simple 3x3 VALID and SAME convolutions on the OPENCL device with float data.
TEST_F(Conv2dOpTest, OPENCLSimple) {
  TestNHWCSimple3x3VALID<DeviceType::OPENCL, float>();
  TestNHWCSimple3x3SAME<DeviceType::OPENCL, float>();
}

214
template <DeviceType D>
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
void TestSimple3x3WithoutBias() {
  OpsTestNet net;
  OpDefBuilder("Conv2D", "Conv2dTest")
      .Input("Input")
      .Input("Filter")
      .Output("Output")
      .AddIntsArg("strides", {1, 1})
      .AddIntArg("padding", Padding::VALID)
      .AddIntsArg("dilations", {1, 1})
      .Finalize(net.NewOperatorDef());

  // Add input data
  net.AddInputFromArray<D, float>(
      "Input", {1, 2, 3, 3},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  net.AddInputFromArray<D, float>(
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});

  // Run
  net.RunOp(D);

  // Check
  auto expected = CreateTensor<float>({1, 1, 1, 1}, {18.0f});

  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}

244
#ifdef __ARM_NEON
// Bias-free 3x3 VALID convolution on the NEON device.
// The test was previously registered as "NEONWithouBias"; the name is
// corrected to match CPUWithoutBias and OPENCLWithoutBias.
TEST_F(Conv2dOpTest, NEONWithoutBias) {
  TestSimple3x3WithoutBias<DeviceType::NEON>();
}
#endif
249

250
template <DeviceType D, typename T>
L
liuqi 已提交
251 252 253 254
void TestNHWCSimple3x3WithoutBias() {
  OpsTestNet net;

  // Add input data
255
  net.AddInputFromArray<D, T>(
L
liuqi 已提交
256 257
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
258
  net.AddInputFromArray<D, T>(
L
liuqi 已提交
259 260 261 262 263
      "Filter", {3, 3, 2, 1},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});

  if (D == DeviceType::OPENCL) {
264 265 266 267
    BufferToImage<D, T>(net, "Input", "InputImage",
                        kernels::BufferType::IN_OUT);
    BufferToImage<D, T>(net, "Filter", "FilterImage",
                        kernels::BufferType::FILTER);
L
liuqi 已提交
268 269 270 271 272 273 274 275

    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
276
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
L
liuqi 已提交
277 278 279 280
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
    // Transfer output
281 282
    ImageToBuffer<D, T>(net, "OutputImage", "Output",
                        kernels::BufferType::IN_OUT);
L
liuqi 已提交
283 284 285 286 287 288 289 290
  } else {
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Output("Output")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
291
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
L
liuqi 已提交
292 293 294 295 296 297 298 299 300
        .Finalize(net.NewOperatorDef());

    // Run
    net.RunOp(D);
  }

  // Check
  auto expected = CreateTensor<float>({1, 1, 1, 1}, {18.0f});

301
  ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
L
liuqi 已提交
302 303 304
}

// Bias-free 3x3 VALID convolution on the CPU device with float data.
TEST_F(Conv2dOpTest, CPUWithoutBias) {
  TestNHWCSimple3x3WithoutBias<DeviceType::CPU, float>();
}

308
// Bias-free 3x3 VALID convolution on the OPENCL device with float data.
TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
  TestNHWCSimple3x3WithoutBias<DeviceType::OPENCL, float>();
}

312
template <DeviceType D>
L
liuqi 已提交
313
static void TestCombined3x3() {
L
Liangliang He 已提交
314
  // Construct graph
L
liuqi 已提交
315
  OpsTestNet net;
李寅 已提交
316
  OpDefBuilder("Conv2D", "Conv2DTest")
L
Liangliang He 已提交
317 318 319 320
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
L
Liangliang He 已提交
321 322 323
      .AddIntsArg("strides", {2, 2})
      .AddIntArg("padding", Padding::SAME)
      .AddIntsArg("dilations", {1, 1})
324
      .Finalize(net.NewOperatorDef());
L
Liangliang He 已提交
325 326

  // Add input data
L
liuqi 已提交
327
  net.AddInputFromArray<D, float>(
L
Liangliang He 已提交
328 329 330
      "Input", {1, 2, 5, 5}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
L
liuqi 已提交
331
  net.AddInputFromArray<D, float>(
L
Liangliang He 已提交
332 333 334 335
      "Filter", {2, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
       0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f});
L
liuqi 已提交
336
  net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f});
L
Liangliang He 已提交
337 338

  // Run
L
liuqi 已提交
339
  net.RunOp(D);
L
Liangliang He 已提交
340 341

  // Check
L
Liangliang He 已提交
342 343 344
  auto expected = CreateTensor<float>(
      {1, 2, 3, 3}, {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f,
                     4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f});
L
Liangliang He 已提交
345

李寅 已提交
346
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
L
liuqi 已提交
347 348
}

349
#ifdef __ARM_NEON
// Stride-2 SAME-padded 3x3 convolution on the NEON device.
TEST_F(Conv2dOpTest, NEONCombined) {
  TestCombined3x3<DeviceType::NEON>();
}
#endif
L
liuqi 已提交
352

353
template <DeviceType D, typename T>
L
liuqi 已提交
354 355 356 357 358
static void TestNHWCCombined3x3() {
  // Construct graph
  OpsTestNet net;

  // Add input data
359
  net.AddInputFromArray<D, T>(
L
liuqi 已提交
360
      "Input", {1, 5, 5, 2}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
361 362
                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
363
  net.AddInputFromArray<D, T>(
L
liuqi 已提交
364 365 366 367
      "Filter", {3, 3, 2, 2},
      {1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f,
       1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f,
       1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f});
368
  net.AddInputFromArray<D, T>("Bias", {2}, {0.1f, 0.2f});
L
liuqi 已提交
369 370

  if (D == DeviceType::OPENCL) {
371 372 373 374 375 376
    BufferToImage<D, T>(net, "Input", "InputImage",
                        kernels::BufferType::IN_OUT);
    BufferToImage<D, T>(net, "Filter", "FilterImage",
                        kernels::BufferType::FILTER);
    BufferToImage<D, T>(net, "Bias", "BiasImage",
                        kernels::BufferType::ARGUMENT);
L
liuqi 已提交
377 378 379 380 381 382 383 384 385

    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {2, 2})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
386
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
L
liuqi 已提交
387 388 389 390
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);

391 392
    ImageToBuffer<D, T>(net, "OutputImage", "Output",
                        kernels::BufferType::IN_OUT);
L
liuqi 已提交
393 394 395 396 397 398 399 400 401
  } else {
    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {2, 2})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
402
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
L
liuqi 已提交
403 404 405 406 407 408 409
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  }

  // Check
  auto expected = CreateTensor<float>(
410 411
      {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f,
                     9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f});
412 413
  ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
}
L
liuqi 已提交
414

415 416
// Stride-2 SAME-padded 3x3 convolution on the CPU device with float data.
TEST_F(Conv2dOpTest, CPUStride2) {
  TestNHWCCombined3x3<DeviceType::CPU, float>();
}

419 420
// Stride-2 SAME-padded 3x3 convolution on the OPENCL device with float data.
TEST_F(Conv2dOpTest, OPENCLStride2) {
  TestNHWCCombined3x3<DeviceType::OPENCL, float>();
}

423
template <DeviceType D>
424
void TestConv1x1() {
425
  // Construct graph
426
  OpsTestNet net;
L
Liangliang He 已提交
427 428

  // Add input data
429
  net.AddInputFromArray<D, float>(
L
liuqi 已提交
430
      "Input", {1, 3, 10, 5},
L
Liangliang He 已提交
431 432 433 434 435 436 437
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
438
  net.AddInputFromArray<D, float>(
L
liuqi 已提交
439 440
      "Filter", {1, 1, 5, 2},
      {1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f});
441
  net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f});
L
Liangliang He 已提交
442

443
  if (D == DeviceType::OPENCL) {
444 445 446 447 448 449
    BufferToImage<D, float>(net, "Input", "InputImage",
                            kernels::BufferType::IN_OUT);
    BufferToImage<D, float>(net, "Filter", "FilterImage",
                            kernels::BufferType::FILTER);
    BufferToImage<D, float>(net, "Bias", "BiasImage",
                            kernels::BufferType::ARGUMENT);
450 451 452 453 454 455 456 457 458 459 460 461 462

    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);

463 464
    ImageToBuffer<D, float>(net, "OutputImage", "Output",
                            kernels::BufferType::IN_OUT);
465 466 467 468 469 470 471 472 473 474 475 476 477
  } else {
    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  }
L
Liangliang He 已提交
478 479

  // Check
L
Liangliang He 已提交
480
  auto expected = CreateTensor<float>(
L
liuqi 已提交
481 482 483 484 485 486 487
      {1, 3, 10, 2},
      {5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f});
L
Liangliang He 已提交
488

李寅 已提交
489
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
L
Liangliang He 已提交
490 491
}

492
// 1x1 convolution on the CPU device.
TEST_F(Conv2dOpTest, CPUConv1x1) {
  TestConv1x1<DeviceType::CPU>();
}
L
liuqi 已提交
493

494
// 1x1 convolution on the OPENCL device.
TEST_F(Conv2dOpTest, OPENCLConv1x1) {
  TestConv1x1<DeviceType::OPENCL>();
}
495

496
template <DeviceType D, typename T>
L
liuqi 已提交
497
static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
李寅 已提交
498
  testing::internal::LogToStderr();
L
Liangliang He 已提交
499
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
500
                  Padding type) {
501 502 503
    srand(time(NULL));

    // generate random input
L
liuqi 已提交
504
    index_t batch = 3 + (rand() % 10);
L
liuqi 已提交
505 506
    index_t height = shape[0];
    index_t width = shape[1];
L
liuqi 已提交
507 508
    index_t input_channels = shape[2] + (rand() % 10);
    index_t output_channels = shape[3] + (rand() % 10);
L
liuqi 已提交
509
    // Construct graph
L
liuqi 已提交
510
    OpsTestNet net;
L
liuqi 已提交
511 512 513
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
L
liuqi 已提交
514
        .Input("Bias")
L
liuqi 已提交
515
        .Output("Output")
L
Liangliang He 已提交
516 517 518
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {1, 1})
519
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
520
        .Finalize(net.NewOperatorDef());
L
liuqi 已提交
521 522

    // Add input data
523 524
    net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
    net.AddRandomInput<D, T>(
L
liuqi 已提交
525
        "Filter", {kernel_h, kernel_w, input_channels, output_channels});
526
    net.AddRandomInput<D, T>("Bias", {output_channels});
L
liuqi 已提交
527

L
liuqi 已提交
528 529
    // run on cpu
    net.RunOp();
L
liuqi 已提交
530 531 532 533
    // Check
    Tensor expected;
    expected.Copy(*net.GetOutput("Output"));

L
liuqi 已提交
534
    // run on gpu
535 536 537 538 539 540
    BufferToImage<D, T>(net, "Input", "InputImage",
                        kernels::BufferType::IN_OUT);
    BufferToImage<D, T>(net, "Filter", "FilterImage",
                        kernels::BufferType::FILTER);
    BufferToImage<D, T>(net, "Bias", "BiasImage",
                        kernels::BufferType::ARGUMENT);
L
liuqi 已提交
541

李寅 已提交
542
    OpDefBuilder("Conv2D", "Conv2dTest")
L
liuqi 已提交
543 544
        .Input("InputImage")
        .Input("FilterImage")
L
liuqi 已提交
545
        .Input("BiasImage")
L
liuqi 已提交
546
        .Output("OutputImage")
L
Liangliang He 已提交
547 548 549
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {1, 1})
550
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
551
        .Finalize(net.NewOperatorDef());
L
liuqi 已提交
552 553
    // Run on device
    net.RunOp(D);
554

555 556
    ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
                        kernels::BufferType::IN_OUT);
L
liuqi 已提交
557
    ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
558 559
  };

L
Liangliang He 已提交
560
  for (int kernel_size : {1, 3}) {
561
    for (int stride : {1, 2}) {
L
liuqi 已提交
562
      func(kernel_size, kernel_size, stride, stride, VALID);
563 564 565
      func(kernel_size, kernel_size, stride, stride, SAME);
    }
  }
566
}
L
liuqi 已提交
567

L
liuqi 已提交
568
// Randomised CPU-vs-OPENCL comparison starting from a 32x32 input with base
// channel counts 32 (input) and 64 (output).
TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) {
  TestComplexConvNxNS12<DeviceType::OPENCL, float>(
      {32, 32, 32, 64});
}

// Randomised CPU-vs-OPENCL comparison starting from a 107x113 input with base
// channel counts 5 (input) and 7 (output).
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) {
  TestComplexConvNxNS12<DeviceType::OPENCL, float>(
      {107, 113, 5, 7});
}
575

L
liuqi 已提交
576
template<DeviceType D>
L
liuqi 已提交
577
static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
578 579
                                      const std::vector<index_t> &filter_shape,
                                      const std::vector<int> &dilations) {
580
  testing::internal::LogToStderr();
L
liuqi 已提交
581
  srand(time(NULL));
582

L
liuqi 已提交
583
  auto func = [&](int stride_h, int stride_w, Padding padding) {
584
    // generate random input
585
    index_t batch = 3;
L
liuqi 已提交
586 587 588 589
    index_t height = input_shape[0];
    index_t width = input_shape[1];
    index_t kernel_h = filter_shape[0];
    index_t kernel_w = filter_shape[1];
590 591
    index_t input_channels = filter_shape[2];
    index_t output_channels = filter_shape[3];
592 593 594 595 596 597 598 599
    // Construct graph
    OpsTestNet net;
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {stride_h, stride_w})
L
liuqi 已提交
600
        .AddIntArg("padding", padding)
601
        .AddIntsArg("dilations", {dilations[0], dilations[1]})
602 603 604
        .Finalize(net.NewOperatorDef());

    std::vector<float> float_input_data;
605 606
    GenerateRandomRealTypeData({batch, height, width, input_channels},
                               float_input_data);
607
    std::vector<float> float_filter_data;
608 609 610
    GenerateRandomRealTypeData(
        {kernel_h, kernel_w, input_channels, output_channels},
        float_filter_data);
611 612 613 614
    std::vector<float> float_bias_data;
    GenerateRandomRealTypeData({output_channels}, float_bias_data);
    // Add input data
    net.AddInputFromArray<D, float>(
615 616 617 618
        "Input", {batch, height, width, input_channels}, float_input_data);
    net.AddInputFromArray<D, float>(
        "Filter", {kernel_h, kernel_w, input_channels, output_channels},
        float_filter_data);
619 620 621 622 623 624 625 626 627
    net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data);

    // run on cpu
    net.RunOp();
    // Check
    Tensor expected;
    expected.Copy(*net.GetOutput("Output"));

    // run on gpu
628 629 630 631 632 633
    BufferToImage<D, half>(net, "Input", "InputImage",
                           kernels::BufferType::IN_OUT);
    BufferToImage<D, half>(net, "Filter", "FilterImage",
                           kernels::BufferType::FILTER);
    BufferToImage<D, half>(net, "Bias", "BiasImage",
                           kernels::BufferType::ARGUMENT);
634 635 636 637 638 639 640

    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {stride_h, stride_w})
L
liuqi 已提交
641
        .AddIntArg("padding", padding)
642
        .AddIntsArg("dilations", {dilations[0], dilations[1]})
643
        .AddIntArg("T", static_cast<int>(DataType::DT_HALF))
644 645 646 647
        .Finalize(net.NewOperatorDef());
    // Run on device
    net.RunOp(D);

648 649
    ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput",
                            kernels::BufferType::IN_OUT);
650

L
liuqi 已提交
651
    ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
652 653
  };

654 655 656 657 658
  func(1, 1, VALID);
  func(1, 1, SAME);
  if (dilations[0] == 1) {
    func(2, 2, VALID);
    func(2, 2, SAME);
659 660 661
  }
}

L
liuqi 已提交
662 663
// Half-precision check: 32x32 input, 1x1 kernel, 32 -> 64 channels, no
// dilation.
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x1S12) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {32, 32}, {1, 1, 32, 64}, {1, 1});
}

// Half-precision check: 32x32 input, 3x3 kernel, 32 -> 64 channels, no
// dilation.
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {32, 32}, {3, 3, 32, 64}, {1, 1});
}

// Half-precision check: 32x32 input, 15x1 kernel, 256 -> 2 channels, no
// dilation.
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv15x1S12) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {32, 32}, {15, 1, 256, 2}, {1, 1});
}

// Half-precision check: 32x32 input, 1x15 kernel, 256 -> 2 channels, no
// dilation.
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x15S12) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {32, 32}, {1, 15, 256, 2}, {1, 1});
}

// Half-precision check: 32x32 input, 7x7 kernel, 3 -> 64 channels, no
// dilation.
// The test was previously registered as "...Conv7x75S12"; the stray "5" is
// removed so the name matches the 7x7 filter actually used.
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x7S12) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32},
                                                {7, 7, 3, 64},
                                                {1, 1});
}

// Half-precision check: 107x113 input, 1x1 kernel, 5 -> 7 channels, no
// dilation.
TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv1x1S12) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {107, 113}, {1, 1, 5, 7}, {1, 1});
}

L
liuqi 已提交
698 699
// Half-precision check: 107x113 input, 3x3 kernel, 5 -> 7 channels, no
// dilation.
TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {107, 113}, {3, 3, 5, 7}, {1, 1});
}

// Half-precision check: 64x64 input, 5x5 kernel, 16 -> 16 channels,
// dilation 2 in both dimensions.
TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {64, 64}, {5, 5, 16, 16}, {2, 2});
}

// Half-precision check: 64x64 input, 7x7 kernel, 16 -> 16 channels,
// dilation 2 in both dimensions.
TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation2) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {64, 64}, {7, 7, 16, 16}, {2, 2});
}

// Half-precision check: 63x67 input, 7x7 kernel, 16 -> 16 channels,
// dilation 4 in both dimensions.
TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation4) {
  TestHalfComplexConvNxNS12<DeviceType::OPENCL>(
      {63, 67}, {7, 7, 16, 16}, {4, 4});
}

template<DeviceType D, typename T>
static void TestDilationConvNxN(const std::vector<index_t> &shape, const int dilation_rate) {
  testing::internal::LogToStderr();
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
                  Padding type) {
    srand(time(NULL));

    // generate random input
    index_t batch = 1;
    index_t height = shape[0];
    index_t width = shape[1];
    index_t input_channels = shape[2];
    index_t output_channels = shape[3];
    // Construct graph
    OpsTestNet net;
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {dilation_rate, dilation_rate})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());

    // Add input data
    net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
    net.AddRandomInput<D, T>(
        "Filter", {kernel_h, kernel_w, input_channels, output_channels});
    net.AddRandomInput<D, T>("Bias", {output_channels});

    // run on cpu
    net.RunOp();
    // Check
    Tensor expected;
    expected.Copy(*net.GetOutput("Output"));

    // run on gpu
    BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
    BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
    BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);

    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {dilation_rate, dilation_rate})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
    // Run on device
    net.RunOp(D);

    ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
    ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
  };

  for (int kernel_size : {3}) {
    for (int stride : {1}) {
      func(kernel_size, kernel_size, stride, stride, VALID);
      func(kernel_size, kernel_size, stride, stride, SAME);
    }
  }
L
liuqi 已提交
788
}
789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804

// Dilation-2 float check: 32x32 input, 32 -> 64 channels.
TEST_F(Conv2dOpTest, OPENCLAlignedDilation2) {
  TestDilationConvNxN<DeviceType::OPENCL, float>(
      {32, 32, 32, 64}, 2);
}

// Dilation-4 float check: 128x128 input, 16 -> 16 channels.
TEST_F(Conv2dOpTest, OPENCLAligned2Dilation4) {
  TestDilationConvNxN<DeviceType::OPENCL, float>(
      {128, 128, 16, 16}, 4);
}

// Dilation-4 float check: 107x113 input, 5 -> 7 channels.
TEST_F(Conv2dOpTest, OPENCLUnalignedDilation4) {
  TestDilationConvNxN<DeviceType::OPENCL, float>(
      {107, 113, 5, 7}, 4);
}